From d920f7c6628c63a390009c237fb80a203c2e400a Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.com>
Date: Tue, 3 Jan 2017 14:40:44 +0100
Subject: genksyms: Fix segfault with invalid declarations

Do not try to recover too early and segfault when parsing invalid
declarations such as

echo 'int (int);' | scripts/genksyms/genksyms
echo 'int a, (int);' | scripts/genksyms/genksyms
echo 'extern void *__inline_memcpy((void *), (const void *), (__kernel_size_t));' | scripts/genksyms/genksyms

The last one was a real-life bug with
include/asm-generic/asm-prototypes.h on x86_64.

Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Michal Marek <mmarek@suse.com>
---
 scripts/genksyms/parse.y | 2 --
 1 file changed, 2 deletions(-)

diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y
index 4fba255e54ae..00a6d7e54971 100644
--- a/scripts/genksyms/parse.y
+++ b/scripts/genksyms/parse.y
@@ -322,8 +322,6 @@ direct_declarator:
 		{ $$ = $2; }
 	| '(' declarator ')'
 		{ $$ = $3; }
-	| '(' error ')'
-		{ $$ = $3; }
 	;
 
 /* Nested declarators differ from regular declarators in that they do
-- 
cgit v1.2.3


From fde42bfcd232fb1a46d844dbf486cb67cb910004 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.com>
Date: Tue, 3 Jan 2017 14:55:24 +0100
Subject: genksyms: Regenerate parser

Regenerate the parser after d920f7c6628c ("genksyms: Fix segfault with
invalid declarations").

Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Michal Marek <mmarek@suse.com>
---
 scripts/genksyms/parse.tab.c_shipped | 474 +++++++++++++++++------------------
 1 file changed, 234 insertions(+), 240 deletions(-)

diff --git a/scripts/genksyms/parse.tab.c_shipped b/scripts/genksyms/parse.tab.c_shipped
index 69148d30ca3f..d02258bafe7b 100644
--- a/scripts/genksyms/parse.tab.c_shipped
+++ b/scripts/genksyms/parse.tab.c_shipped
@@ -440,16 +440,16 @@ union yyalloc
 /* YYFINAL -- State number of the termination state.  */
 #define YYFINAL  4
 /* YYLAST -- Last index in YYTABLE.  */
-#define YYLAST   524
+#define YYLAST   522
 
 /* YYNTOKENS -- Number of terminals.  */
 #define YYNTOKENS  55
 /* YYNNTS -- Number of nonterminals.  */
 #define YYNNTS  49
 /* YYNRULES -- Number of rules.  */
-#define YYNRULES  134
+#define YYNRULES  133
 /* YYNRULES -- Number of states.  */
-#define YYNSTATES  189
+#define YYNSTATES  187
 
 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX.  */
 #define YYUNDEFTOK  2
@@ -506,13 +506,13 @@ static const yytype_uint16 yyprhs[] =
       97,   101,   105,   109,   112,   115,   118,   120,   122,   124,
      126,   128,   130,   132,   134,   136,   138,   140,   142,   145,
      146,   148,   150,   153,   155,   157,   159,   161,   164,   166,
-     168,   170,   175,   180,   183,   187,   191,   194,   196,   198,
-     200,   205,   210,   213,   217,   221,   224,   226,   230,   231,
-     233,   235,   239,   242,   245,   247,   248,   250,   252,   257,
-     262,   265,   269,   273,   277,   278,   280,   283,   287,   291,
-     292,   294,   296,   299,   303,   306,   307,   309,   311,   315,
-     318,   321,   323,   326,   327,   330,   334,   339,   341,   345,
-     347,   351,   354,   355,   357
+     168,   170,   175,   180,   183,   187,   190,   192,   194,   196,
+     201,   206,   209,   213,   217,   220,   222,   226,   227,   229,
+     231,   235,   238,   241,   243,   244,   246,   248,   253,   258,
+     261,   265,   269,   273,   274,   276,   279,   283,   287,   288,
+     290,   292,   295,   299,   302,   303,   305,   307,   311,   314,
+     317,   319,   322,   323,   326,   330,   335,   337,   341,   343,
+     347,   350,   351,   353
 };
 
 /* YYRHS -- A `-1'-separated list of the rules' RHS.  */
@@ -536,25 +536,24 @@ static const yytype_int8 yyrhs[] =
       74,    75,    -1,     8,    -1,    27,    -1,    32,    -1,    18,
       -1,    72,    76,    -1,    77,    -1,    39,    -1,    43,    -1,
       77,    49,    80,    50,    -1,    77,    49,     1,    50,    -1,
-      77,    35,    -1,    49,    76,    50,    -1,    49,     1,    50,
-      -1,    72,    78,    -1,    79,    -1,    39,    -1,    43,    -1,
-      79,    49,    80,    50,    -1,    79,    49,     1,    50,    -1,
-      79,    35,    -1,    49,    78,    50,    -1,    49,     1,    50,
-      -1,    81,    38,    -1,    81,    -1,    82,    48,    38,    -1,
-      -1,    82,    -1,    83,    -1,    82,    48,    83,    -1,    67,
-      84,    -1,    72,    84,    -1,    85,    -1,    -1,    39,    -1,
-      43,    -1,    85,    49,    80,    50,    -1,    85,    49,     1,
-      50,    -1,    85,    35,    -1,    49,    84,    50,    -1,    49,
-       1,    50,    -1,    66,    76,    34,    -1,    -1,    88,    -1,
-      52,    36,    -1,    53,    90,    47,    -1,    53,     1,    47,
-      -1,    -1,    91,    -1,    92,    -1,    91,    92,    -1,    66,
-      93,    46,    -1,     1,    46,    -1,    -1,    94,    -1,    95,
-      -1,    94,    48,    95,    -1,    78,    97,    -1,    39,    96,
-      -1,    96,    -1,    54,    36,    -1,    -1,    97,    32,    -1,
-      53,    99,    47,    -1,    53,    99,    48,    47,    -1,   100,
-      -1,    99,    48,   100,    -1,    39,    -1,    39,    52,    36,
-      -1,    31,    46,    -1,    -1,    31,    -1,    30,    49,    39,
-      50,    46,    -1
+      77,    35,    -1,    49,    76,    50,    -1,    72,    78,    -1,
+      79,    -1,    39,    -1,    43,    -1,    79,    49,    80,    50,
+      -1,    79,    49,     1,    50,    -1,    79,    35,    -1,    49,
+      78,    50,    -1,    49,     1,    50,    -1,    81,    38,    -1,
+      81,    -1,    82,    48,    38,    -1,    -1,    82,    -1,    83,
+      -1,    82,    48,    83,    -1,    67,    84,    -1,    72,    84,
+      -1,    85,    -1,    -1,    39,    -1,    43,    -1,    85,    49,
+      80,    50,    -1,    85,    49,     1,    50,    -1,    85,    35,
+      -1,    49,    84,    50,    -1,    49,     1,    50,    -1,    66,
+      76,    34,    -1,    -1,    88,    -1,    52,    36,    -1,    53,
+      90,    47,    -1,    53,     1,    47,    -1,    -1,    91,    -1,
+      92,    -1,    91,    92,    -1,    66,    93,    46,    -1,     1,
+      46,    -1,    -1,    94,    -1,    95,    -1,    94,    48,    95,
+      -1,    78,    97,    -1,    39,    96,    -1,    96,    -1,    54,
+      36,    -1,    -1,    97,    32,    -1,    53,    99,    47,    -1,
+      53,    99,    48,    47,    -1,   100,    -1,    99,    48,   100,
+      -1,    39,    -1,    39,    52,    36,    -1,    31,    46,    -1,
+      -1,    31,    -1,    30,    49,    39,    50,    46,    -1
 };
 
 /* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
@@ -567,13 +566,13 @@ static const yytype_uint16 yyrline[] =
      238,   240,   242,   247,   250,   251,   255,   256,   257,   258,
      259,   260,   261,   262,   263,   264,   265,   266,   270,   275,
      276,   280,   281,   285,   285,   285,   286,   294,   295,   299,
-     308,   317,   319,   321,   323,   325,   332,   333,   337,   338,
-     339,   341,   343,   345,   347,   352,   353,   354,   358,   359,
-     363,   364,   369,   374,   376,   380,   381,   389,   393,   395,
-     397,   399,   401,   406,   415,   416,   421,   426,   427,   431,
-     432,   436,   437,   441,   443,   448,   449,   453,   454,   458,
-     459,   460,   464,   468,   469,   473,   474,   478,   479,   482,
-     487,   495,   499,   500,   504
+     308,   317,   319,   321,   323,   330,   331,   335,   336,   337,
+     339,   341,   343,   345,   350,   351,   352,   356,   357,   361,
+     362,   367,   372,   374,   378,   379,   387,   391,   393,   395,
+     397,   399,   404,   413,   414,   419,   424,   425,   429,   430,
+     434,   435,   439,   441,   446,   447,   451,   452,   456,   457,
+     458,   462,   466,   467,   471,   472,   476,   477,   480,   485,
+     493,   497,   498,   502
 };
 #endif
 
@@ -636,13 +635,13 @@ static const yytype_uint8 yyr1[] =
       70,    70,    70,    70,    70,    70,    71,    71,    71,    71,
       71,    71,    71,    71,    71,    71,    71,    71,    72,    73,
       73,    74,    74,    75,    75,    75,    75,    76,    76,    77,
-      77,    77,    77,    77,    77,    77,    78,    78,    79,    79,
-      79,    79,    79,    79,    79,    80,    80,    80,    81,    81,
-      82,    82,    83,    84,    84,    85,    85,    85,    85,    85,
-      85,    85,    85,    86,    87,    87,    88,    89,    89,    90,
-      90,    91,    91,    92,    92,    93,    93,    94,    94,    95,
-      95,    95,    96,    97,    97,    98,    98,    99,    99,   100,
-     100,   101,   102,   102,   103
+      77,    77,    77,    77,    77,    78,    78,    79,    79,    79,
+      79,    79,    79,    79,    80,    80,    80,    81,    81,    82,
+      82,    83,    84,    84,    85,    85,    85,    85,    85,    85,
+      85,    85,    86,    87,    87,    88,    89,    89,    90,    90,
+      91,    91,    92,    92,    93,    93,    94,    94,    95,    95,
+      95,    96,    97,    97,    98,    98,    99,    99,   100,   100,
+     101,   102,   102,   103
 };
 
 /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN.  */
@@ -655,13 +654,13 @@ static const yytype_uint8 yyr2[] =
        3,     3,     3,     2,     2,     2,     1,     1,     1,     1,
        1,     1,     1,     1,     1,     1,     1,     1,     2,     0,
        1,     1,     2,     1,     1,     1,     1,     2,     1,     1,
-       1,     4,     4,     2,     3,     3,     2,     1,     1,     1,
-       4,     4,     2,     3,     3,     2,     1,     3,     0,     1,
-       1,     3,     2,     2,     1,     0,     1,     1,     4,     4,
-       2,     3,     3,     3,     0,     1,     2,     3,     3,     0,
-       1,     1,     2,     3,     2,     0,     1,     1,     3,     2,
-       2,     1,     2,     0,     2,     3,     4,     1,     3,     1,
-       3,     2,     0,     1,     5
+       1,     4,     4,     2,     3,     2,     1,     1,     1,     4,
+       4,     2,     3,     3,     2,     1,     3,     0,     1,     1,
+       3,     2,     2,     1,     0,     1,     1,     4,     4,     2,
+       3,     3,     3,     0,     1,     2,     3,     3,     0,     1,
+       1,     2,     3,     2,     0,     1,     1,     3,     2,     2,
+       1,     2,     0,     2,     3,     4,     1,     3,     1,     3,
+       2,     0,     1,     5
 };
 
 /* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM.
@@ -675,189 +674,189 @@ static const yytype_uint8 yydefact[] =
        0,    56,     0,     0,    65,    36,    57,     5,    10,    17,
       23,    24,    26,    27,    33,    34,    11,    12,    13,    14,
       15,    39,     0,    43,     6,    37,     0,    44,    22,    38,
-      45,     0,     0,   131,    69,    70,     0,    59,     0,    18,
-      19,     0,   132,    68,    25,    42,   129,     0,   127,    22,
-      40,     0,   115,     0,     0,   111,     9,    17,    41,    95,
-       0,     0,     0,     0,    58,    60,    61,    16,     0,    67,
-     133,   103,   123,    73,     0,     0,   125,     0,     7,   114,
-     108,    78,    79,     0,     0,     0,   123,    77,     0,   116,
-     117,   121,   107,     0,   112,   132,    96,    57,     0,    95,
-      92,    94,    35,     0,    75,    74,    62,    20,   104,     0,
-       0,    86,    89,    90,   130,   126,   128,   120,     0,    78,
-       0,   122,    76,   119,    82,     0,   113,     0,     0,    97,
-       0,    93,   100,     0,   134,   124,     0,    21,   105,    72,
-      71,    85,     0,    84,    83,     0,     0,   118,   102,   101,
-       0,     0,   106,    87,    91,    81,    80,    99,    98
+      45,     0,     0,   130,    69,    70,     0,    59,     0,    18,
+      19,     0,   131,    68,    25,    42,   128,     0,   126,    22,
+      40,     0,   114,     0,     0,   110,     9,    17,    41,    94,
+       0,     0,     0,    58,    60,    61,    16,     0,    67,   132,
+     102,   122,    73,     0,     0,   124,     0,     7,   113,   107,
+      77,    78,     0,     0,     0,   122,    76,     0,   115,   116,
+     120,   106,     0,   111,   131,    95,    57,     0,    94,    91,
+      93,    35,     0,    74,    62,    20,   103,     0,     0,    85,
+      88,    89,   129,   125,   127,   119,     0,    77,     0,   121,
+      75,   118,    81,     0,   112,     0,     0,    96,     0,    92,
+      99,     0,   133,   123,     0,    21,   104,    72,    71,    84,
+       0,    83,    82,     0,     0,   117,   101,   100,     0,     0,
+     105,    86,    90,    80,    79,    98,    97
 };
 
 /* YYDEFGOTO[NTERM-NUM].  */
 static const yytype_int16 yydefgoto[] =
 {
       -1,     1,     2,     3,    37,    79,    58,    38,    68,    69,
-      70,    82,    40,    41,    42,    43,    44,    71,    94,    95,
-      45,   125,    73,   116,   117,   140,   141,   142,   143,   130,
-     131,    46,   167,   168,    57,    83,    84,    85,   118,   119,
-     120,   121,   138,    53,    77,    78,    47,   102,    48
+      70,    82,    40,    41,    42,    43,    44,    71,    93,    94,
+      45,   124,    73,   115,   116,   138,   139,   140,   141,   129,
+     130,    46,   165,   166,    57,    83,    84,    85,   117,   118,
+     119,   120,   136,    53,    77,    78,    47,   101,    48
 };
 
 /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
    STATE-NUM.  */
-#define YYPACT_NINF -111
+#define YYPACT_NINF -94
 static const yytype_int16 yypact[] =
 {
-    -111,    13,  -111,   210,  -111,  -111,    28,  -111,  -111,  -111,
-    -111,  -111,   -27,  -111,    44,  -111,  -111,  -111,  -111,  -111,
-    -111,  -111,  -111,  -111,   -24,  -111,   -20,  -111,  -111,  -111,
-      31,  -111,    32,    42,  -111,  -111,  -111,  -111,  -111,    34,
-     481,  -111,  -111,  -111,  -111,  -111,  -111,  -111,  -111,  -111,
-    -111,    51,    56,  -111,  -111,    52,   108,  -111,   481,    52,
-    -111,   481,    58,  -111,  -111,  -111,    19,     0,    54,    55,
-    -111,    34,    30,   -18,  -111,  -111,    68,   -25,  -111,   481,
-    -111,    45,    33,    59,   159,  -111,  -111,    34,  -111,   395,
-      57,    60,    81,    88,  -111,     0,  -111,  -111,    34,  -111,
-    -111,  -111,  -111,  -111,   257,    72,  -111,   -23,  -111,  -111,
-    -111,    85,  -111,    20,   106,    47,  -111,   -10,    97,    96,
-    -111,  -111,  -111,    99,  -111,   115,  -111,  -111,     5,    50,
-    -111,    11,  -111,   102,  -111,  -111,  -111,  -111,   -22,   100,
-     103,   111,   104,  -111,  -111,  -111,  -111,  -111,   113,  -111,
-     121,  -111,  -111,   124,  -111,   303,  -111,    33,   132,  -111,
-     139,  -111,  -111,   349,  -111,  -111,   122,  -111,  -111,  -111,
-    -111,  -111,   442,  -111,  -111,   140,   143,  -111,  -111,  -111,
-     144,   145,  -111,  -111,  -111,  -111,  -111,  -111,  -111
+     -94,    15,   -94,   208,   -94,   -94,    34,   -94,   -94,   -94,
+     -94,   -94,   -27,   -94,    -5,   -94,   -94,   -94,   -94,   -94,
+     -94,   -94,   -94,   -94,   -25,   -94,   -16,   -94,   -94,   -94,
+      -4,   -94,    19,   -24,   -94,   -94,   -94,   -94,   -94,    24,
+     479,   -94,   -94,   -94,   -94,   -94,   -94,   -94,   -94,   -94,
+     -94,    29,    48,   -94,   -94,    37,   106,   -94,   479,    37,
+     -94,   479,    54,   -94,   -94,   -94,    24,    -2,    49,    53,
+     -94,    24,   -14,   -11,   -94,   -94,    47,    38,   -94,   479,
+     -94,    51,    23,    55,   157,   -94,   -94,    24,   -94,   393,
+      56,    58,    68,   -94,    -2,   -94,   -94,    24,   -94,   -94,
+     -94,   -94,   -94,   255,    67,   -94,     5,   -94,   -94,   -94,
+      50,   -94,     7,    69,    40,   -94,    -8,    83,    88,   -94,
+     -94,   -94,    91,   -94,   109,   -94,   -94,     4,    45,   -94,
+      16,   -94,    95,   -94,   -94,   -94,   -23,    92,    93,   108,
+      96,   -94,   -94,   -94,   -94,   -94,    97,   -94,    98,   -94,
+     -94,   118,   -94,   301,   -94,    23,   101,   -94,   104,   -94,
+     -94,   347,   -94,   -94,   120,   -94,   -94,   -94,   -94,   -94,
+     440,   -94,   -94,   111,   119,   -94,   -94,   -94,   130,   137,
+     -94,   -94,   -94,   -94,   -94,   -94,   -94
 };
 
 /* YYPGOTO[NTERM-NUM].  */
 static const yytype_int16 yypgoto[] =
 {
-    -111,  -111,   160,  -111,  -111,  -111,  -111,   -51,  -111,  -111,
-      98,    -1,   -61,   -37,  -111,  -111,  -111,   -78,  -111,  -111,
-     -53,   -30,  -111,   -66,  -111,  -110,  -111,  -111,   -60,   -63,
-    -111,  -111,  -111,  -111,   -21,  -111,  -111,   116,  -111,  -111,
-      40,    90,    83,   152,  -111,   105,  -111,  -111,  -111
+     -94,   -94,   158,   -94,   -94,   -94,   -94,   -45,   -94,   -94,
+      94,    -1,   -61,   -29,   -94,   -94,   -94,   -79,   -94,   -94,
+     -63,    -7,   -94,   -93,   -94,   -92,   -94,   -94,   -60,   -57,
+     -94,   -94,   -94,   -94,   -19,   -94,   -94,   110,   -94,   -94,
+      33,    82,    78,   144,   -94,    99,   -94,   -94,   -94
 };
 
 /* YYTABLE[YYPACT[STATE-NUM]].  What to do in state STATE-NUM.  If
    positive, shift that token.  If negative, reduce the rule which
    number is the opposite.  If YYTABLE_NINF, syntax error.  */
-#define YYTABLE_NINF -111
+#define YYTABLE_NINF -110
 static const yytype_int16 yytable[] =
 {
-      89,    90,    39,    74,   115,    60,   158,    86,    10,    72,
-     165,   129,    51,     4,    96,    55,    76,   103,    20,    59,
-      92,   148,   106,   107,   145,   154,    52,    29,   108,    56,
-     166,   104,    34,    56,    80,   115,    93,   115,    88,   155,
-     -95,    99,   136,    89,   126,   176,   162,   150,   159,   152,
-     129,   129,    74,   181,   128,   -95,    67,    87,    64,   149,
-     163,   100,    65,   112,   101,   160,   161,    54,    66,   113,
-      67,    67,   111,    64,    49,    50,   112,    65,    87,   115,
-      61,    62,   113,    66,    67,    67,   149,   114,    63,   126,
-     112,   109,   110,   159,    89,    76,   113,    91,    67,   128,
-      97,    67,    89,    98,    52,    56,   122,   132,   144,    81,
-     133,    89,   184,     7,     8,     9,    10,    11,    12,    13,
-     105,    15,    16,    17,    18,    19,    20,    21,    22,    23,
-      24,   134,    26,    27,    28,    29,    30,    31,   135,   114,
-      34,    35,   151,   156,   157,   109,   100,   -22,   164,   171,
-     169,    36,   172,   170,   -22,  -109,   165,   -22,   182,   -22,
-     123,     5,   -22,   173,     7,     8,     9,    10,    11,    12,
-      13,   174,    15,    16,    17,    18,    19,    20,    21,    22,
-      23,    24,   178,    26,    27,    28,    29,    30,    31,   179,
-     185,    34,    35,   186,   187,   188,   137,   177,   -22,   153,
-     124,   147,    36,    75,     0,   -22,  -110,     0,   -22,     0,
-     -22,     6,   146,   -22,     0,     7,     8,     9,    10,    11,
-      12,    13,    14,    15,    16,    17,    18,    19,    20,    21,
-      22,    23,    24,    25,    26,    27,    28,    29,    30,    31,
-      32,    33,    34,    35,     0,     0,     0,     0,     0,   -22,
-       0,     0,     0,    36,     0,     0,   -22,     0,   139,   -22,
-       0,   -22,     7,     8,     9,    10,    11,    12,    13,     0,
+      89,    90,    39,   114,    95,   156,    10,    60,   146,   163,
+     128,    74,    51,    86,    55,     4,    20,    99,    54,   148,
+     100,   150,    63,    59,   102,    29,    52,   152,    56,   164,
+      34,   134,    72,   114,   107,   114,    80,    56,   103,   -94,
+      88,   153,    89,   125,    76,    61,   147,   157,   128,   128,
+     111,   160,   143,   127,   -94,    67,   112,    87,    67,    92,
+      74,   174,   110,    64,    98,   161,   111,    65,    62,   179,
+     158,   159,   112,    66,    67,    67,   114,   113,    87,   147,
+      49,    50,    52,   111,   125,   105,   106,    76,   157,   112,
+      56,    67,    89,    91,   127,    96,    67,   108,   109,   104,
+      89,    97,   121,   142,   113,   149,   131,    81,   132,    89,
+     182,     7,     8,     9,    10,    11,    12,    13,   133,    15,
+      16,    17,    18,    19,    20,    21,    22,    23,    24,   154,
+      26,    27,    28,    29,    30,    31,   155,   108,    34,    35,
+      99,   162,   167,   168,   170,   -22,   169,   171,   172,    36,
+     163,   176,   -22,  -108,   177,   -22,   180,   -22,   122,     5,
+     -22,   183,     7,     8,     9,    10,    11,    12,    13,   184,
+      15,    16,    17,    18,    19,    20,    21,    22,    23,    24,
+     185,    26,    27,    28,    29,    30,    31,   186,   175,    34,
+      35,   135,   145,   151,   123,    75,   -22,     0,     0,     0,
+      36,     0,     0,   -22,  -109,   144,   -22,     0,   -22,     6,
+       0,   -22,     0,     7,     8,     9,    10,    11,    12,    13,
+      14,    15,    16,    17,    18,    19,    20,    21,    22,    23,
+      24,    25,    26,    27,    28,    29,    30,    31,    32,    33,
+      34,    35,     0,     0,     0,     0,     0,   -22,     0,     0,
+       0,    36,     0,     0,   -22,     0,   137,   -22,     0,   -22,
+       7,     8,     9,    10,    11,    12,    13,     0,    15,    16,
+      17,    18,    19,    20,    21,    22,    23,    24,     0,    26,
+      27,    28,    29,    30,    31,     0,     0,    34,    35,     0,
+       0,     0,     0,   -87,     0,     0,     0,     0,    36,     0,
+       0,     0,   173,     0,     0,   -87,     7,     8,     9,    10,
+      11,    12,    13,     0,    15,    16,    17,    18,    19,    20,
+      21,    22,    23,    24,     0,    26,    27,    28,    29,    30,
+      31,     0,     0,    34,    35,     0,     0,     0,     0,   -87,
+       0,     0,     0,     0,    36,     0,     0,     0,   178,     0,
+       0,   -87,     7,     8,     9,    10,    11,    12,    13,     0,
       15,    16,    17,    18,    19,    20,    21,    22,    23,    24,
        0,    26,    27,    28,    29,    30,    31,     0,     0,    34,
-      35,     0,     0,     0,     0,   -88,     0,     0,     0,     0,
-      36,     0,     0,     0,   175,     0,     0,   -88,     7,     8,
+      35,     0,     0,     0,     0,   -87,     0,     0,     0,     0,
+      36,     0,     0,     0,     0,     0,     0,   -87,     7,     8,
        9,    10,    11,    12,    13,     0,    15,    16,    17,    18,
       19,    20,    21,    22,    23,    24,     0,    26,    27,    28,
       29,    30,    31,     0,     0,    34,    35,     0,     0,     0,
-       0,   -88,     0,     0,     0,     0,    36,     0,     0,     0,
-     180,     0,     0,   -88,     7,     8,     9,    10,    11,    12,
+       0,     0,   125,     0,     0,     0,   126,     0,     0,     0,
+       0,     0,   127,     0,    67,     7,     8,     9,    10,    11,
+      12,    13,     0,    15,    16,    17,    18,    19,    20,    21,
+      22,    23,    24,     0,    26,    27,    28,    29,    30,    31,
+       0,     0,    34,    35,     0,     0,     0,     0,   181,     0,
+       0,     0,     0,    36,     7,     8,     9,    10,    11,    12,
       13,     0,    15,    16,    17,    18,    19,    20,    21,    22,
       23,    24,     0,    26,    27,    28,    29,    30,    31,     0,
-       0,    34,    35,     0,     0,     0,     0,   -88,     0,     0,
-       0,     0,    36,     0,     0,     0,     0,     0,     0,   -88,
-       7,     8,     9,    10,    11,    12,    13,     0,    15,    16,
-      17,    18,    19,    20,    21,    22,    23,    24,     0,    26,
-      27,    28,    29,    30,    31,     0,     0,    34,    35,     0,
-       0,     0,     0,     0,   126,     0,     0,     0,   127,     0,
-       0,     0,     0,     0,   128,     0,    67,     7,     8,     9,
-      10,    11,    12,    13,     0,    15,    16,    17,    18,    19,
-      20,    21,    22,    23,    24,     0,    26,    27,    28,    29,
-      30,    31,     0,     0,    34,    35,     0,     0,     0,     0,
-     183,     0,     0,     0,     0,    36,     7,     8,     9,    10,
-      11,    12,    13,     0,    15,    16,    17,    18,    19,    20,
-      21,    22,    23,    24,     0,    26,    27,    28,    29,    30,
-      31,     0,     0,    34,    35,     0,     0,     0,     0,     0,
-       0,     0,     0,     0,    36
+       0,    34,    35,     0,     0,     0,     0,     0,     0,     0,
+       0,     0,    36
 };
 
 #define yypact_value_is_default(Yystate) \
-  (!!((Yystate) == (-111)))
+  (!!((Yystate) == (-94)))
 
 #define yytable_value_is_error(Yytable_value) \
   YYID (0)
 
 static const yytype_int16 yycheck[] =
 {
-      61,    61,     3,    40,    82,    26,     1,    58,     8,    39,
-      32,    89,    39,     0,    67,    39,    39,    35,    18,    39,
-       1,     1,    47,    48,    47,    35,    53,    27,    79,    53,
-      52,    49,    32,    53,    55,   113,    66,   115,    59,    49,
-      35,    71,    95,   104,    39,   155,    35,   113,    43,   115,
-     128,   129,    89,   163,    49,    50,    51,    58,    39,    39,
-      49,    31,    43,    43,    34,   128,   129,    23,    49,    49,
-      51,    51,    39,    39,    46,    47,    43,    43,    79,   157,
-      49,    49,    49,    49,    51,    51,    39,    54,    46,    39,
-      43,    46,    47,    43,   155,    39,    49,    39,    51,    49,
-      46,    51,   163,    48,    53,    53,    47,    50,    36,     1,
-      50,   172,   172,     5,     6,     7,     8,     9,    10,    11,
-      52,    13,    14,    15,    16,    17,    18,    19,    20,    21,
-      22,    50,    24,    25,    26,    27,    28,    29,    50,    54,
-      32,    33,    36,    46,    48,    46,    31,    39,    46,    38,
-      50,    43,    48,    50,    46,    47,    32,    49,    36,    51,
-       1,     1,    54,    50,     5,     6,     7,     8,     9,    10,
-      11,    50,    13,    14,    15,    16,    17,    18,    19,    20,
-      21,    22,    50,    24,    25,    26,    27,    28,    29,    50,
-      50,    32,    33,    50,    50,    50,    98,   157,    39,   116,
-      84,   111,    43,    51,    -1,    46,    47,    -1,    49,    -1,
-      51,     1,   107,    54,    -1,     5,     6,     7,     8,     9,
-      10,    11,    12,    13,    14,    15,    16,    17,    18,    19,
-      20,    21,    22,    23,    24,    25,    26,    27,    28,    29,
-      30,    31,    32,    33,    -1,    -1,    -1,    -1,    -1,    39,
-      -1,    -1,    -1,    43,    -1,    -1,    46,    -1,     1,    49,
-      -1,    51,     5,     6,     7,     8,     9,    10,    11,    -1,
+      61,    61,     3,    82,    67,     1,     8,    26,     1,    32,
+      89,    40,    39,    58,    39,     0,    18,    31,    23,   112,
+      34,   114,    46,    39,    35,    27,    53,    35,    53,    52,
+      32,    94,    39,   112,    79,   114,    55,    53,    49,    35,
+      59,    49,   103,    39,    39,    49,    39,    43,   127,   128,
+      43,    35,    47,    49,    50,    51,    49,    58,    51,    66,
+      89,   153,    39,    39,    71,    49,    43,    43,    49,   161,
+     127,   128,    49,    49,    51,    51,   155,    54,    79,    39,
+      46,    47,    53,    43,    39,    47,    48,    39,    43,    49,
+      53,    51,   153,    39,    49,    46,    51,    46,    47,    52,
+     161,    48,    47,    36,    54,    36,    50,     1,    50,   170,
+     170,     5,     6,     7,     8,     9,    10,    11,    50,    13,
+      14,    15,    16,    17,    18,    19,    20,    21,    22,    46,
+      24,    25,    26,    27,    28,    29,    48,    46,    32,    33,
+      31,    46,    50,    50,    48,    39,    38,    50,    50,    43,
+      32,    50,    46,    47,    50,    49,    36,    51,     1,     1,
+      54,    50,     5,     6,     7,     8,     9,    10,    11,    50,
+      13,    14,    15,    16,    17,    18,    19,    20,    21,    22,
+      50,    24,    25,    26,    27,    28,    29,    50,   155,    32,
+      33,    97,   110,   115,    84,    51,    39,    -1,    -1,    -1,
+      43,    -1,    -1,    46,    47,   106,    49,    -1,    51,     1,
+      -1,    54,    -1,     5,     6,     7,     8,     9,    10,    11,
+      12,    13,    14,    15,    16,    17,    18,    19,    20,    21,
+      22,    23,    24,    25,    26,    27,    28,    29,    30,    31,
+      32,    33,    -1,    -1,    -1,    -1,    -1,    39,    -1,    -1,
+      -1,    43,    -1,    -1,    46,    -1,     1,    49,    -1,    51,
+       5,     6,     7,     8,     9,    10,    11,    -1,    13,    14,
+      15,    16,    17,    18,    19,    20,    21,    22,    -1,    24,
+      25,    26,    27,    28,    29,    -1,    -1,    32,    33,    -1,
+      -1,    -1,    -1,    38,    -1,    -1,    -1,    -1,    43,    -1,
+      -1,    -1,     1,    -1,    -1,    50,     5,     6,     7,     8,
+       9,    10,    11,    -1,    13,    14,    15,    16,    17,    18,
+      19,    20,    21,    22,    -1,    24,    25,    26,    27,    28,
+      29,    -1,    -1,    32,    33,    -1,    -1,    -1,    -1,    38,
+      -1,    -1,    -1,    -1,    43,    -1,    -1,    -1,     1,    -1,
+      -1,    50,     5,     6,     7,     8,     9,    10,    11,    -1,
       13,    14,    15,    16,    17,    18,    19,    20,    21,    22,
       -1,    24,    25,    26,    27,    28,    29,    -1,    -1,    32,
       33,    -1,    -1,    -1,    -1,    38,    -1,    -1,    -1,    -1,
-      43,    -1,    -1,    -1,     1,    -1,    -1,    50,     5,     6,
+      43,    -1,    -1,    -1,    -1,    -1,    -1,    50,     5,     6,
        7,     8,     9,    10,    11,    -1,    13,    14,    15,    16,
       17,    18,    19,    20,    21,    22,    -1,    24,    25,    26,
       27,    28,    29,    -1,    -1,    32,    33,    -1,    -1,    -1,
-      -1,    38,    -1,    -1,    -1,    -1,    43,    -1,    -1,    -1,
-       1,    -1,    -1,    50,     5,     6,     7,     8,     9,    10,
+      -1,    -1,    39,    -1,    -1,    -1,    43,    -1,    -1,    -1,
+      -1,    -1,    49,    -1,    51,     5,     6,     7,     8,     9,
+      10,    11,    -1,    13,    14,    15,    16,    17,    18,    19,
+      20,    21,    22,    -1,    24,    25,    26,    27,    28,    29,
+      -1,    -1,    32,    33,    -1,    -1,    -1,    -1,    38,    -1,
+      -1,    -1,    -1,    43,     5,     6,     7,     8,     9,    10,
       11,    -1,    13,    14,    15,    16,    17,    18,    19,    20,
       21,    22,    -1,    24,    25,    26,    27,    28,    29,    -1,
-      -1,    32,    33,    -1,    -1,    -1,    -1,    38,    -1,    -1,
-      -1,    -1,    43,    -1,    -1,    -1,    -1,    -1,    -1,    50,
-       5,     6,     7,     8,     9,    10,    11,    -1,    13,    14,
-      15,    16,    17,    18,    19,    20,    21,    22,    -1,    24,
-      25,    26,    27,    28,    29,    -1,    -1,    32,    33,    -1,
-      -1,    -1,    -1,    -1,    39,    -1,    -1,    -1,    43,    -1,
-      -1,    -1,    -1,    -1,    49,    -1,    51,     5,     6,     7,
-       8,     9,    10,    11,    -1,    13,    14,    15,    16,    17,
-      18,    19,    20,    21,    22,    -1,    24,    25,    26,    27,
-      28,    29,    -1,    -1,    32,    33,    -1,    -1,    -1,    -1,
-      38,    -1,    -1,    -1,    -1,    43,     5,     6,     7,     8,
-       9,    10,    11,    -1,    13,    14,    15,    16,    17,    18,
-      19,    20,    21,    22,    -1,    24,    25,    26,    27,    28,
-      29,    -1,    -1,    32,    33,    -1,    -1,    -1,    -1,    -1,
-      -1,    -1,    -1,    -1,    43
+      -1,    32,    33,    -1,    -1,    -1,    -1,    -1,    -1,    -1,
+      -1,    -1,    43
 };
 
 /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
@@ -873,16 +872,16 @@ static const yytype_uint8 yystos[] =
       89,    49,    49,    46,    39,    43,    49,    51,    63,    64,
       65,    72,    76,    77,    68,    98,    39,    99,   100,    60,
       89,     1,    66,    90,    91,    92,    62,    66,    89,    67,
-      83,    39,     1,    76,    73,    74,    75,    46,    48,    76,
-      31,    34,   102,    35,    49,    52,    47,    48,    62,    46,
-      47,    39,    43,    49,    54,    72,    78,    79,    93,    94,
-      95,    96,    47,     1,    92,    76,    39,    43,    49,    72,
-      84,    85,    50,    50,    50,    50,    75,    65,    97,     1,
-      80,    81,    82,    83,    36,    47,   100,    96,     1,    39,
-      78,    36,    78,    97,    35,    49,    46,    48,     1,    43,
-      84,    84,    35,    49,    46,    32,    52,    87,    88,    50,
-      50,    38,    48,    50,    50,     1,    80,    95,    50,    50,
-       1,    80,    36,    38,    83,    50,    50,    50,    50
+      83,    39,    76,    73,    74,    75,    46,    48,    76,    31,
+      34,   102,    35,    49,    52,    47,    48,    62,    46,    47,
+      39,    43,    49,    54,    72,    78,    79,    93,    94,    95,
+      96,    47,     1,    92,    76,    39,    43,    49,    72,    84,
+      85,    50,    50,    50,    75,    65,    97,     1,    80,    81,
+      82,    83,    36,    47,   100,    96,     1,    39,    78,    36,
+      78,    97,    35,    49,    46,    48,     1,    43,    84,    84,
+      35,    49,    46,    32,    52,    87,    88,    50,    50,    38,
+      48,    50,    50,     1,    80,    95,    50,    50,     1,    80,
+      36,    38,    83,    50,    50,    50,    50
 };
 
 #define yyerrok		(yyerrstatus = 0)
@@ -1928,12 +1927,12 @@ yyreduce:
 
   case 75:
 
-    { (yyval) = (yyvsp[(3) - (3)]); }
+    { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 76:
+  case 79:
 
-    { (yyval) = (yyvsp[(2) - (2)]); }
+    { (yyval) = (yyvsp[(4) - (4)]); }
     break;
 
   case 80:
@@ -1943,12 +1942,12 @@ yyreduce:
 
   case 81:
 
-    { (yyval) = (yyvsp[(4) - (4)]); }
+    { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
   case 82:
 
-    { (yyval) = (yyvsp[(2) - (2)]); }
+    { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
   case 83:
@@ -1958,45 +1957,40 @@ yyreduce:
 
   case 84:
 
-    { (yyval) = (yyvsp[(3) - (3)]); }
-    break;
-
-  case 85:
-
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 87:
+  case 86:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 88:
+  case 87:
 
     { (yyval) = NULL; }
     break;
 
-  case 91:
+  case 90:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 92:
+  case 91:
 
     { (yyval) = (yyvsp[(2) - (2)]) ? (yyvsp[(2) - (2)]) : (yyvsp[(1) - (2)]); }
     break;
 
-  case 93:
+  case 92:
 
     { (yyval) = (yyvsp[(2) - (2)]) ? (yyvsp[(2) - (2)]) : (yyvsp[(1) - (2)]); }
     break;
 
-  case 95:
+  case 94:
 
     { (yyval) = NULL; }
     break;
 
-  case 96:
+  case 95:
 
     { /* For version 2 checksums, we don't want to remember
 		     private parameter names.  */
@@ -2005,39 +1999,39 @@ yyreduce:
 		}
     break;
 
-  case 97:
+  case 96:
 
     { remove_node((yyvsp[(1) - (1)]));
 		  (yyval) = (yyvsp[(1) - (1)]);
 		}
     break;
 
-  case 98:
+  case 97:
 
     { (yyval) = (yyvsp[(4) - (4)]); }
     break;
 
-  case 99:
+  case 98:
 
     { (yyval) = (yyvsp[(4) - (4)]); }
     break;
 
-  case 100:
+  case 99:
 
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 101:
+  case 100:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 102:
+  case 101:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 103:
+  case 102:
 
     { struct string_list *decl = *(yyvsp[(2) - (3)]);
 		  *(yyvsp[(2) - (3)]) = NULL;
@@ -2046,87 +2040,87 @@ yyreduce:
 		}
     break;
 
-  case 104:
+  case 103:
 
     { (yyval) = NULL; }
     break;
 
-  case 106:
+  case 105:
 
     { remove_list((yyvsp[(2) - (2)]), &(*(yyvsp[(1) - (2)]))->next); (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 107:
+  case 106:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 108:
+  case 107:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 109:
+  case 108:
 
     { (yyval) = NULL; }
     break;
 
-  case 112:
+  case 111:
 
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 113:
+  case 112:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 114:
+  case 113:
 
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 115:
+  case 114:
 
     { (yyval) = NULL; }
     break;
 
-  case 118:
+  case 117:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 119:
+  case 118:
 
     { (yyval) = (yyvsp[(2) - (2)]) ? (yyvsp[(2) - (2)]) : (yyvsp[(1) - (2)]); }
     break;
 
-  case 120:
+  case 119:
 
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 122:
+  case 121:
 
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 123:
+  case 122:
 
     { (yyval) = NULL; }
     break;
 
-  case 125:
+  case 124:
 
     { (yyval) = (yyvsp[(3) - (3)]); }
     break;
 
-  case 126:
+  case 125:
 
     { (yyval) = (yyvsp[(4) - (4)]); }
     break;
 
-  case 129:
+  case 128:
 
     {
 			const char *name = strdup((*(yyvsp[(1) - (1)]))->string);
@@ -2134,7 +2128,7 @@ yyreduce:
 		}
     break;
 
-  case 130:
+  case 129:
 
     {
 			const char *name = strdup((*(yyvsp[(1) - (3)]))->string);
@@ -2143,17 +2137,17 @@ yyreduce:
 		}
     break;
 
-  case 131:
+  case 130:
 
     { (yyval) = (yyvsp[(2) - (2)]); }
     break;
 
-  case 132:
+  case 131:
 
     { (yyval) = NULL; }
     break;
 
-  case 134:
+  case 133:
 
     { export_symbol((*(yyvsp[(3) - (5)]))->string); (yyval) = (yyvsp[(5) - (5)]); }
     break;
-- 
cgit v1.2.3


From 60aadea57ed98e734a7e9edb3076b5a72eff0b0d Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Tue, 14 Feb 2017 13:01:14 -0800
Subject: clk: rockchip: Set "ignore unused" for PMU M0 clocks on rk3399

The PMU Cortex M0 on rk3399 is intended to be used for things like
DDRFreq transitions, suspend/resume, and other things that are the
purview of ARM Trusted Firmware and not the kernel.  As such, the
kernel shouldn't be messing with the clocks.  Add CLK_IGNORE_UNUSED to
these clocks.

Without this change, the following was observed on a Chromebook with a
rk3399 (using not-yet-upstream ARM Trusted Firmware code and
not-yet-upstream kernel code based on kernel-4.4):

1. We init the clock framework.

2. We start up "DDRFreq", which causes ATF to occasionally fire up the
   M0 for transitions.  Each time ATF fires up the M0 it will turn on
   these clocks and each time it is done it will turn them off.

3. We finally get to the the part of the kernel that calls
   clk_disable_unused() and we disables the clocks.

You can see the race above.  Basically everything is fine as long as
ARM Trusted Firmware isn't starting up the M0 at exactly the same time
that the kernel is disabling unused clocks.  ...but if the race
happens then we go boom.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 drivers/clk/rockchip/clk-rk3399.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c
index 73121b144634..fa3cbef08776 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -1477,10 +1477,10 @@ static struct rockchip_clk_branch rk3399_clk_pmu_branches[] __initdata = {
 	GATE(PCLK_UART4_PMU, "pclk_uart4_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 14, GFLAGS),
 	GATE(PCLK_WDT_M0_PMU, "pclk_wdt_m0_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 15, GFLAGS),
 
-	GATE(FCLK_CM0S_PMU, "fclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 0, GFLAGS),
-	GATE(SCLK_CM0S_PMU, "sclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 1, GFLAGS),
-	GATE(HCLK_CM0S_PMU, "hclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 2, GFLAGS),
-	GATE(DCLK_CM0S_PMU, "dclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 3, GFLAGS),
+	GATE(FCLK_CM0S_PMU, "fclk_cm0s_pmu", "fclk_cm0s_src_pmu", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(2), 0, GFLAGS),
+	GATE(SCLK_CM0S_PMU, "sclk_cm0s_pmu", "fclk_cm0s_src_pmu", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(2), 1, GFLAGS),
+	GATE(HCLK_CM0S_PMU, "hclk_cm0s_pmu", "fclk_cm0s_src_pmu", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(2), 2, GFLAGS),
+	GATE(DCLK_CM0S_PMU, "dclk_cm0s_pmu", "fclk_cm0s_src_pmu", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(2), 3, GFLAGS),
 	GATE(HCLK_NOC_PMU, "hclk_noc_pmu", "fclk_cm0s_src_pmu", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(2), 5, GFLAGS),
 };
 
-- 
cgit v1.2.3


From 041cf7e064996373148b24788e1aa7a036266986 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 21 Feb 2017 18:30:45 +0000
Subject: dmaengine: stm32-dma: fix up error dev_err message

Trivial fix to spelling mistake and make channel plural.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/stm32-dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 49f86cabcfec..786fc8fcc38e 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -1008,7 +1008,7 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
 
 	c = dma_get_slave_channel(&chan->vchan.chan);
 	if (!c) {
-		dev_err(dev, "No more channel avalaible\n");
+		dev_err(dev, "No more channels available\n");
 		return NULL;
 	}
 
-- 
cgit v1.2.3


From 845d6b0ff92d2c8151892c81f2050b873d7a7ef7 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Tue, 14 Feb 2017 11:35:23 +0800
Subject: clk: sunxi-ng: gate: Support common pre-dividers

Some clock gates have a pre-divider between the source input and the
gate itself. A notable example is the HSIC 12 MHz clock found on the
A83T, which has the 24 MHz main oscillator as its input, and a /2
pre-divider.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu_gate.c | 47 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/drivers/clk/sunxi-ng/ccu_gate.c b/drivers/clk/sunxi-ng/ccu_gate.c
index 8a81f9d4a89f..cd069d5da215 100644
--- a/drivers/clk/sunxi-ng/ccu_gate.c
+++ b/drivers/clk/sunxi-ng/ccu_gate.c
@@ -75,8 +75,55 @@ static int ccu_gate_is_enabled(struct clk_hw *hw)
 	return ccu_gate_helper_is_enabled(&cg->common, cg->enable);
 }
 
+static unsigned long ccu_gate_recalc_rate(struct clk_hw *hw,
+					  unsigned long parent_rate)
+{
+	struct ccu_gate *cg = hw_to_ccu_gate(hw);
+	unsigned long rate = parent_rate;
+
+	if (cg->common.features & CCU_FEATURE_ALL_PREDIV)
+		rate /= cg->common.prediv;
+
+	return rate;
+}
+
+static long ccu_gate_round_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long *prate)
+{
+	struct ccu_gate *cg = hw_to_ccu_gate(hw);
+	int div = 1;
+
+	if (cg->common.features & CCU_FEATURE_ALL_PREDIV)
+		div = cg->common.prediv;
+
+	if (clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT) {
+		unsigned long best_parent = rate;
+
+		if (cg->common.features & CCU_FEATURE_ALL_PREDIV)
+			best_parent *= div;
+		*prate = clk_hw_round_rate(clk_hw_get_parent(hw), best_parent);
+	}
+
+	return *prate / div;
+}
+
+static int ccu_gate_set_rate(struct clk_hw *hw, unsigned long rate,
+			     unsigned long parent_rate)
+{
+	/*
+	 * We must report success but we can do so unconditionally because
+	 * clk_factor_round_rate returns values that ensure this call is a
+	 * nop.
+	 */
+
+	return 0;
+}
+
 const struct clk_ops ccu_gate_ops = {
 	.disable	= ccu_gate_disable,
 	.enable		= ccu_gate_enable,
 	.is_enabled	= ccu_gate_is_enabled,
+	.round_rate	= ccu_gate_round_rate,
+	.set_rate	= ccu_gate_set_rate,
+	.recalc_rate	= ccu_gate_recalc_rate,
 };
-- 
cgit v1.2.3


From 9be1c8afb4926aee043d48493c0474c0eea56540 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.xyz>
Date: Thu, 2 Mar 2017 04:13:39 +0800
Subject: clk: sunxi-ng: add Allwinner H5 CCU support for H3 CCU driver

Allwinner H5 is a SoC that features a design which keeps the peripheral
compatible with H3, so that it have also a CCU like the one on H3 --
only one bus gate/reset is added, and the mmc sample/output phases are
removed because of MMC controller update.

Add its support in our existing H3 CCU driver.

Signed-off-by: Icenowy Zheng <icenowy@aosc.xyz>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 .../devicetree/bindings/clock/sunxi-ccu.txt        |   1 +
 drivers/clk/sunxi-ng/Kconfig                       |   2 +-
 drivers/clk/sunxi-ng/ccu-sun8i-h3.c                | 327 ++++++++++++++++++++-
 drivers/clk/sunxi-ng/ccu-sun8i-h3.h                |   3 +-
 include/dt-bindings/clock/sun8i-h3-ccu.h           |   5 +-
 include/dt-bindings/reset/sun8i-h3-ccu.h           |   5 +-
 6 files changed, 332 insertions(+), 11 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
index bae5668cf427..68512aa398a9 100644
--- a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
+++ b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
@@ -10,6 +10,7 @@ Required properties :
 		- "allwinner,sun8i-v3s-ccu"
 		- "allwinner,sun9i-a80-ccu"
 		- "allwinner,sun50i-a64-ccu"
+		- "allwinner,sun50i-h5-ccu"
 
 - reg: Must contain the registers base address and length
 - clocks: phandle to the oscillators feeding the CCU. Two are needed:
diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig
index 72109d2cf41b..a4b26640680d 100644
--- a/drivers/clk/sunxi-ng/Kconfig
+++ b/drivers/clk/sunxi-ng/Kconfig
@@ -119,7 +119,7 @@ config SUN8I_H3_CCU
 	select SUNXI_CCU_NM
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
-	default MACH_SUN8I
+	default MACH_SUN8I || (ARM64 && ARCH_SUNXI)
 
 config SUN8I_V3S_CCU
 	bool "Support for the Allwinner V3s CCU"
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
index a26c8a19fe93..4cbc1b701b7c 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
@@ -300,8 +300,10 @@ static SUNXI_CCU_GATE(bus_uart2_clk,	"bus-uart2",	"apb2",
 		      0x06c, BIT(18), 0);
 static SUNXI_CCU_GATE(bus_uart3_clk,	"bus-uart3",	"apb2",
 		      0x06c, BIT(19), 0);
-static SUNXI_CCU_GATE(bus_scr_clk,	"bus-scr",	"apb2",
+static SUNXI_CCU_GATE(bus_scr0_clk,	"bus-scr0",	"apb2",
 		      0x06c, BIT(20), 0);
+static SUNXI_CCU_GATE(bus_scr1_clk,	"bus-scr1",	"apb2",
+		      0x06c, BIT(21), 0);
 
 static SUNXI_CCU_GATE(bus_ephy_clk,	"bus-ephy",	"ahb1",
 		      0x070, BIT(0), 0);
@@ -546,7 +548,7 @@ static struct ccu_common *sun8i_h3_ccu_clks[] = {
 	&bus_uart1_clk.common,
 	&bus_uart2_clk.common,
 	&bus_uart3_clk.common,
-	&bus_scr_clk.common,
+	&bus_scr0_clk.common,
 	&bus_ephy_clk.common,
 	&bus_dbg_clk.common,
 	&ths_clk.common,
@@ -597,6 +599,114 @@ static struct ccu_common *sun8i_h3_ccu_clks[] = {
 	&gpu_clk.common,
 };
 
+static struct ccu_common *sun50i_h5_ccu_clks[] = {
+	&pll_cpux_clk.common,
+	&pll_audio_base_clk.common,
+	&pll_video_clk.common,
+	&pll_ve_clk.common,
+	&pll_ddr_clk.common,
+	&pll_periph0_clk.common,
+	&pll_gpu_clk.common,
+	&pll_periph1_clk.common,
+	&pll_de_clk.common,
+	&cpux_clk.common,
+	&axi_clk.common,
+	&ahb1_clk.common,
+	&apb1_clk.common,
+	&apb2_clk.common,
+	&ahb2_clk.common,
+	&bus_ce_clk.common,
+	&bus_dma_clk.common,
+	&bus_mmc0_clk.common,
+	&bus_mmc1_clk.common,
+	&bus_mmc2_clk.common,
+	&bus_nand_clk.common,
+	&bus_dram_clk.common,
+	&bus_emac_clk.common,
+	&bus_ts_clk.common,
+	&bus_hstimer_clk.common,
+	&bus_spi0_clk.common,
+	&bus_spi1_clk.common,
+	&bus_otg_clk.common,
+	&bus_ehci0_clk.common,
+	&bus_ehci1_clk.common,
+	&bus_ehci2_clk.common,
+	&bus_ehci3_clk.common,
+	&bus_ohci0_clk.common,
+	&bus_ohci1_clk.common,
+	&bus_ohci2_clk.common,
+	&bus_ohci3_clk.common,
+	&bus_ve_clk.common,
+	&bus_tcon0_clk.common,
+	&bus_tcon1_clk.common,
+	&bus_deinterlace_clk.common,
+	&bus_csi_clk.common,
+	&bus_tve_clk.common,
+	&bus_hdmi_clk.common,
+	&bus_de_clk.common,
+	&bus_gpu_clk.common,
+	&bus_msgbox_clk.common,
+	&bus_spinlock_clk.common,
+	&bus_codec_clk.common,
+	&bus_spdif_clk.common,
+	&bus_pio_clk.common,
+	&bus_ths_clk.common,
+	&bus_i2s0_clk.common,
+	&bus_i2s1_clk.common,
+	&bus_i2s2_clk.common,
+	&bus_i2c0_clk.common,
+	&bus_i2c1_clk.common,
+	&bus_i2c2_clk.common,
+	&bus_uart0_clk.common,
+	&bus_uart1_clk.common,
+	&bus_uart2_clk.common,
+	&bus_uart3_clk.common,
+	&bus_scr0_clk.common,
+	&bus_scr1_clk.common,
+	&bus_ephy_clk.common,
+	&bus_dbg_clk.common,
+	&ths_clk.common,
+	&nand_clk.common,
+	&mmc0_clk.common,
+	&mmc1_clk.common,
+	&mmc2_clk.common,
+	&ts_clk.common,
+	&ce_clk.common,
+	&spi0_clk.common,
+	&spi1_clk.common,
+	&i2s0_clk.common,
+	&i2s1_clk.common,
+	&i2s2_clk.common,
+	&spdif_clk.common,
+	&usb_phy0_clk.common,
+	&usb_phy1_clk.common,
+	&usb_phy2_clk.common,
+	&usb_phy3_clk.common,
+	&usb_ohci0_clk.common,
+	&usb_ohci1_clk.common,
+	&usb_ohci2_clk.common,
+	&usb_ohci3_clk.common,
+	&dram_clk.common,
+	&dram_ve_clk.common,
+	&dram_csi_clk.common,
+	&dram_deinterlace_clk.common,
+	&dram_ts_clk.common,
+	&de_clk.common,
+	&tcon_clk.common,
+	&tve_clk.common,
+	&deinterlace_clk.common,
+	&csi_misc_clk.common,
+	&csi_sclk_clk.common,
+	&csi_mclk_clk.common,
+	&ve_clk.common,
+	&ac_dig_clk.common,
+	&avs_clk.common,
+	&hdmi_clk.common,
+	&hdmi_ddc_clk.common,
+	&mbus_clk.common,
+	&gpu_clk.common,
+};
+
 /* We hardcode the divider to 4 for now */
 static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio",
 			"pll-audio-base", 4, 1, CLK_SET_RATE_PARENT);
@@ -677,7 +787,7 @@ static struct clk_hw_onecell_data sun8i_h3_hw_clks = {
 		[CLK_BUS_UART1]		= &bus_uart1_clk.common.hw,
 		[CLK_BUS_UART2]		= &bus_uart2_clk.common.hw,
 		[CLK_BUS_UART3]		= &bus_uart3_clk.common.hw,
-		[CLK_BUS_SCR]		= &bus_scr_clk.common.hw,
+		[CLK_BUS_SCR0]		= &bus_scr0_clk.common.hw,
 		[CLK_BUS_EPHY]		= &bus_ephy_clk.common.hw,
 		[CLK_BUS_DBG]		= &bus_dbg_clk.common.hw,
 		[CLK_THS]		= &ths_clk.common.hw,
@@ -727,7 +837,123 @@ static struct clk_hw_onecell_data sun8i_h3_hw_clks = {
 		[CLK_MBUS]		= &mbus_clk.common.hw,
 		[CLK_GPU]		= &gpu_clk.common.hw,
 	},
-	.num	= CLK_NUMBER,
+	.num	= CLK_NUMBER_H3,
+};
+
+static struct clk_hw_onecell_data sun50i_h5_hw_clks = {
+	.hws	= {
+		[CLK_PLL_CPUX]		= &pll_cpux_clk.common.hw,
+		[CLK_PLL_AUDIO_BASE]	= &pll_audio_base_clk.common.hw,
+		[CLK_PLL_AUDIO]		= &pll_audio_clk.hw,
+		[CLK_PLL_AUDIO_2X]	= &pll_audio_2x_clk.hw,
+		[CLK_PLL_AUDIO_4X]	= &pll_audio_4x_clk.hw,
+		[CLK_PLL_AUDIO_8X]	= &pll_audio_8x_clk.hw,
+		[CLK_PLL_VIDEO]		= &pll_video_clk.common.hw,
+		[CLK_PLL_VE]		= &pll_ve_clk.common.hw,
+		[CLK_PLL_DDR]		= &pll_ddr_clk.common.hw,
+		[CLK_PLL_PERIPH0]	= &pll_periph0_clk.common.hw,
+		[CLK_PLL_PERIPH0_2X]	= &pll_periph0_2x_clk.hw,
+		[CLK_PLL_GPU]		= &pll_gpu_clk.common.hw,
+		[CLK_PLL_PERIPH1]	= &pll_periph1_clk.common.hw,
+		[CLK_PLL_DE]		= &pll_de_clk.common.hw,
+		[CLK_CPUX]		= &cpux_clk.common.hw,
+		[CLK_AXI]		= &axi_clk.common.hw,
+		[CLK_AHB1]		= &ahb1_clk.common.hw,
+		[CLK_APB1]		= &apb1_clk.common.hw,
+		[CLK_APB2]		= &apb2_clk.common.hw,
+		[CLK_AHB2]		= &ahb2_clk.common.hw,
+		[CLK_BUS_CE]		= &bus_ce_clk.common.hw,
+		[CLK_BUS_DMA]		= &bus_dma_clk.common.hw,
+		[CLK_BUS_MMC0]		= &bus_mmc0_clk.common.hw,
+		[CLK_BUS_MMC1]		= &bus_mmc1_clk.common.hw,
+		[CLK_BUS_MMC2]		= &bus_mmc2_clk.common.hw,
+		[CLK_BUS_NAND]		= &bus_nand_clk.common.hw,
+		[CLK_BUS_DRAM]		= &bus_dram_clk.common.hw,
+		[CLK_BUS_EMAC]		= &bus_emac_clk.common.hw,
+		[CLK_BUS_TS]		= &bus_ts_clk.common.hw,
+		[CLK_BUS_HSTIMER]	= &bus_hstimer_clk.common.hw,
+		[CLK_BUS_SPI0]		= &bus_spi0_clk.common.hw,
+		[CLK_BUS_SPI1]		= &bus_spi1_clk.common.hw,
+		[CLK_BUS_OTG]		= &bus_otg_clk.common.hw,
+		[CLK_BUS_EHCI0]		= &bus_ehci0_clk.common.hw,
+		[CLK_BUS_EHCI1]		= &bus_ehci1_clk.common.hw,
+		[CLK_BUS_EHCI2]		= &bus_ehci2_clk.common.hw,
+		[CLK_BUS_EHCI3]		= &bus_ehci3_clk.common.hw,
+		[CLK_BUS_OHCI0]		= &bus_ohci0_clk.common.hw,
+		[CLK_BUS_OHCI1]		= &bus_ohci1_clk.common.hw,
+		[CLK_BUS_OHCI2]		= &bus_ohci2_clk.common.hw,
+		[CLK_BUS_OHCI3]		= &bus_ohci3_clk.common.hw,
+		[CLK_BUS_VE]		= &bus_ve_clk.common.hw,
+		[CLK_BUS_TCON0]		= &bus_tcon0_clk.common.hw,
+		[CLK_BUS_TCON1]		= &bus_tcon1_clk.common.hw,
+		[CLK_BUS_DEINTERLACE]	= &bus_deinterlace_clk.common.hw,
+		[CLK_BUS_CSI]		= &bus_csi_clk.common.hw,
+		[CLK_BUS_TVE]		= &bus_tve_clk.common.hw,
+		[CLK_BUS_HDMI]		= &bus_hdmi_clk.common.hw,
+		[CLK_BUS_DE]		= &bus_de_clk.common.hw,
+		[CLK_BUS_GPU]		= &bus_gpu_clk.common.hw,
+		[CLK_BUS_MSGBOX]	= &bus_msgbox_clk.common.hw,
+		[CLK_BUS_SPINLOCK]	= &bus_spinlock_clk.common.hw,
+		[CLK_BUS_CODEC]		= &bus_codec_clk.common.hw,
+		[CLK_BUS_SPDIF]		= &bus_spdif_clk.common.hw,
+		[CLK_BUS_PIO]		= &bus_pio_clk.common.hw,
+		[CLK_BUS_THS]		= &bus_ths_clk.common.hw,
+		[CLK_BUS_I2S0]		= &bus_i2s0_clk.common.hw,
+		[CLK_BUS_I2S1]		= &bus_i2s1_clk.common.hw,
+		[CLK_BUS_I2S2]		= &bus_i2s2_clk.common.hw,
+		[CLK_BUS_I2C0]		= &bus_i2c0_clk.common.hw,
+		[CLK_BUS_I2C1]		= &bus_i2c1_clk.common.hw,
+		[CLK_BUS_I2C2]		= &bus_i2c2_clk.common.hw,
+		[CLK_BUS_UART0]		= &bus_uart0_clk.common.hw,
+		[CLK_BUS_UART1]		= &bus_uart1_clk.common.hw,
+		[CLK_BUS_UART2]		= &bus_uart2_clk.common.hw,
+		[CLK_BUS_UART3]		= &bus_uart3_clk.common.hw,
+		[CLK_BUS_SCR0]		= &bus_scr0_clk.common.hw,
+		[CLK_BUS_SCR1]		= &bus_scr1_clk.common.hw,
+		[CLK_BUS_EPHY]		= &bus_ephy_clk.common.hw,
+		[CLK_BUS_DBG]		= &bus_dbg_clk.common.hw,
+		[CLK_THS]		= &ths_clk.common.hw,
+		[CLK_NAND]		= &nand_clk.common.hw,
+		[CLK_MMC0]		= &mmc0_clk.common.hw,
+		[CLK_MMC1]		= &mmc1_clk.common.hw,
+		[CLK_MMC2]		= &mmc2_clk.common.hw,
+		[CLK_TS]		= &ts_clk.common.hw,
+		[CLK_CE]		= &ce_clk.common.hw,
+		[CLK_SPI0]		= &spi0_clk.common.hw,
+		[CLK_SPI1]		= &spi1_clk.common.hw,
+		[CLK_I2S0]		= &i2s0_clk.common.hw,
+		[CLK_I2S1]		= &i2s1_clk.common.hw,
+		[CLK_I2S2]		= &i2s2_clk.common.hw,
+		[CLK_SPDIF]		= &spdif_clk.common.hw,
+		[CLK_USB_PHY0]		= &usb_phy0_clk.common.hw,
+		[CLK_USB_PHY1]		= &usb_phy1_clk.common.hw,
+		[CLK_USB_PHY2]		= &usb_phy2_clk.common.hw,
+		[CLK_USB_PHY3]		= &usb_phy3_clk.common.hw,
+		[CLK_USB_OHCI0]		= &usb_ohci0_clk.common.hw,
+		[CLK_USB_OHCI1]		= &usb_ohci1_clk.common.hw,
+		[CLK_USB_OHCI2]		= &usb_ohci2_clk.common.hw,
+		[CLK_USB_OHCI3]		= &usb_ohci3_clk.common.hw,
+		[CLK_DRAM]		= &dram_clk.common.hw,
+		[CLK_DRAM_VE]		= &dram_ve_clk.common.hw,
+		[CLK_DRAM_CSI]		= &dram_csi_clk.common.hw,
+		[CLK_DRAM_DEINTERLACE]	= &dram_deinterlace_clk.common.hw,
+		[CLK_DRAM_TS]		= &dram_ts_clk.common.hw,
+		[CLK_DE]		= &de_clk.common.hw,
+		[CLK_TCON0]		= &tcon_clk.common.hw,
+		[CLK_TVE]		= &tve_clk.common.hw,
+		[CLK_DEINTERLACE]	= &deinterlace_clk.common.hw,
+		[CLK_CSI_MISC]		= &csi_misc_clk.common.hw,
+		[CLK_CSI_SCLK]		= &csi_sclk_clk.common.hw,
+		[CLK_CSI_MCLK]		= &csi_mclk_clk.common.hw,
+		[CLK_VE]		= &ve_clk.common.hw,
+		[CLK_AC_DIG]		= &ac_dig_clk.common.hw,
+		[CLK_AVS]		= &avs_clk.common.hw,
+		[CLK_HDMI]		= &hdmi_clk.common.hw,
+		[CLK_HDMI_DDC]		= &hdmi_ddc_clk.common.hw,
+		[CLK_MBUS]		= &mbus_clk.common.hw,
+		[CLK_GPU]		= &gpu_clk.common.hw,
+	},
+	.num	= CLK_NUMBER_H5,
 };
 
 static struct ccu_reset_map sun8i_h3_ccu_resets[] = {
@@ -790,7 +1016,71 @@ static struct ccu_reset_map sun8i_h3_ccu_resets[] = {
 	[RST_BUS_UART1]		=  { 0x2d8, BIT(17) },
 	[RST_BUS_UART2]		=  { 0x2d8, BIT(18) },
 	[RST_BUS_UART3]		=  { 0x2d8, BIT(19) },
-	[RST_BUS_SCR]		=  { 0x2d8, BIT(20) },
+	[RST_BUS_SCR0]		=  { 0x2d8, BIT(20) },
+};
+
+static struct ccu_reset_map sun50i_h5_ccu_resets[] = {
+	[RST_USB_PHY0]		=  { 0x0cc, BIT(0) },
+	[RST_USB_PHY1]		=  { 0x0cc, BIT(1) },
+	[RST_USB_PHY2]		=  { 0x0cc, BIT(2) },
+	[RST_USB_PHY3]		=  { 0x0cc, BIT(3) },
+
+	[RST_MBUS]		=  { 0x0fc, BIT(31) },
+
+	[RST_BUS_CE]		=  { 0x2c0, BIT(5) },
+	[RST_BUS_DMA]		=  { 0x2c0, BIT(6) },
+	[RST_BUS_MMC0]		=  { 0x2c0, BIT(8) },
+	[RST_BUS_MMC1]		=  { 0x2c0, BIT(9) },
+	[RST_BUS_MMC2]		=  { 0x2c0, BIT(10) },
+	[RST_BUS_NAND]		=  { 0x2c0, BIT(13) },
+	[RST_BUS_DRAM]		=  { 0x2c0, BIT(14) },
+	[RST_BUS_EMAC]		=  { 0x2c0, BIT(17) },
+	[RST_BUS_TS]		=  { 0x2c0, BIT(18) },
+	[RST_BUS_HSTIMER]	=  { 0x2c0, BIT(19) },
+	[RST_BUS_SPI0]		=  { 0x2c0, BIT(20) },
+	[RST_BUS_SPI1]		=  { 0x2c0, BIT(21) },
+	[RST_BUS_OTG]		=  { 0x2c0, BIT(23) },
+	[RST_BUS_EHCI0]		=  { 0x2c0, BIT(24) },
+	[RST_BUS_EHCI1]		=  { 0x2c0, BIT(25) },
+	[RST_BUS_EHCI2]		=  { 0x2c0, BIT(26) },
+	[RST_BUS_EHCI3]		=  { 0x2c0, BIT(27) },
+	[RST_BUS_OHCI0]		=  { 0x2c0, BIT(28) },
+	[RST_BUS_OHCI1]		=  { 0x2c0, BIT(29) },
+	[RST_BUS_OHCI2]		=  { 0x2c0, BIT(30) },
+	[RST_BUS_OHCI3]		=  { 0x2c0, BIT(31) },
+
+	[RST_BUS_VE]		=  { 0x2c4, BIT(0) },
+	[RST_BUS_TCON0]		=  { 0x2c4, BIT(3) },
+	[RST_BUS_TCON1]		=  { 0x2c4, BIT(4) },
+	[RST_BUS_DEINTERLACE]	=  { 0x2c4, BIT(5) },
+	[RST_BUS_CSI]		=  { 0x2c4, BIT(8) },
+	[RST_BUS_TVE]		=  { 0x2c4, BIT(9) },
+	[RST_BUS_HDMI0]		=  { 0x2c4, BIT(10) },
+	[RST_BUS_HDMI1]		=  { 0x2c4, BIT(11) },
+	[RST_BUS_DE]		=  { 0x2c4, BIT(12) },
+	[RST_BUS_GPU]		=  { 0x2c4, BIT(20) },
+	[RST_BUS_MSGBOX]	=  { 0x2c4, BIT(21) },
+	[RST_BUS_SPINLOCK]	=  { 0x2c4, BIT(22) },
+	[RST_BUS_DBG]		=  { 0x2c4, BIT(31) },
+
+	[RST_BUS_EPHY]		=  { 0x2c8, BIT(2) },
+
+	[RST_BUS_CODEC]		=  { 0x2d0, BIT(0) },
+	[RST_BUS_SPDIF]		=  { 0x2d0, BIT(1) },
+	[RST_BUS_THS]		=  { 0x2d0, BIT(8) },
+	[RST_BUS_I2S0]		=  { 0x2d0, BIT(12) },
+	[RST_BUS_I2S1]		=  { 0x2d0, BIT(13) },
+	[RST_BUS_I2S2]		=  { 0x2d0, BIT(14) },
+
+	[RST_BUS_I2C0]		=  { 0x2d8, BIT(0) },
+	[RST_BUS_I2C1]		=  { 0x2d8, BIT(1) },
+	[RST_BUS_I2C2]		=  { 0x2d8, BIT(2) },
+	[RST_BUS_UART0]		=  { 0x2d8, BIT(16) },
+	[RST_BUS_UART1]		=  { 0x2d8, BIT(17) },
+	[RST_BUS_UART2]		=  { 0x2d8, BIT(18) },
+	[RST_BUS_UART3]		=  { 0x2d8, BIT(19) },
+	[RST_BUS_SCR0]		=  { 0x2d8, BIT(20) },
+	[RST_BUS_SCR1]		=  { 0x2d8, BIT(20) },
 };
 
 static const struct sunxi_ccu_desc sun8i_h3_ccu_desc = {
@@ -803,6 +1093,16 @@ static const struct sunxi_ccu_desc sun8i_h3_ccu_desc = {
 	.num_resets	= ARRAY_SIZE(sun8i_h3_ccu_resets),
 };
 
+static const struct sunxi_ccu_desc sun50i_h5_ccu_desc = {
+	.ccu_clks	= sun50i_h5_ccu_clks,
+	.num_ccu_clks	= ARRAY_SIZE(sun50i_h5_ccu_clks),
+
+	.hw_clks	= &sun50i_h5_hw_clks,
+
+	.resets		= sun50i_h5_ccu_resets,
+	.num_resets	= ARRAY_SIZE(sun50i_h5_ccu_resets),
+};
+
 static struct ccu_mux_nb sun8i_h3_cpu_nb = {
 	.common		= &cpux_clk.common,
 	.cm		= &cpux_clk.mux,
@@ -810,7 +1110,8 @@ static struct ccu_mux_nb sun8i_h3_cpu_nb = {
 	.bypass_index	= 1, /* index of 24 MHz oscillator */
 };
 
-static void __init sun8i_h3_ccu_setup(struct device_node *node)
+static void __init sunxi_h3_h5_ccu_init(struct device_node *node,
+					const struct sunxi_ccu_desc *desc)
 {
 	void __iomem *reg;
 	u32 val;
@@ -827,10 +1128,22 @@ static void __init sun8i_h3_ccu_setup(struct device_node *node)
 	val &= ~GENMASK(19, 16);
 	writel(val | (3 << 16), reg + SUN8I_H3_PLL_AUDIO_REG);
 
-	sunxi_ccu_probe(node, reg, &sun8i_h3_ccu_desc);
+	sunxi_ccu_probe(node, reg, desc);
 
 	ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk,
 				  &sun8i_h3_cpu_nb);
 }
+
+static void __init sun8i_h3_ccu_setup(struct device_node *node)
+{
+	sunxi_h3_h5_ccu_init(node, &sun8i_h3_ccu_desc);
+}
 CLK_OF_DECLARE(sun8i_h3_ccu, "allwinner,sun8i-h3-ccu",
 	       sun8i_h3_ccu_setup);
+
+static void __init sun50i_h5_ccu_setup(struct device_node *node)
+{
+	sunxi_h3_h5_ccu_init(node, &sun50i_h5_ccu_desc);
+}
+CLK_OF_DECLARE(sun50i_h5_ccu, "allwinner,sun50i-h5-ccu",
+	       sun50i_h5_ccu_setup);
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.h b/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
index 78be712c7487..85973d1e8165 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
@@ -57,6 +57,7 @@
 
 /* And the GPU module clock is exported */
 
-#define CLK_NUMBER		(CLK_GPU + 1)
+#define CLK_NUMBER_H3		(CLK_GPU + 1)
+#define CLK_NUMBER_H5		(CLK_BUS_SCR1 + 1)
 
 #endif /* _CCU_SUN8I_H3_H_ */
diff --git a/include/dt-bindings/clock/sun8i-h3-ccu.h b/include/dt-bindings/clock/sun8i-h3-ccu.h
index efb7ba2bd515..c2afc41d6964 100644
--- a/include/dt-bindings/clock/sun8i-h3-ccu.h
+++ b/include/dt-bindings/clock/sun8i-h3-ccu.h
@@ -91,7 +91,7 @@
 #define CLK_BUS_UART1		63
 #define CLK_BUS_UART2		64
 #define CLK_BUS_UART3		65
-#define CLK_BUS_SCR		66
+#define CLK_BUS_SCR0		66
 #define CLK_BUS_EPHY		67
 #define CLK_BUS_DBG		68
 
@@ -142,4 +142,7 @@
 
 #define CLK_GPU			114
 
+/* New clocks imported in H5 */
+#define CLK_BUS_SCR1		115
+
 #endif /* _DT_BINDINGS_CLK_SUN8I_H3_H_ */
diff --git a/include/dt-bindings/reset/sun8i-h3-ccu.h b/include/dt-bindings/reset/sun8i-h3-ccu.h
index 6b7af80c26ec..484c2a22919d 100644
--- a/include/dt-bindings/reset/sun8i-h3-ccu.h
+++ b/include/dt-bindings/reset/sun8i-h3-ccu.h
@@ -98,6 +98,9 @@
 #define RST_BUS_UART1		50
 #define RST_BUS_UART2		51
 #define RST_BUS_UART3		52
-#define RST_BUS_SCR		53
+#define RST_BUS_SCR0		53
+
+/* New resets imported in H5 */
+#define RST_BUS_SCR1		54
 
 #endif /* _DT_BINDINGS_RST_SUN8I_H3_H_ */
-- 
cgit v1.2.3


From 05c04bef445d695917fe74422e05352b1b46f3c8 Mon Sep 17 00:00:00 2001
From: Peter Robinson <pbrobinson@gmail.com>
Date: Thu, 2 Mar 2017 17:43:57 +0000
Subject: clk: sunxi-ng: tighten SoC deps on explicit AllWinner SoCs

Tighten the depends on the various AllWinn SoCs so we don't
inadvertantly get clock drivers when we're not wanting them
like 32 bit SoC clocks for 64 bit configs. Ensure there's
still test coverage though.

Signed-off-by: Peter Robinson <pbrobinson@gmail.com>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/Kconfig | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig
index a4b26640680d..213cf64e4fab 100644
--- a/drivers/clk/sunxi-ng/Kconfig
+++ b/drivers/clk/sunxi-ng/Kconfig
@@ -63,6 +63,7 @@ config SUN50I_A64_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default ARM64 && ARCH_SUNXI
+	depends on (ARM64 && ARCH_SUNXI) || COMPILE_TEST
 
 config SUN5I_CCU
 	bool "Support for the Allwinner sun5i family CCM"
@@ -74,6 +75,7 @@ config SUN5I_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN5I
+	depends on MACH_SUN5I || COMPILE_TEST
 
 config SUN6I_A31_CCU
 	bool "Support for the Allwinner A31/A31s CCU"
@@ -85,6 +87,7 @@ config SUN6I_A31_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN6I
+	depends on MACH_SUN6I || COMPILE_TEST
 
 config SUN8I_A23_CCU
 	bool "Support for the Allwinner A23 CCU"
@@ -97,6 +100,7 @@ config SUN8I_A23_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN8I
+	depends on MACH_SUN8I || COMPILE_TEST
 
 config SUN8I_A33_CCU
 	bool "Support for the Allwinner A33 CCU"
@@ -109,6 +113,7 @@ config SUN8I_A33_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN8I
+	depends on MACH_SUN8I || COMPILE_TEST
 
 config SUN8I_H3_CCU
 	bool "Support for the Allwinner H3 CCU"
@@ -120,6 +125,7 @@ config SUN8I_H3_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN8I || (ARM64 && ARCH_SUNXI)
+	depends on MACH_SUN8I || (ARM64 && ARCH_SUNXI) || COMPILE_TEST
 
 config SUN8I_V3S_CCU
 	bool "Support for the Allwinner V3s CCU"
@@ -131,6 +137,7 @@ config SUN8I_V3S_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN8I
+	depends on MACH_SUN8I || COMPILE_TEST
 
 config SUN9I_A80_CCU
 	bool "Support for the Allwinner A80 CCU"
@@ -141,5 +148,6 @@ config SUN9I_A80_CCU
 	select SUNXI_CCU_MP
 	select SUNXI_CCU_PHASE
 	default MACH_SUN9I
+	depends on MACH_SUN9I || COMPILE_TEST
 
 endif
-- 
cgit v1.2.3


From b0f0daa8fe9e74b85f6360288d38224ad1c2f2f4 Mon Sep 17 00:00:00 2001
From: Priit Laes <plaes@plaes.org>
Date: Thu, 2 Mar 2017 22:55:27 +0200
Subject: clk: sunxi-ng: sun5i: Fix mux width for csi clock

Mux for CSI clock is 3 bits, not 2.

Signed-off-by: Priit Laes <plaes@plaes.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu-sun5i.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/sunxi-ng/ccu-sun5i.c b/drivers/clk/sunxi-ng/ccu-sun5i.c
index 06edaa523479..5c476f966a72 100644
--- a/drivers/clk/sunxi-ng/ccu-sun5i.c
+++ b/drivers/clk/sunxi-ng/ccu-sun5i.c
@@ -469,7 +469,7 @@ static const char * const csi_parents[] = { "hosc", "pll-video0", "pll-video1",
 static const u8 csi_table[] = { 0, 1, 2, 5, 6 };
 static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(csi_clk, "csi",
 				       csi_parents, csi_table,
-				       0x134, 0, 5, 24, 2, BIT(31), 0);
+				       0x134, 0, 5, 24, 3, BIT(31), 0);
 
 static SUNXI_CCU_GATE(ve_clk,		"ve",		"pll-ve",
 		      0x13c, BIT(31), CLK_SET_RATE_PARENT);
-- 
cgit v1.2.3


From 7d0a7c7bdf7f849ff864a539f1c0c20b8a053f2e Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Tue, 14 Feb 2017 00:06:04 +0300
Subject: clk: renesas: r8a7795: Add IMR clocks

Add the IMR[0-3] clocks to the R8A7795 CPG/MSSR driver.

Based on the original (and large) patch by Konstantin Kozhevnikov
<Konstantin.Kozhevnikov@cogentembedded.com>.

Signed-off-by: Konstantin Kozhevnikov <Konstantin.Kozhevnikov@cogentembedded.com>
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/clk/renesas/r8a7795-cpg-mssr.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c
index bfffdb00df97..2add8218e0f7 100644
--- a/drivers/clk/renesas/r8a7795-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c
@@ -208,6 +208,10 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("vin0",			 811,	R8A7795_CLK_S2D1),
 	DEF_MOD("etheravb",		 812,	R8A7795_CLK_S3D2),
 	DEF_MOD("sata0",		 815,	R8A7795_CLK_S3D2),
+	DEF_MOD("imr3",			 820,	R8A7795_CLK_S2D1),
+	DEF_MOD("imr2",			 821,	R8A7795_CLK_S2D1),
+	DEF_MOD("imr1",			 822,	R8A7795_CLK_S2D1),
+	DEF_MOD("imr0",			 823,	R8A7795_CLK_S2D1),
 	DEF_MOD("gpio7",		 905,	R8A7795_CLK_CP),
 	DEF_MOD("gpio6",		 906,	R8A7795_CLK_CP),
 	DEF_MOD("gpio5",		 907,	R8A7795_CLK_CP),
-- 
cgit v1.2.3


From 6c8a9312946374947287ac1bd3b94aba850a5d1f Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sun, 19 Feb 2017 00:39:26 +0300
Subject: clk: renesas: r8a7796: Add IMR clocks

Add the IMR[0-1] clocks to the R8A7796 CPG/MSSR driver.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
[geert: Correct parent clocks]
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/clk/renesas/r8a7796-cpg-mssr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/clk/renesas/r8a7796-cpg-mssr.c b/drivers/clk/renesas/r8a7796-cpg-mssr.c
index 11e084a56b0d..12a23c18bc1e 100644
--- a/drivers/clk/renesas/r8a7796-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7796-cpg-mssr.c
@@ -179,6 +179,8 @@ static const struct mssr_mod_clk r8a7796_mod_clks[] __initconst = {
 	DEF_MOD("vin1",			 810,	R8A7796_CLK_S0D2),
 	DEF_MOD("vin0",			 811,	R8A7796_CLK_S0D2),
 	DEF_MOD("etheravb",		 812,	R8A7796_CLK_S0D6),
+	DEF_MOD("imr1",			 822,	R8A7796_CLK_S0D2),
+	DEF_MOD("imr0",			 823,	R8A7796_CLK_S0D2),
 	DEF_MOD("gpio7",		 905,	R8A7796_CLK_S3D4),
 	DEF_MOD("gpio6",		 906,	R8A7796_CLK_S3D4),
 	DEF_MOD("gpio5",		 907,	R8A7796_CLK_S3D4),
-- 
cgit v1.2.3


From 6ee60246ac684407a062ccfa4404d6b6e8e7d7c7 Mon Sep 17 00:00:00 2001
From: Alexandre Bailon <abailon@baylibre.com>
Date: Wed, 15 Feb 2017 14:56:32 +0100
Subject: dmaengine: cppi41: Remove usbss_mem

In order to make CPPI 4.1 DMA driver more generic, accesses to USBSS
have been removed. So it is not required anymore to map the "glue"
register's.
Remove usbss_mem.

Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index d74cee077842..34a8156eef06 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -131,7 +131,6 @@ struct cppi41_dd {
 	u32 first_td_desc;
 	struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];
 
-	void __iomem *usbss_mem;
 	void __iomem *ctrl_mem;
 	void __iomem *sched_mem;
 	void __iomem *qmgr_mem;
@@ -995,6 +994,7 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	struct cppi41_dd *cdd;
 	struct device *dev = &pdev->dev;
 	const struct cppi_glue_infos *glue_info;
+	int index;
 	int irq;
 	int ret;
 
@@ -1021,16 +1021,20 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&cdd->ddev.channels);
 	cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;
 
-	cdd->usbss_mem = of_iomap(dev->of_node, 0);
-	cdd->ctrl_mem = of_iomap(dev->of_node, 1);
-	cdd->sched_mem = of_iomap(dev->of_node, 2);
-	cdd->qmgr_mem = of_iomap(dev->of_node, 3);
+	index = of_property_match_string(dev->of_node,
+					 "reg-names", "controller");
+	if (index < 0)
+		return index;
+
+	cdd->ctrl_mem = of_iomap(dev->of_node, index);
+	cdd->sched_mem = of_iomap(dev->of_node, index + 1);
+	cdd->qmgr_mem = of_iomap(dev->of_node, index + 2);
 	spin_lock_init(&cdd->lock);
 	INIT_LIST_HEAD(&cdd->pending);
 
 	platform_set_drvdata(pdev, cdd);
 
-	if (!cdd->usbss_mem || !cdd->ctrl_mem || !cdd->sched_mem ||
+	if (!cdd->ctrl_mem || !cdd->sched_mem ||
 			!cdd->qmgr_mem)
 		return -ENXIO;
 
@@ -1090,7 +1094,6 @@ err_init_cppi:
 err_get_sync:
 	pm_runtime_put_sync(dev);
 	pm_runtime_disable(dev);
-	iounmap(cdd->usbss_mem);
 	iounmap(cdd->ctrl_mem);
 	iounmap(cdd->sched_mem);
 	iounmap(cdd->qmgr_mem);
@@ -1112,7 +1115,6 @@ static int cppi41_dma_remove(struct platform_device *pdev)
 	devm_free_irq(&pdev->dev, cdd->irq, cdd);
 	cleanup_chans(cdd);
 	deinit_cppi41(&pdev->dev, cdd);
-	iounmap(cdd->usbss_mem);
 	iounmap(cdd->ctrl_mem);
 	iounmap(cdd->sched_mem);
 	iounmap(cdd->qmgr_mem);
-- 
cgit v1.2.3


From e1f40bf02d1f8b8af42b3fad5151bcd3449e085f Mon Sep 17 00:00:00 2001
From: Alexandre Bailon <abailon@baylibre.com>
Date: Wed, 15 Feb 2017 14:56:33 +0100
Subject: dmaengine: cppi41: rename platform variables

Currently, only the AM335x is supported by the driver.
Though the driver has a glue layer to support different platforms,
some platform variable names are not prefixed with the platform name.
To facilitate the addition of a new platform,
rename some variables owned by the AM335x glue.

Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 34a8156eef06..d1b5569ef420 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -149,7 +149,7 @@ struct cppi41_dd {
 };
 
 #define FIST_COMPLETION_QUEUE	93
-static struct chan_queues usb_queues_tx[] = {
+static struct chan_queues am335x_usb_queues_tx[] = {
 	/* USB0 ENDP 1 */
 	[ 0] = { .submit = 32, .complete =  93},
 	[ 1] = { .submit = 34, .complete =  94},
@@ -185,7 +185,7 @@ static struct chan_queues usb_queues_tx[] = {
 	[29] = { .submit = 90, .complete = 139},
 };
 
-static const struct chan_queues usb_queues_rx[] = {
+static const struct chan_queues am335x_usb_queues_rx[] = {
 	/* USB0 ENDP 1 */
 	[ 0] = { .submit =  1, .complete = 109},
 	[ 1] = { .submit =  2, .complete = 110},
@@ -932,8 +932,9 @@ static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param)
 	else
 		queues = cdd->queues_rx;
 
-	BUILD_BUG_ON(ARRAY_SIZE(usb_queues_rx) != ARRAY_SIZE(usb_queues_tx));
-	if (WARN_ON(cchan->port_num > ARRAY_SIZE(usb_queues_rx)))
+	BUILD_BUG_ON(ARRAY_SIZE(am335x_usb_queues_rx) !=
+		     ARRAY_SIZE(am335x_usb_queues_tx));
+	if (WARN_ON(cchan->port_num > ARRAY_SIZE(am335x_usb_queues_rx)))
 		return false;
 
 	cchan->q_num = queues[cchan->port_num].submit;
@@ -961,15 +962,15 @@ static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
 			&dma_spec->args[0]);
 }
 
-static const struct cppi_glue_infos usb_infos = {
+static const struct cppi_glue_infos am335x_usb_infos = {
 	.isr = cppi41_irq,
-	.queues_rx = usb_queues_rx,
-	.queues_tx = usb_queues_tx,
+	.queues_rx = am335x_usb_queues_rx,
+	.queues_tx = am335x_usb_queues_tx,
 	.td_queue = { .submit = 31, .complete = 0 },
 };
 
 static const struct of_device_id cppi41_dma_ids[] = {
-	{ .compatible = "ti,am3359-cppi41", .data = &usb_infos},
+	{ .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos},
 	{},
 };
 MODULE_DEVICE_TABLE(of, cppi41_dma_ids);
-- 
cgit v1.2.3


From 2d535b2629bfdf30c972ea394669bd146f752091 Mon Sep 17 00:00:00 2001
From: Alexandre Bailon <abailon@baylibre.com>
Date: Wed, 15 Feb 2017 14:56:34 +0100
Subject: dmaengine: cppi41: Move some constants to glue layer

Some constants are defined and use by the driver whereas they are
specifics to AM335x.
Add new variables to the glue layer, initialize them with the constants,
and use them in the driver.

Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index d1b5569ef420..676228ff71de 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -68,7 +68,6 @@
 #define QMGR_MEMCTRL_IDX_SH	16
 #define QMGR_MEMCTRL_DESC_SH	8
 
-#define QMGR_NUM_PEND	5
 #define QMGR_PEND(x)	(0x90 + (x) * 4)
 
 #define QMGR_PENDING_SLOT_Q(x)	(x / 32)
@@ -138,6 +137,8 @@ struct cppi41_dd {
 	const struct chan_queues *queues_rx;
 	const struct chan_queues *queues_tx;
 	struct chan_queues td_queue;
+	u16 first_completion_queue;
+	u16 qmgr_num_pend;
 
 	struct list_head pending;	/* Pending queued transfers */
 	spinlock_t lock;		/* Lock for pending list */
@@ -148,7 +149,6 @@ struct cppi41_dd {
 	bool is_suspended;
 };
 
-#define FIST_COMPLETION_QUEUE	93
 static struct chan_queues am335x_usb_queues_tx[] = {
 	/* USB0 ENDP 1 */
 	[ 0] = { .submit = 32, .complete =  93},
@@ -226,6 +226,8 @@ struct cppi_glue_infos {
 	const struct chan_queues *queues_rx;
 	const struct chan_queues *queues_tx;
 	struct chan_queues td_queue;
+	u16 first_completion_queue;
+	u16 qmgr_num_pend;
 };
 
 static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c)
@@ -284,19 +286,21 @@ static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
 static irqreturn_t cppi41_irq(int irq, void *data)
 {
 	struct cppi41_dd *cdd = data;
+	u16 first_completion_queue = cdd->first_completion_queue;
+	u16 qmgr_num_pend = cdd->qmgr_num_pend;
 	struct cppi41_channel *c;
 	int i;
 
-	for (i = QMGR_PENDING_SLOT_Q(FIST_COMPLETION_QUEUE); i < QMGR_NUM_PEND;
+	for (i = QMGR_PENDING_SLOT_Q(first_completion_queue); i < qmgr_num_pend;
 			i++) {
 		u32 val;
 		u32 q_num;
 
 		val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i));
-		if (i == QMGR_PENDING_SLOT_Q(FIST_COMPLETION_QUEUE) && val) {
+		if (i == QMGR_PENDING_SLOT_Q(first_completion_queue) && val) {
 			u32 mask;
 			/* set corresponding bit for completetion Q 93 */
-			mask = 1 << QMGR_PENDING_BIT_Q(FIST_COMPLETION_QUEUE);
+			mask = 1 << QMGR_PENDING_BIT_Q(first_completion_queue);
 			/* not set all bits for queues less than Q 93 */
 			mask--;
 			/* now invert and keep only Q 93+ set */
@@ -884,7 +888,7 @@ static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
 		return -ENOMEM;
 
 	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
-	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
+	cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE);
 	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
 
 	ret = init_descs(dev, cdd);
@@ -967,6 +971,8 @@ static const struct cppi_glue_infos am335x_usb_infos = {
 	.queues_rx = am335x_usb_queues_rx,
 	.queues_tx = am335x_usb_queues_tx,
 	.td_queue = { .submit = 31, .complete = 0 },
+	.first_completion_queue = 93,
+	.qmgr_num_pend = 5,
 };
 
 static const struct of_device_id cppi41_dma_ids[] = {
@@ -1049,6 +1055,8 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	cdd->queues_rx = glue_info->queues_rx;
 	cdd->queues_tx = glue_info->queues_tx;
 	cdd->td_queue = glue_info->td_queue;
+	cdd->qmgr_num_pend = glue_info->qmgr_num_pend;
+	cdd->first_completion_queue = glue_info->first_completion_queue;
 
 	ret = init_cppi41(dev, cdd);
 	if (ret)
-- 
cgit v1.2.3


From 5e46fe98ab658eab817398abd895908854bb4c43 Mon Sep 17 00:00:00 2001
From: Alexandre Bailon <abailon@baylibre.com>
Date: Wed, 15 Feb 2017 14:56:35 +0100
Subject: dmaengine: cppi41: init_sched(): Get number of channels from DT

Despite the driver is already using DT to get the number of channels,
init_sched() is using an hardcoded value to get it.
Use DT to get the number of channels.

Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 676228ff71de..54337cd0afcc 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -139,6 +139,8 @@ struct cppi41_dd {
 	struct chan_queues td_queue;
 	u16 first_completion_queue;
 	u16 qmgr_num_pend;
+	u32 n_chans;
+	u8 platform;
 
 	struct list_head pending;	/* Pending queued transfers */
 	spinlock_t lock;		/* Lock for pending list */
@@ -742,13 +744,8 @@ static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
 {
 	struct cppi41_channel *cchan;
 	int i;
-	int ret;
-	u32 n_chans;
+	u32 n_chans = cdd->n_chans;
 
-	ret = of_property_read_u32(dev->of_node, "#dma-channels",
-			&n_chans);
-	if (ret)
-		return ret;
 	/*
 	 * The channels can only be used as TX or as RX. So we add twice
 	 * that much dma channels because USB can only do RX or TX.
@@ -862,7 +859,7 @@ static void init_sched(struct cppi41_dd *cdd)
 
 	word = 0;
 	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
-	for (ch = 0; ch < 15 * 2; ch += 2) {
+	for (ch = 0; ch < cdd->n_chans; ch += 2) {
 
 		reg = SCHED_ENTRY0_CHAN(ch);
 		reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;
@@ -872,7 +869,7 @@ static void init_sched(struct cppi41_dd *cdd)
 		cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
 		word++;
 	}
-	reg = 15 * 2 * 2 - 1;
+	reg = cdd->n_chans * 2 - 1;
 	reg |= DMA_SCHED_CTRL_EN;
 	cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
 }
@@ -897,6 +894,7 @@ static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
 
 	cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
 	init_sched(cdd);
+
 	return 0;
 err_td:
 	deinit_cppi41(dev, cdd);
@@ -1058,6 +1056,11 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	cdd->qmgr_num_pend = glue_info->qmgr_num_pend;
 	cdd->first_completion_queue = glue_info->first_completion_queue;
 
+	ret = of_property_read_u32(dev->of_node,
+				   "#dma-channels", &cdd->n_chans);
+	if (ret)
+		goto err_get_n_chans;
+
 	ret = init_cppi41(dev, cdd);
 	if (ret)
 		goto err_init_cppi;
@@ -1100,6 +1103,7 @@ err_chans:
 	deinit_cppi41(dev, cdd);
 err_init_cppi:
 	pm_runtime_dont_use_autosuspend(dev);
+err_get_n_chans:
 err_get_sync:
 	pm_runtime_put_sync(dev);
 	pm_runtime_disable(dev);
-- 
cgit v1.2.3


From a15382b75b903fa918a3a3c4d3b87e8061790312 Mon Sep 17 00:00:00 2001
From: Alexandre Bailon <abailon@baylibre.com>
Date: Wed, 15 Feb 2017 14:56:36 +0100
Subject: dmaengine: cppi41: Remove isr callback from glue layer

All the platform code to manage IRQ has been moved to MUSB,
and now the interrupt handler is completely generic.
Remove the isr callback that is not useful anymore.

Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 54337cd0afcc..4573080e74c7 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -224,7 +224,6 @@ static const struct chan_queues am335x_usb_queues_rx[] = {
 };
 
 struct cppi_glue_infos {
-	irqreturn_t (*isr)(int irq, void *data);
 	const struct chan_queues *queues_rx;
 	const struct chan_queues *queues_tx;
 	struct chan_queues td_queue;
@@ -965,7 +964,6 @@ static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
 }
 
 static const struct cppi_glue_infos am335x_usb_infos = {
-	.isr = cppi41_irq,
 	.queues_rx = am335x_usb_queues_rx,
 	.queues_tx = am335x_usb_queues_tx,
 	.td_queue = { .submit = 31, .complete = 0 },
@@ -1075,7 +1073,7 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 		goto err_irq;
 	}
 
-	ret = devm_request_irq(&pdev->dev, irq, glue_info->isr, IRQF_SHARED,
+	ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED,
 			dev_name(dev), cdd);
 	if (ret)
 		goto err_irq;
-- 
cgit v1.2.3


From d567206e478f744f90b52f80b85debc3377ea5a0 Mon Sep 17 00:00:00 2001
From: Alexandre Bailon <abailon@baylibre.com>
Date: Mon, 30 Jan 2017 18:49:19 +0100
Subject: dt/bindings: da8xx-usb: Add binding for the CPPI 4.1 DMA controller

DT binding for the TI DA8xx/OMAP-L1x/AM17xx/AM18xx CPPI 4.1 DMA controller.

Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 .../devicetree/bindings/usb/da8xx-usb.txt          | 41 +++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/usb/da8xx-usb.txt b/Documentation/devicetree/bindings/usb/da8xx-usb.txt
index ccb844aba7d4..717c5f656237 100644
--- a/Documentation/devicetree/bindings/usb/da8xx-usb.txt
+++ b/Documentation/devicetree/bindings/usb/da8xx-usb.txt
@@ -18,10 +18,26 @@ Required properties:
 
  - phy-names: Should be "usb-phy"
 
+ - dmas: specifies the dma channels
+
+ - dma-names: specifies the names of the channels. Use "rxN" for receive
+   and "txN" for transmit endpoints. N specifies the endpoint number.
+
 Optional properties:
 ~~~~~~~~~~~~~~~~~~~~
  - vbus-supply: Phandle to a regulator providing the USB bus power.
 
+DMA
+~~~
+- compatible: ti,da830-cppi41
+- reg: offset and length of the following register spaces: CPPI DMA Controller,
+  CPPI DMA Scheduler, Queue Manager
+- reg-names: "controller", "scheduler", "queuemgr"
+- #dma-cells: should be set to 2. The first number represents the
+  channel number (0 … 3 for endpoints 1 … 4).
+  The second number is 0 for RX and 1 for TX transfers.
+- #dma-channels: should be set to 4 representing the 4 endpoints.
+
 Example:
 	usb_phy: usb-phy {
 		compatible = "ti,da830-usb-phy";
@@ -30,7 +46,10 @@ Example:
 	};
 	usb0: usb@200000 {
 		compatible = "ti,da830-musb";
-		reg =   <0x00200000 0x10000>;
+		reg = <0x00200000 0x1000>;
+		ranges;
+		#address-cells = <1>;
+		#size-cells = <1>;
 		interrupts = <58>;
 		interrupt-names = "mc";
 
@@ -39,5 +58,25 @@ Example:
 		phys = <&usb_phy 0>;
 		phy-names = "usb-phy";
 
+		dmas = <&cppi41dma 0 0 &cppi41dma 1 0
+			&cppi41dma 2 0 &cppi41dma 3 0
+			&cppi41dma 0 1 &cppi41dma 1 1
+			&cppi41dma 2 1 &cppi41dma 3 1>;
+		dma-names =
+			"rx1", "rx2", "rx3", "rx4",
+			"tx1", "tx2", "tx3", "tx4";
+
 		status = "okay";
+
+		cppi41dma: dma-controller@201000 {
+			compatible = "ti,da830-cppi41";
+			reg =  <0x201000 0x1000
+				0x202000 0x1000
+				0x204000 0x4000>;
+			reg-names = "controller", "scheduler", "queuemgr";
+			interrupts = <58>;
+			#dma-cells = <2>;
+			#dma-channels = <4>;
+		};
+
 	};
-- 
cgit v1.2.3


From e3fa49acf00a6db3bfdee725ac5d688dd98dc2c9 Mon Sep 17 00:00:00 2001
From: Alexandre Bailon <abailon@baylibre.com>
Date: Mon, 30 Jan 2017 18:49:20 +0100
Subject: dmaengine: cppi41: Add support of DA8xx to CPPI 4.1

The DA8xx has a CPPI 4.1 DMA controller.
This is add the glue layer required to make it work on DA8xx.

Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/Kconfig  |  6 +++---
 drivers/dma/cppi41.c | 23 +++++++++++++++++++++++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index d01d59812cf3..fc3435c5240b 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -514,12 +514,12 @@ config TIMB_DMA
 	  Enable support for the Timberdale FPGA DMA engine.
 
 config TI_CPPI41
-	tristate "AM33xx CPPI41 DMA support"
-	depends on ARCH_OMAP
+	tristate "CPPI 4.1 DMA support"
+	depends on (ARCH_OMAP || ARCH_DAVINCI_DA8XX)
 	select DMA_ENGINE
 	help
 	  The Communications Port Programming Interface (CPPI) 4.1 DMA engine
-	  is currently used by the USB driver on AM335x platforms.
+	  is currently used by the USB driver on AM335x and DA8xx platforms.
 
 config TI_DMA_CROSSBAR
 	bool
diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 4573080e74c7..135fb45a96f0 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -223,6 +223,20 @@ static const struct chan_queues am335x_usb_queues_rx[] = {
 	[29] = { .submit = 30, .complete = 155},
 };
 
+static const struct chan_queues da8xx_usb_queues_tx[] = {
+	[0] = { .submit =  16, .complete = 24},
+	[1] = { .submit =  18, .complete = 24},
+	[2] = { .submit =  20, .complete = 24},
+	[3] = { .submit =  22, .complete = 24},
+};
+
+static const struct chan_queues da8xx_usb_queues_rx[] = {
+	[0] = { .submit =  1, .complete = 26},
+	[1] = { .submit =  3, .complete = 26},
+	[2] = { .submit =  5, .complete = 26},
+	[3] = { .submit =  7, .complete = 26},
+};
+
 struct cppi_glue_infos {
 	const struct chan_queues *queues_rx;
 	const struct chan_queues *queues_tx;
@@ -971,8 +985,17 @@ static const struct cppi_glue_infos am335x_usb_infos = {
 	.qmgr_num_pend = 5,
 };
 
+static const struct cppi_glue_infos da8xx_usb_infos = {
+	.queues_rx = da8xx_usb_queues_rx,
+	.queues_tx = da8xx_usb_queues_tx,
+	.td_queue = { .submit = 31, .complete = 0 },
+	.first_completion_queue = 24,
+	.qmgr_num_pend = 2,
+};
+
 static const struct of_device_id cppi41_dma_ids[] = {
 	{ .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos},
+	{ .compatible = "ti,da830-cppi41", .data = &da8xx_usb_infos},
 	{},
 };
 MODULE_DEVICE_TABLE(of, cppi41_dma_ids);
-- 
cgit v1.2.3


From 4f68b09ea3ed5b9619568625d967569da055f487 Mon Sep 17 00:00:00 2001
From: Alexandre Bailon <abailon@baylibre.com>
Date: Mon, 30 Jan 2017 18:49:21 +0100
Subject: dmaengine: cppi41: Fix teardown warnings

During the teardown of a RX channel, because there is only one
completion queue available for RX channel, descriptor of another
channel may be popped which will cause 2 warnings:
- the first one because we popped a wrong descriptor
  (neither the channel's descriptor, nor the teardown descriptor).
- the second one happen during the teardown of another channel,
  because we can't find the channel descriptor
  (that is, the one that caused the first warning).
To avoid that, use one free queue instead of a transmit completion queue.

Note that fix doesn't fix all the teardown warnings:
I still get some when I run some corner case.

Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 135fb45a96f0..675de6a27e39 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -648,7 +648,7 @@ static int cppi41_tear_down_chan(struct cppi41_channel *c)
 		if (!c->is_tx) {
 			reg |= GCR_STARV_RETRY;
 			reg |= GCR_DESC_TYPE_HOST;
-			reg |= c->q_comp_num;
+			reg |= cdd->td_queue.complete;
 		}
 		reg |= GCR_TEARDOWN;
 		cppi_writel(reg, c->gcr_reg);
@@ -659,7 +659,7 @@ static int cppi41_tear_down_chan(struct cppi41_channel *c)
 	if (!c->td_seen || !c->td_desc_seen) {
 
 		desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
-		if (!desc_phys)
+		if (!desc_phys && c->is_tx)
 			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
 
 		if (desc_phys == c->desc_phys) {
-- 
cgit v1.2.3


From 0a5165a83c624cddb3dfa227a9324acac9ec413c Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Sat, 18 Feb 2017 01:08:17 +0300
Subject: mtd: spi-nor: hisi: do not ignore clk_prepare_enable() failure

hisi_spi_nor_probe() ignores clk_prepare_enable() error code.
The patch fixes that.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Acked-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/mtd/spi-nor/hisi-sfc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/spi-nor/hisi-sfc.c b/drivers/mtd/spi-nor/hisi-sfc.c
index 20378b0d55e9..a286350627a6 100644
--- a/drivers/mtd/spi-nor/hisi-sfc.c
+++ b/drivers/mtd/spi-nor/hisi-sfc.c
@@ -448,8 +448,11 @@ static int hisi_spi_nor_probe(struct platform_device *pdev)
 	if (!host->buffer)
 		return -ENOMEM;
 
+	ret = clk_prepare_enable(host->clk);
+	if (ret)
+		return ret;
+
 	mutex_init(&host->lock);
-	clk_prepare_enable(host->clk);
 	hisi_spi_nor_init(host);
 	ret = hisi_spi_nor_register_all(host);
 	if (ret)
-- 
cgit v1.2.3


From 22d61878565916a0f0ff4babe9207365d0dc8bd6 Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <der.herr@hofr.at>
Date: Mon, 13 Feb 2017 09:13:56 +0100
Subject: mtd: spi-nor: intel: use true/false for boolean

writeable in struct intel_spi is a boolean and assignment should be to
true/false not 1/0 as recommended by boolinit.cocci.

Signed-off-by: Nicholas Mc Guire <der.herr@hofr.at>
Acked-by: Marek Vasut <marek.vasut@gmail.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/mtd/spi-nor/intel-spi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/spi-nor/intel-spi.c b/drivers/mtd/spi-nor/intel-spi.c
index a10f6027b386..e43ce63881a6 100644
--- a/drivers/mtd/spi-nor/intel-spi.c
+++ b/drivers/mtd/spi-nor/intel-spi.c
@@ -704,7 +704,7 @@ static void intel_spi_fill_partition(struct intel_spi *ispi,
 		 * whole partition read-only to be on the safe side.
 		 */
 		if (intel_spi_is_protected(ispi, base, limit))
-			ispi->writeable = 0;
+			ispi->writeable = false;
 
 		end = (limit << 12) + 4096;
 		if (end > part->size)
-- 
cgit v1.2.3


From 011de1b1db534b0ab4ad8099b99c12a17e7c68e0 Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <der.herr@hofr.at>
Date: Mon, 13 Feb 2017 09:14:21 +0100
Subject: mtd: spi-nor: intel: use ERR_CAST in return statement

This fixes a sparse warning about incorrect type in return expression.

Signed-off-by: Nicholas Mc Guire <der.herr@hofr.at>
Acked-by: Marek Vasut <marek.vasut@gmail.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/mtd/spi-nor/intel-spi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/spi-nor/intel-spi.c b/drivers/mtd/spi-nor/intel-spi.c
index e43ce63881a6..986a3d020a3a 100644
--- a/drivers/mtd/spi-nor/intel-spi.c
+++ b/drivers/mtd/spi-nor/intel-spi.c
@@ -728,7 +728,7 @@ struct intel_spi *intel_spi_probe(struct device *dev,
 
 	ispi->base = devm_ioremap_resource(dev, mem);
 	if (IS_ERR(ispi->base))
-		return ispi->base;
+		return ERR_CAST(ispi->base);
 
 	ispi->dev = dev;
 	ispi->info = info;
-- 
cgit v1.2.3


From ca1fa1a8a6dae3e10c73bc5f1da728e662ee9d8b Mon Sep 17 00:00:00 2001
From: "L. D. Pinney" <ldpinney@gmail.com>
Date: Tue, 14 Feb 2017 16:27:13 -0600
Subject: mtd: spi-nor: Add support for ESMT F25L32QA and F25L64QA

Add support for the ESMT F25L32QA and F25L64QA.
These are 4MB and 8MB SPI-NOR Chips from Elite Semiconductor Memory
Technology.

Signed-off-by: L. D. Pinney <ldpinney@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 1ae872bfc3ba..561e46de8faa 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -960,6 +960,8 @@ static const struct flash_info spi_nor_ids[] = {
 
 	/* ESMT */
 	{ "f25l32pa", INFO(0x8c2016, 0, 64 * 1024, 64, SECT_4K | SPI_NOR_HAS_LOCK) },
+	{ "f25l32qa", INFO(0x8c4116, 0, 64 * 1024, 64, SECT_4K | SPI_NOR_HAS_LOCK) },
+	{ "f25l64qa", INFO(0x8c4117, 0, 64 * 1024, 128, SECT_4K | SPI_NOR_HAS_LOCK) },
 
 	/* Everspin */
 	{ "mr25h256", CAT25_INFO( 32 * 1024, 1, 256, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
-- 
cgit v1.2.3


From 923386f761f5ff35da2ac778839876fe4a2f165f Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Fri, 18 Dec 2015 15:36:57 +0200
Subject: clk: ti: remove un-used definitions from public clk_hw_omap struct

Clksel support has been deprecated a while back, so remove these from
the struct also.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/clk/ti.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 6110fe09ed18..07308db5a15d 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -129,8 +129,6 @@ struct clk_hw_omap_ops {
  * @enable_bit: bitshift to write to enable/disable the clock (see @enable_reg)
  * @flags: see "struct clk.flags possibilities" above
  * @clksel_reg: for clksel clks, register va containing src/divisor select
- * @clksel_mask: bitmask in @clksel_reg for the src/divisor selector
- * @clksel: for clksel clks, pointer to struct clksel for this clock
  * @dpll_data: for DPLLs, pointer to struct dpll_data for this clock
  * @clkdm_name: clockdomain name that this clock is contained in
  * @clkdm: pointer to struct clockdomain, resolved from @clkdm_name at runtime
@@ -145,8 +143,6 @@ struct clk_hw_omap {
 	u8			enable_bit;
 	u8			flags;
 	void __iomem		*clksel_reg;
-	u32			clksel_mask;
-	const struct clksel	*clksel;
 	struct dpll_data	*dpll_data;
 	const char		*clkdm_name;
 	struct clockdomain	*clkdm;
-- 
cgit v1.2.3


From 21f0bf2d3dcc7b18cebe2fa3da700e3263c3a3ce Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Thu, 29 Sep 2016 12:00:57 +0300
Subject: clk: ti: add support for automatic clock alias generation

Large portions of the OMAP framework still depend on the support of
having clock aliases in place, so add support functions for generating
these automatically.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/clk.c   | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/clk/ti/clock.h |  3 +++
 2 files changed, 67 insertions(+)

diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index 5fcf247759ac..91bad55cfd5a 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -24,6 +24,7 @@
 #include <linux/list.h>
 #include <linux/regmap.h>
 #include <linux/bootmem.h>
+#include <linux/device.h>
 
 #include "clock.h"
 
@@ -453,3 +454,66 @@ void omap2_clk_enable_init_clocks(const char **clk_names, u8 num_clocks)
 		clk_prepare_enable(init_clk);
 	}
 }
+
+/**
+ * ti_clk_add_alias - add a clock alias for a TI clock
+ * @dev: device alias for this clock
+ * @clk: clock handle to create alias for
+ * @con: connection ID for this clock
+ *
+ * Creates a clock alias for a TI clock. Allocates the clock lookup entry
+ * and assigns the data to it. Returns 0 if successful, negative error
+ * value otherwise.
+ */
+int ti_clk_add_alias(struct device *dev, struct clk *clk, const char *con)
+{
+	struct clk_lookup *cl;
+
+	if (!clk)
+		return 0;
+
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
+	if (!cl)
+		return -ENOMEM;
+
+	if (dev)
+		cl->dev_id = dev_name(dev);
+	cl->con_id = con;
+	cl->clk = clk;
+
+	clkdev_add(cl);
+
+	return 0;
+}
+
+/**
+ * ti_clk_register - register a TI clock to the common clock framework
+ * @dev: device for this clock
+ * @hw: hardware clock handle
+ * @con: connection ID for this clock
+ *
+ * Registers a TI clock to the common clock framework, and adds a clock
+ * alias for it. Returns a handle to the registered clock if successful,
+ * ERR_PTR value in failure.
+ */
+struct clk *ti_clk_register(struct device *dev, struct clk_hw *hw,
+			    const char *con)
+{
+	struct clk *clk;
+	int ret;
+
+	clk = clk_register(dev, hw);
+	if (IS_ERR(clk))
+		return clk;
+
+	ret = ti_clk_add_alias(dev, clk, con);
+	if (ret) {
+		clk_unregister(clk);
+		return ERR_PTR(ret);
+	}
+
+	return clk;
+}
diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index 13c37f48d9d6..c38de6d77e29 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -189,6 +189,9 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup);
 struct clk *ti_clk_register_divider(struct ti_clk *setup);
 struct clk *ti_clk_register_composite(struct ti_clk *setup);
 struct clk *ti_clk_register_dpll(struct ti_clk *setup);
+struct clk *ti_clk_register(struct device *dev, struct clk_hw *hw,
+			    const char *con);
+int ti_clk_add_alias(struct device *dev, struct clk *clk, const char *con);
 
 struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup);
 struct clk_hw *ti_clk_build_component_gate(struct ti_clk_gate *setup);
-- 
cgit v1.2.3


From c17435c56bb1feb7365ebffcfbe98baae50a0fb4 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Thu, 29 Sep 2016 12:05:00 +0300
Subject: clk: ti: add API for creating aliases automatically for simple clock
 types

This API generates clock aliases automatically for simple clock types
(fixed-clock, fixed-factor-clock), so that we don't need to add the data
for these statically into tables. Shall be called from the SoC specific
clock init.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/clk.c   | 26 ++++++++++++++++++++++++++
 drivers/clk/ti/clock.h |  1 +
 2 files changed, 27 insertions(+)

diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index 91bad55cfd5a..193862ed27c7 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -355,6 +355,12 @@ struct clk __init *ti_clk_register_clk(struct ti_clk *setup)
 	return clk;
 }
 
+static const struct of_device_id simple_clk_match_table[] __initconst = {
+	{ .compatible = "fixed-clock" },
+	{ .compatible = "fixed-factor-clock" },
+	{ }
+};
+
 int __init ti_clk_register_legacy_clks(struct ti_clk_alias *clks)
 {
 	struct clk *clk;
@@ -408,6 +414,26 @@ int __init ti_clk_register_legacy_clks(struct ti_clk_alias *clks)
 }
 #endif
 
+/**
+ * ti_clk_add_aliases - setup clock aliases
+ *
+ * Sets up any missing clock aliases. No return value.
+ */
+void __init ti_clk_add_aliases(void)
+{
+	struct device_node *np;
+	struct clk *clk;
+
+	for_each_matching_node(np, simple_clk_match_table) {
+		struct of_phandle_args clkspec;
+
+		clkspec.np = np;
+		clk = of_clk_get_from_provider(&clkspec);
+
+		ti_clk_add_alias(NULL, clk, np->name);
+	}
+}
+
 /**
  * ti_clk_setup_features - setup clock features flags
  * @features: features definition to use
diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index c38de6d77e29..ee6d22507a3d 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -192,6 +192,7 @@ struct clk *ti_clk_register_dpll(struct ti_clk *setup);
 struct clk *ti_clk_register(struct device *dev, struct clk_hw *hw,
 			    const char *con);
 int ti_clk_add_alias(struct device *dev, struct clk *clk, const char *con);
+void ti_clk_add_aliases(void);
 
 struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup);
 struct clk_hw *ti_clk_build_component_gate(struct ti_clk_gate *setup);
-- 
cgit v1.2.3


From 1ae79c46cf195ea39407bd5a2b234304d537980f Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Thu, 29 Sep 2016 12:06:40 +0300
Subject: clk: ti: use automatic clock alias generation framework

Generate clock aliases automatically for all TI clock drivers.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/apll.c         |  2 +-
 drivers/clk/ti/clk-dra7-atl.c | 11 ++++++++++-
 drivers/clk/ti/clk.c          | 20 +++++++++++++++-----
 drivers/clk/ti/composite.c    | 16 +++++++++++++++-
 drivers/clk/ti/divider.c      |  2 +-
 drivers/clk/ti/dpll.c         |  6 +++---
 drivers/clk/ti/fixed-factor.c |  1 +
 drivers/clk/ti/gate.c         |  2 +-
 drivers/clk/ti/interface.c    |  2 +-
 drivers/clk/ti/mux.c          |  2 +-
 10 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c
index 6411e132faa2..62b5db7b8fe9 100644
--- a/drivers/clk/ti/apll.c
+++ b/drivers/clk/ti/apll.c
@@ -164,7 +164,7 @@ static void __init omap_clk_register_apll(struct clk_hw *hw,
 
 	ad->clk_bypass = __clk_get_hw(clk);
 
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, node->name);
 	if (!IS_ERR(clk)) {
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 		kfree(clk_hw->hw.init->parent_names);
diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c
index 45d05339d583..13eb04f72389 100644
--- a/drivers/clk/ti/clk-dra7-atl.c
+++ b/drivers/clk/ti/clk-dra7-atl.c
@@ -24,6 +24,9 @@
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/clk/ti.h>
+
+#include "clock.h"
 
 #define DRA7_ATL_INSTANCES	4
 
@@ -171,6 +174,7 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node)
 	struct clk_init_data init = { NULL };
 	const char **parent_names = NULL;
 	struct clk *clk;
+	int ret;
 
 	clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
 	if (!clk_hw) {
@@ -200,9 +204,14 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node)
 
 	init.parent_names = parent_names;
 
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, node->name);
 
 	if (!IS_ERR(clk)) {
+		ret = ti_clk_add_alias(NULL, clk, node->name);
+		if (ret) {
+			clk_unregister(clk);
+			goto cleanup;
+		}
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 		kfree(parent_names);
 		return;
diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index 193862ed27c7..024123f421d6 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -298,6 +298,7 @@ struct clk __init *ti_clk_register_clk(struct ti_clk *setup)
 	struct ti_clk_fixed *fixed;
 	struct ti_clk_fixed_factor *fixed_factor;
 	struct clk_hw *clk_hw;
+	int ret;
 
 	if (setup->clk)
 		return setup->clk;
@@ -308,6 +309,13 @@ struct clk __init *ti_clk_register_clk(struct ti_clk *setup)
 
 		clk = clk_register_fixed_rate(NULL, setup->name, NULL, 0,
 					      fixed->frequency);
+		if (!IS_ERR(clk)) {
+			ret = ti_clk_add_alias(NULL, clk, setup->name);
+			if (ret) {
+				clk_unregister(clk);
+				clk = ERR_PTR(ret);
+			}
+		}
 		break;
 	case TI_CLK_MUX:
 		clk = ti_clk_register_mux(setup);
@@ -325,6 +333,13 @@ struct clk __init *ti_clk_register_clk(struct ti_clk *setup)
 						fixed_factor->parent,
 						0, fixed_factor->mult,
 						fixed_factor->div);
+		if (!IS_ERR(clk)) {
+			ret = ti_clk_add_alias(NULL, clk, setup->name);
+			if (ret) {
+				clk_unregister(clk);
+				clk = ERR_PTR(ret);
+			}
+		}
 		break;
 	case TI_CLK_GATE:
 		clk = ti_clk_register_gate(setup);
@@ -378,9 +393,6 @@ int __init ti_clk_register_legacy_clks(struct ti_clk_alias *clks)
 				       clks->clk->name, PTR_ERR(clk));
 				return PTR_ERR(clk);
 			}
-		} else {
-			clks->lk.clk = clk;
-			clkdev_add(&clks->lk);
 		}
 		clks++;
 	}
@@ -403,8 +415,6 @@ int __init ti_clk_register_legacy_clks(struct ti_clk_alias *clks)
 				}
 			} else {
 				retry = true;
-				retry_clk->lk.clk = clk;
-				clkdev_add(&retry_clk->lk);
 				list_del(&retry_clk->link);
 			}
 		}
diff --git a/drivers/clk/ti/composite.c b/drivers/clk/ti/composite.c
index 1cf70f452e1e..3f60f990cad8 100644
--- a/drivers/clk/ti/composite.c
+++ b/drivers/clk/ti/composite.c
@@ -126,6 +126,7 @@ struct clk *ti_clk_register_composite(struct ti_clk *setup)
 	int num_parents = 1;
 	const char **parent_names = NULL;
 	struct clk *clk;
+	int ret;
 
 	comp = setup->data;
 
@@ -150,6 +151,12 @@ struct clk *ti_clk_register_composite(struct ti_clk *setup)
 				     &ti_composite_divider_ops, gate,
 				     &ti_composite_gate_ops, 0);
 
+	ret = ti_clk_add_alias(NULL, clk, setup->name);
+	if (ret) {
+		clk_unregister(clk);
+		return ERR_PTR(ret);
+	}
+
 	return clk;
 }
 #endif
@@ -163,6 +170,7 @@ static void __init _register_composite(struct clk_hw *hw,
 	int num_parents = 0;
 	const char **parent_names = NULL;
 	int i;
+	int ret;
 
 	/* Check for presence of each component clock */
 	for (i = 0; i < CLK_COMPONENT_TYPE_MAX; i++) {
@@ -217,8 +225,14 @@ static void __init _register_composite(struct clk_hw *hw,
 				     _get_hw(cclk, CLK_COMPONENT_TYPE_GATE),
 				     &ti_composite_gate_ops, 0);
 
-	if (!IS_ERR(clk))
+	if (!IS_ERR(clk)) {
+		ret = ti_clk_add_alias(NULL, clk, node->name);
+		if (ret) {
+			clk_unregister(clk);
+			goto cleanup;
+		}
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	}
 
 cleanup:
 	/* Free component clock list entries */
diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c
index 6bb87784a0d6..0e8c1363879e 100644
--- a/drivers/clk/ti/divider.c
+++ b/drivers/clk/ti/divider.c
@@ -311,7 +311,7 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 	div->table = table;
 
 	/* register the clock */
-	clk = clk_register(dev, &div->hw);
+	clk = ti_clk_register(dev, &div->hw, name);
 
 	if (IS_ERR(clk))
 		kfree(div);
diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c
index 4b9a419d8e14..37624e16cf04 100644
--- a/drivers/clk/ti/dpll.c
+++ b/drivers/clk/ti/dpll.c
@@ -185,7 +185,7 @@ static void __init _register_dpll(struct clk_hw *hw,
 	dd->clk_bypass = __clk_get_hw(clk);
 
 	/* register the clock */
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, node->name);
 
 	if (!IS_ERR(clk)) {
 		omap2_init_clk_hw_omap_clocks(&clk_hw->hw);
@@ -288,7 +288,7 @@ struct clk *ti_clk_register_dpll(struct ti_clk *setup)
 	if (dpll->flags & CLKF_J_TYPE)
 		dd->flags |= DPLL_J_TYPE;
 
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, setup->name);
 
 	if (!IS_ERR(clk))
 		return clk;
@@ -340,7 +340,7 @@ static void _register_dpll_x2(struct device_node *node,
 	init.num_parents = 1;
 
 	/* register the clock */
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, name);
 
 	if (IS_ERR(clk)) {
 		kfree(clk_hw);
diff --git a/drivers/clk/ti/fixed-factor.c b/drivers/clk/ti/fixed-factor.c
index 3cd406768909..0174a51a4ba6 100644
--- a/drivers/clk/ti/fixed-factor.c
+++ b/drivers/clk/ti/fixed-factor.c
@@ -62,6 +62,7 @@ static void __init of_ti_fixed_factor_clk_setup(struct device_node *node)
 	if (!IS_ERR(clk)) {
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 		of_ti_clk_autoidle_setup(node);
+		ti_clk_add_alias(NULL, clk, clk_name);
 	}
 }
 CLK_OF_DECLARE(ti_fixed_factor_clk, "ti,fixed-factor-clock",
diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c
index bc05f276f32b..b3291dbca5ed 100644
--- a/drivers/clk/ti/gate.c
+++ b/drivers/clk/ti/gate.c
@@ -120,7 +120,7 @@ static struct clk *_register_gate(struct device *dev, const char *name,
 
 	init.flags = flags;
 
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, name);
 
 	if (IS_ERR(clk))
 		kfree(clk_hw);
diff --git a/drivers/clk/ti/interface.c b/drivers/clk/ti/interface.c
index e505e6f8228d..7927e1a2ba02 100644
--- a/drivers/clk/ti/interface.c
+++ b/drivers/clk/ti/interface.c
@@ -58,7 +58,7 @@ static struct clk *_register_interface(struct device *dev, const char *name,
 	init.num_parents = 1;
 	init.parent_names = &parent_name;
 
-	clk = clk_register(NULL, &clk_hw->hw);
+	clk = ti_clk_register(NULL, &clk_hw->hw, name);
 
 	if (IS_ERR(clk))
 		kfree(clk_hw);
diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c
index 44777ab6fdeb..0da149ed1549 100644
--- a/drivers/clk/ti/mux.c
+++ b/drivers/clk/ti/mux.c
@@ -127,7 +127,7 @@ static struct clk *_register_mux(struct device *dev, const char *name,
 	mux->table = table;
 	mux->hw.init = &init;
 
-	clk = clk_register(dev, &mux->hw);
+	clk = ti_clk_register(dev, &mux->hw, name);
 
 	if (IS_ERR(clk))
 		kfree(mux);
-- 
cgit v1.2.3


From b6f27b2db2df395d65b02a758861c7fc54edbec1 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Fri, 30 Sep 2016 14:10:11 +0300
Subject: clk: ti: add clkdm_lookup to the exported functions

This will be needed to move some additional clockdomain functionality
under clock driver.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/clock.c | 1 +
 include/linux/clk/ti.h      | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c
index 1270afdcacdf..6fac82609c96 100644
--- a/arch/arm/mach-omap2/clock.c
+++ b/arch/arm/mach-omap2/clock.c
@@ -57,6 +57,7 @@ u16 cpu_mask;
 static struct ti_clk_ll_ops omap_clk_ll_ops = {
 	.clkdm_clk_enable = clkdm_clk_enable,
 	.clkdm_clk_disable = clkdm_clk_disable,
+	.clkdm_lookup = clkdm_lookup,
 	.cm_wait_module_ready = omap_cm_wait_module_ready,
 	.cm_split_idlest_reg = cm_split_idlest_reg,
 };
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 07308db5a15d..bc7fd8f0fb5d 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -213,6 +213,7 @@ struct clk_omap_reg {
  * @clk_writel: pointer to register write function
  * @clkdm_clk_enable: pointer to clockdomain enable function
  * @clkdm_clk_disable: pointer to clockdomain disable function
+ * @clkdm_lookup: pointer to clockdomain lookup function
  * @cm_wait_module_ready: pointer to CM module wait ready function
  * @cm_split_idlest_reg: pointer to CM module function to split idlest reg
  *
@@ -228,6 +229,7 @@ struct ti_clk_ll_ops {
 	int	(*clkdm_clk_enable)(struct clockdomain *clkdm, struct clk *clk);
 	int	(*clkdm_clk_disable)(struct clockdomain *clkdm,
 				     struct clk *clk);
+	struct clockdomain * (*clkdm_lookup)(const char *name);
 	int	(*cm_wait_module_ready)(u8 part, s16 prcm_mod, u16 idlest_reg,
 					u8 idlest_shift);
 	int	(*cm_split_idlest_reg)(void __iomem *idlest_reg, s16 *prcm_inst,
-- 
cgit v1.2.3


From 2e1a294c0f2273a6d3537c91965ca46a6483bd8c Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Fri, 30 Sep 2016 14:13:38 +0300
Subject: clk: ti: move omap2_init_clk_clkdm under TI clock driver

This is not needed outside the driver, so move it inside it and remove
the prototype from the public header also.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/clock.c  | 32 --------------------------------
 drivers/clk/ti/clock.h       |  1 +
 drivers/clk/ti/clockdomain.c | 30 ++++++++++++++++++++++++++++++
 include/linux/clk/ti.h       |  1 -
 4 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c
index 6fac82609c96..ae5b23c19b83 100644
--- a/arch/arm/mach-omap2/clock.c
+++ b/arch/arm/mach-omap2/clock.c
@@ -79,38 +79,6 @@ int __init omap2_clk_setup_ll_ops(void)
  * OMAP2+ specific clock functions
  */
 
-/* Public functions */
-
-/**
- * omap2_init_clk_clkdm - look up a clockdomain name, store pointer in clk
- * @clk: OMAP clock struct ptr to use
- *
- * Convert a clockdomain name stored in a struct clk 'clk' into a
- * clockdomain pointer, and save it into the struct clk.  Intended to be
- * called during clk_register().  No return value.
- */
-void omap2_init_clk_clkdm(struct clk_hw *hw)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	struct clockdomain *clkdm;
-	const char *clk_name;
-
-	if (!clk->clkdm_name)
-		return;
-
-	clk_name = __clk_get_name(hw->clk);
-
-	clkdm = clkdm_lookup(clk->clkdm_name);
-	if (clkdm) {
-		pr_debug("clock: associated clk %s to clkdm %s\n",
-			 clk_name, clk->clkdm_name);
-		clk->clkdm = clkdm;
-	} else {
-		pr_debug("clock: could not associate clk %s to clkdm %s\n",
-			 clk_name, clk->clkdm_name);
-	}
-}
-
 /**
  * ti_clk_init_features - init clock features struct for the SoC
  *
diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index ee6d22507a3d..ecf82d86118c 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -228,6 +228,7 @@ extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait;
 extern const struct clk_ops ti_clk_divider_ops;
 extern const struct clk_ops ti_clk_mux_ops;
 
+void omap2_init_clk_clkdm(struct clk_hw *hw);
 int omap2_clkops_enable_clkdm(struct clk_hw *hw);
 void omap2_clkops_disable_clkdm(struct clk_hw *hw);
 
diff --git a/drivers/clk/ti/clockdomain.c b/drivers/clk/ti/clockdomain.c
index 6cf9dd189a92..704157d8c0b7 100644
--- a/drivers/clk/ti/clockdomain.c
+++ b/drivers/clk/ti/clockdomain.c
@@ -103,6 +103,36 @@ void omap2_clkops_disable_clkdm(struct clk_hw *hw)
 	ti_clk_ll_ops->clkdm_clk_disable(clk->clkdm, hw->clk);
 }
 
+/**
+ * omap2_init_clk_clkdm - look up a clockdomain name, store pointer in clk
+ * @clk: OMAP clock struct ptr to use
+ *
+ * Convert a clockdomain name stored in a struct clk 'clk' into a
+ * clockdomain pointer, and save it into the struct clk.  Intended to be
+ * called during clk_register().  No return value.
+ */
+void omap2_init_clk_clkdm(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct clockdomain *clkdm;
+	const char *clk_name;
+
+	if (!clk->clkdm_name)
+		return;
+
+	clk_name = __clk_get_name(hw->clk);
+
+	clkdm = ti_clk_ll_ops->clkdm_lookup(clk->clkdm_name);
+	if (clkdm) {
+		pr_debug("clock: associated clk %s to clkdm %s\n",
+			 clk_name, clk->clkdm_name);
+		clk->clkdm = clkdm;
+	} else {
+		pr_debug("clock: could not associate clk %s to clkdm %s\n",
+			 clk_name, clk->clkdm_name);
+	}
+}
+
 static void __init of_ti_clockdomain_setup(struct device_node *node)
 {
 	struct clk *clk;
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index bc7fd8f0fb5d..626ae94b7444 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -238,7 +238,6 @@ struct ti_clk_ll_ops {
 
 #define to_clk_hw_omap(_hw) container_of(_hw, struct clk_hw_omap, hw)
 
-void omap2_init_clk_clkdm(struct clk_hw *clk);
 int omap2_clk_disable_autoidle_all(void);
 int omap2_clk_enable_autoidle_all(void);
 int omap2_clk_allow_idle(struct clk *clk);
-- 
cgit v1.2.3


From ce382d4766bd82b8fdd487bb57afb1c4f9546de2 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Wed, 5 Oct 2016 15:37:02 +0300
Subject: clk: ti: enforce const types on string arrays

Constant string arrays should use const char * const instead of just
const char *. Change the implementations using these to proper type.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/clock.h     | 2 +-
 drivers/clk/ti/composite.c | 2 +-
 drivers/clk/ti/mux.c       | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index ecf82d86118c..cb906a199e66 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -86,7 +86,7 @@ struct ti_clk_mux {
 	int num_parents;
 	u16 reg;
 	u8 module;
-	const char **parents;
+	const char * const *parents;
 	u16 flags;
 };
 
diff --git a/drivers/clk/ti/composite.c b/drivers/clk/ti/composite.c
index 3f60f990cad8..beea89463ca2 100644
--- a/drivers/clk/ti/composite.c
+++ b/drivers/clk/ti/composite.c
@@ -124,7 +124,7 @@ struct clk *ti_clk_register_composite(struct ti_clk *setup)
 	struct clk_hw *mux;
 	struct clk_hw *div;
 	int num_parents = 1;
-	const char **parent_names = NULL;
+	const char * const *parent_names = NULL;
 	struct clk *clk;
 	int ret;
 
diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c
index 0da149ed1549..3cc6db4b31fb 100644
--- a/drivers/clk/ti/mux.c
+++ b/drivers/clk/ti/mux.c
@@ -97,10 +97,10 @@ const struct clk_ops ti_clk_mux_ops = {
 };
 
 static struct clk *_register_mux(struct device *dev, const char *name,
-				 const char **parent_names, u8 num_parents,
-				 unsigned long flags, void __iomem *reg,
-				 u8 shift, u32 mask, u8 clk_mux_flags,
-				 u32 *table)
+				 const char * const *parent_names,
+				 u8 num_parents, unsigned long flags,
+				 void __iomem *reg, u8 shift, u32 mask,
+				 u8 clk_mux_flags, u32 *table)
 {
 	struct clk_mux *mux;
 	struct clk *clk;
-- 
cgit v1.2.3


From b6312da57f4dfaff083e31e3aa61faa9e20d9ad5 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Tue, 22 Mar 2016 16:10:55 +0200
Subject: clk: ti: omap4: cleanup unnecessary clock aliases

Cleanup any unnecessary DT_CLK() alias entries from the OMAP4 clock file.
Most of these are now handled dynamically by the driver code.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/clk-44xx.c | 188 +---------------------------------------------
 1 file changed, 2 insertions(+), 186 deletions(-)

diff --git a/drivers/clk/ti/clk-44xx.c b/drivers/clk/ti/clk-44xx.c
index 7a8b51b35f9f..1c8bb83003bf 100644
--- a/drivers/clk/ti/clk-44xx.c
+++ b/drivers/clk/ti/clk-44xx.c
@@ -34,196 +34,13 @@
 #define OMAP4_DPLL_USB_DEFFREQ				960000000
 
 static struct ti_dt_clk omap44xx_clks[] = {
-	DT_CLK(NULL, "extalt_clkin_ck", "extalt_clkin_ck"),
-	DT_CLK(NULL, "pad_clks_src_ck", "pad_clks_src_ck"),
-	DT_CLK(NULL, "pad_clks_ck", "pad_clks_ck"),
-	DT_CLK(NULL, "pad_slimbus_core_clks_ck", "pad_slimbus_core_clks_ck"),
-	DT_CLK(NULL, "secure_32k_clk_src_ck", "secure_32k_clk_src_ck"),
-	DT_CLK(NULL, "slimbus_src_clk", "slimbus_src_clk"),
-	DT_CLK(NULL, "slimbus_clk", "slimbus_clk"),
-	DT_CLK(NULL, "sys_32k_ck", "sys_32k_ck"),
-	DT_CLK(NULL, "virt_12000000_ck", "virt_12000000_ck"),
-	DT_CLK(NULL, "virt_13000000_ck", "virt_13000000_ck"),
-	DT_CLK(NULL, "virt_16800000_ck", "virt_16800000_ck"),
-	DT_CLK(NULL, "virt_19200000_ck", "virt_19200000_ck"),
-	DT_CLK(NULL, "virt_26000000_ck", "virt_26000000_ck"),
-	DT_CLK(NULL, "virt_27000000_ck", "virt_27000000_ck"),
-	DT_CLK(NULL, "virt_38400000_ck", "virt_38400000_ck"),
-	DT_CLK(NULL, "sys_clkin_ck", "sys_clkin_ck"),
-	DT_CLK(NULL, "tie_low_clock_ck", "tie_low_clock_ck"),
-	DT_CLK(NULL, "utmi_phy_clkout_ck", "utmi_phy_clkout_ck"),
-	DT_CLK(NULL, "xclk60mhsp1_ck", "xclk60mhsp1_ck"),
-	DT_CLK(NULL, "xclk60mhsp2_ck", "xclk60mhsp2_ck"),
-	DT_CLK(NULL, "xclk60motg_ck", "xclk60motg_ck"),
-	DT_CLK(NULL, "abe_dpll_bypass_clk_mux_ck", "abe_dpll_bypass_clk_mux_ck"),
-	DT_CLK(NULL, "abe_dpll_refclk_mux_ck", "abe_dpll_refclk_mux_ck"),
-	DT_CLK(NULL, "dpll_abe_ck", "dpll_abe_ck"),
-	DT_CLK(NULL, "dpll_abe_x2_ck", "dpll_abe_x2_ck"),
-	DT_CLK(NULL, "dpll_abe_m2x2_ck", "dpll_abe_m2x2_ck"),
-	DT_CLK(NULL, "abe_24m_fclk", "abe_24m_fclk"),
-	DT_CLK(NULL, "abe_clk", "abe_clk"),
-	DT_CLK(NULL, "aess_fclk", "aess_fclk"),
-	DT_CLK(NULL, "dpll_abe_m3x2_ck", "dpll_abe_m3x2_ck"),
-	DT_CLK(NULL, "core_hsd_byp_clk_mux_ck", "core_hsd_byp_clk_mux_ck"),
-	DT_CLK(NULL, "dpll_core_ck", "dpll_core_ck"),
-	DT_CLK(NULL, "dpll_core_x2_ck", "dpll_core_x2_ck"),
-	DT_CLK(NULL, "dpll_core_m6x2_ck", "dpll_core_m6x2_ck"),
-	DT_CLK(NULL, "dbgclk_mux_ck", "dbgclk_mux_ck"),
-	DT_CLK(NULL, "dpll_core_m2_ck", "dpll_core_m2_ck"),
-	DT_CLK(NULL, "ddrphy_ck", "ddrphy_ck"),
-	DT_CLK(NULL, "dpll_core_m5x2_ck", "dpll_core_m5x2_ck"),
-	DT_CLK(NULL, "div_core_ck", "div_core_ck"),
-	DT_CLK(NULL, "div_iva_hs_clk", "div_iva_hs_clk"),
-	DT_CLK(NULL, "div_mpu_hs_clk", "div_mpu_hs_clk"),
-	DT_CLK(NULL, "dpll_core_m4x2_ck", "dpll_core_m4x2_ck"),
-	DT_CLK(NULL, "dll_clk_div_ck", "dll_clk_div_ck"),
-	DT_CLK(NULL, "dpll_abe_m2_ck", "dpll_abe_m2_ck"),
-	DT_CLK(NULL, "dpll_core_m3x2_ck", "dpll_core_m3x2_ck"),
-	DT_CLK(NULL, "dpll_core_m7x2_ck", "dpll_core_m7x2_ck"),
-	DT_CLK(NULL, "iva_hsd_byp_clk_mux_ck", "iva_hsd_byp_clk_mux_ck"),
-	DT_CLK(NULL, "dpll_iva_ck", "dpll_iva_ck"),
-	DT_CLK(NULL, "dpll_iva_x2_ck", "dpll_iva_x2_ck"),
-	DT_CLK(NULL, "dpll_iva_m4x2_ck", "dpll_iva_m4x2_ck"),
-	DT_CLK(NULL, "dpll_iva_m5x2_ck", "dpll_iva_m5x2_ck"),
-	DT_CLK(NULL, "dpll_mpu_ck", "dpll_mpu_ck"),
-	DT_CLK(NULL, "dpll_mpu_m2_ck", "dpll_mpu_m2_ck"),
-	DT_CLK(NULL, "per_hs_clk_div_ck", "per_hs_clk_div_ck"),
-	DT_CLK(NULL, "per_hsd_byp_clk_mux_ck", "per_hsd_byp_clk_mux_ck"),
-	DT_CLK(NULL, "dpll_per_ck", "dpll_per_ck"),
-	DT_CLK(NULL, "dpll_per_m2_ck", "dpll_per_m2_ck"),
-	DT_CLK(NULL, "dpll_per_x2_ck", "dpll_per_x2_ck"),
-	DT_CLK(NULL, "dpll_per_m2x2_ck", "dpll_per_m2x2_ck"),
-	DT_CLK(NULL, "dpll_per_m3x2_ck", "dpll_per_m3x2_ck"),
-	DT_CLK(NULL, "dpll_per_m4x2_ck", "dpll_per_m4x2_ck"),
-	DT_CLK(NULL, "dpll_per_m5x2_ck", "dpll_per_m5x2_ck"),
-	DT_CLK(NULL, "dpll_per_m6x2_ck", "dpll_per_m6x2_ck"),
-	DT_CLK(NULL, "dpll_per_m7x2_ck", "dpll_per_m7x2_ck"),
-	DT_CLK(NULL, "usb_hs_clk_div_ck", "usb_hs_clk_div_ck"),
-	DT_CLK(NULL, "dpll_usb_ck", "dpll_usb_ck"),
-	DT_CLK(NULL, "dpll_usb_clkdcoldo_ck", "dpll_usb_clkdcoldo_ck"),
-	DT_CLK(NULL, "dpll_usb_m2_ck", "dpll_usb_m2_ck"),
-	DT_CLK(NULL, "ducati_clk_mux_ck", "ducati_clk_mux_ck"),
-	DT_CLK(NULL, "func_12m_fclk", "func_12m_fclk"),
-	DT_CLK(NULL, "func_24m_clk", "func_24m_clk"),
-	DT_CLK(NULL, "func_24mc_fclk", "func_24mc_fclk"),
-	DT_CLK(NULL, "func_48m_fclk", "func_48m_fclk"),
-	DT_CLK(NULL, "func_48mc_fclk", "func_48mc_fclk"),
-	DT_CLK(NULL, "func_64m_fclk", "func_64m_fclk"),
-	DT_CLK(NULL, "func_96m_fclk", "func_96m_fclk"),
-	DT_CLK(NULL, "init_60m_fclk", "init_60m_fclk"),
-	DT_CLK(NULL, "l3_div_ck", "l3_div_ck"),
-	DT_CLK(NULL, "l4_div_ck", "l4_div_ck"),
-	DT_CLK(NULL, "lp_clk_div_ck", "lp_clk_div_ck"),
-	DT_CLK(NULL, "l4_wkup_clk_mux_ck", "l4_wkup_clk_mux_ck"),
 	DT_CLK("smp_twd", NULL, "mpu_periphclk"),
-	DT_CLK(NULL, "ocp_abe_iclk", "ocp_abe_iclk"),
-	DT_CLK(NULL, "per_abe_24m_fclk", "per_abe_24m_fclk"),
-	DT_CLK(NULL, "per_abe_nc_fclk", "per_abe_nc_fclk"),
-	DT_CLK(NULL, "syc_clk_div_ck", "syc_clk_div_ck"),
-	DT_CLK(NULL, "aes1_fck", "aes1_fck"),
-	DT_CLK(NULL, "aes2_fck", "aes2_fck"),
-	DT_CLK(NULL, "dmic_sync_mux_ck", "dmic_sync_mux_ck"),
-	DT_CLK(NULL, "func_dmic_abe_gfclk", "func_dmic_abe_gfclk"),
-	DT_CLK(NULL, "dss_sys_clk", "dss_sys_clk"),
-	DT_CLK(NULL, "dss_tv_clk", "dss_tv_clk"),
-	DT_CLK(NULL, "dss_dss_clk", "dss_dss_clk"),
-	DT_CLK(NULL, "dss_48mhz_clk", "dss_48mhz_clk"),
-	DT_CLK(NULL, "dss_fck", "dss_fck"),
 	DT_CLK("omapdss_dss", "ick", "dss_fck"),
-	DT_CLK(NULL, "fdif_fck", "fdif_fck"),
-	DT_CLK(NULL, "gpio1_dbclk", "gpio1_dbclk"),
-	DT_CLK(NULL, "gpio2_dbclk", "gpio2_dbclk"),
-	DT_CLK(NULL, "gpio3_dbclk", "gpio3_dbclk"),
-	DT_CLK(NULL, "gpio4_dbclk", "gpio4_dbclk"),
-	DT_CLK(NULL, "gpio5_dbclk", "gpio5_dbclk"),
-	DT_CLK(NULL, "gpio6_dbclk", "gpio6_dbclk"),
-	DT_CLK(NULL, "sgx_clk_mux", "sgx_clk_mux"),
-	DT_CLK(NULL, "hsi_fck", "hsi_fck"),
-	DT_CLK(NULL, "iss_ctrlclk", "iss_ctrlclk"),
-	DT_CLK(NULL, "mcasp_sync_mux_ck", "mcasp_sync_mux_ck"),
-	DT_CLK(NULL, "func_mcasp_abe_gfclk", "func_mcasp_abe_gfclk"),
-	DT_CLK(NULL, "mcbsp1_sync_mux_ck", "mcbsp1_sync_mux_ck"),
-	DT_CLK(NULL, "func_mcbsp1_gfclk", "func_mcbsp1_gfclk"),
-	DT_CLK(NULL, "mcbsp2_sync_mux_ck", "mcbsp2_sync_mux_ck"),
-	DT_CLK(NULL, "func_mcbsp2_gfclk", "func_mcbsp2_gfclk"),
-	DT_CLK(NULL, "mcbsp3_sync_mux_ck", "mcbsp3_sync_mux_ck"),
-	DT_CLK(NULL, "func_mcbsp3_gfclk", "func_mcbsp3_gfclk"),
-	DT_CLK(NULL, "mcbsp4_sync_mux_ck", "mcbsp4_sync_mux_ck"),
-	DT_CLK(NULL, "per_mcbsp4_gfclk", "per_mcbsp4_gfclk"),
-	DT_CLK(NULL, "hsmmc1_fclk", "hsmmc1_fclk"),
-	DT_CLK(NULL, "hsmmc2_fclk", "hsmmc2_fclk"),
-	DT_CLK(NULL, "ocp2scp_usb_phy_phy_48m", "ocp2scp_usb_phy_phy_48m"),
-	DT_CLK(NULL, "sha2md5_fck", "sha2md5_fck"),
-	DT_CLK(NULL, "slimbus1_fclk_1", "slimbus1_fclk_1"),
-	DT_CLK(NULL, "slimbus1_fclk_0", "slimbus1_fclk_0"),
-	DT_CLK(NULL, "slimbus1_fclk_2", "slimbus1_fclk_2"),
-	DT_CLK(NULL, "slimbus1_slimbus_clk", "slimbus1_slimbus_clk"),
-	DT_CLK(NULL, "slimbus2_fclk_1", "slimbus2_fclk_1"),
-	DT_CLK(NULL, "slimbus2_fclk_0", "slimbus2_fclk_0"),
-	DT_CLK(NULL, "slimbus2_slimbus_clk", "slimbus2_slimbus_clk"),
-	DT_CLK(NULL, "smartreflex_core_fck", "smartreflex_core_fck"),
-	DT_CLK(NULL, "smartreflex_iva_fck", "smartreflex_iva_fck"),
-	DT_CLK(NULL, "smartreflex_mpu_fck", "smartreflex_mpu_fck"),
-	DT_CLK(NULL, "dmt1_clk_mux", "dmt1_clk_mux"),
-	DT_CLK(NULL, "cm2_dm10_mux", "cm2_dm10_mux"),
-	DT_CLK(NULL, "cm2_dm11_mux", "cm2_dm11_mux"),
-	DT_CLK(NULL, "cm2_dm2_mux", "cm2_dm2_mux"),
-	DT_CLK(NULL, "cm2_dm3_mux", "cm2_dm3_mux"),
-	DT_CLK(NULL, "cm2_dm4_mux", "cm2_dm4_mux"),
-	DT_CLK(NULL, "timer5_sync_mux", "timer5_sync_mux"),
-	DT_CLK(NULL, "timer6_sync_mux", "timer6_sync_mux"),
-	DT_CLK(NULL, "timer7_sync_mux", "timer7_sync_mux"),
-	DT_CLK(NULL, "timer8_sync_mux", "timer8_sync_mux"),
-	DT_CLK(NULL, "cm2_dm9_mux", "cm2_dm9_mux"),
-	DT_CLK(NULL, "usb_host_fs_fck", "usb_host_fs_fck"),
 	DT_CLK("usbhs_omap", "fs_fck", "usb_host_fs_fck"),
-	DT_CLK(NULL, "utmi_p1_gfclk", "utmi_p1_gfclk"),
-	DT_CLK(NULL, "usb_host_hs_utmi_p1_clk", "usb_host_hs_utmi_p1_clk"),
-	DT_CLK(NULL, "utmi_p2_gfclk", "utmi_p2_gfclk"),
-	DT_CLK(NULL, "usb_host_hs_utmi_p2_clk", "usb_host_hs_utmi_p2_clk"),
-	DT_CLK(NULL, "usb_host_hs_utmi_p3_clk", "usb_host_hs_utmi_p3_clk"),
-	DT_CLK(NULL, "usb_host_hs_hsic480m_p1_clk", "usb_host_hs_hsic480m_p1_clk"),
-	DT_CLK(NULL, "usb_host_hs_hsic60m_p1_clk", "usb_host_hs_hsic60m_p1_clk"),
-	DT_CLK(NULL, "usb_host_hs_hsic60m_p2_clk", "usb_host_hs_hsic60m_p2_clk"),
-	DT_CLK(NULL, "usb_host_hs_hsic480m_p2_clk", "usb_host_hs_hsic480m_p2_clk"),
-	DT_CLK(NULL, "usb_host_hs_func48mclk", "usb_host_hs_func48mclk"),
-	DT_CLK(NULL, "usb_host_hs_fck", "usb_host_hs_fck"),
 	DT_CLK("usbhs_omap", "hs_fck", "usb_host_hs_fck"),
-	DT_CLK(NULL, "otg_60m_gfclk", "otg_60m_gfclk"),
-	DT_CLK(NULL, "usb_otg_hs_xclk", "usb_otg_hs_xclk"),
-	DT_CLK(NULL, "usb_otg_hs_ick", "usb_otg_hs_ick"),
 	DT_CLK("musb-omap2430", "ick", "usb_otg_hs_ick"),
-	DT_CLK(NULL, "usb_phy_cm_clk32k", "usb_phy_cm_clk32k"),
-	DT_CLK(NULL, "usb_tll_hs_usb_ch2_clk", "usb_tll_hs_usb_ch2_clk"),
-	DT_CLK(NULL, "usb_tll_hs_usb_ch0_clk", "usb_tll_hs_usb_ch0_clk"),
-	DT_CLK(NULL, "usb_tll_hs_usb_ch1_clk", "usb_tll_hs_usb_ch1_clk"),
-	DT_CLK(NULL, "usb_tll_hs_ick", "usb_tll_hs_ick"),
 	DT_CLK("usbhs_omap", "usbtll_ick", "usb_tll_hs_ick"),
 	DT_CLK("usbhs_tll", "usbtll_ick", "usb_tll_hs_ick"),
-	DT_CLK(NULL, "usim_ck", "usim_ck"),
-	DT_CLK(NULL, "usim_fclk", "usim_fclk"),
-	DT_CLK(NULL, "pmd_stm_clock_mux_ck", "pmd_stm_clock_mux_ck"),
-	DT_CLK(NULL, "pmd_trace_clk_mux_ck", "pmd_trace_clk_mux_ck"),
-	DT_CLK(NULL, "stm_clk_div_ck", "stm_clk_div_ck"),
-	DT_CLK(NULL, "trace_clk_div_ck", "trace_clk_div_ck"),
-	DT_CLK(NULL, "auxclk0_src_ck", "auxclk0_src_ck"),
-	DT_CLK(NULL, "auxclk0_ck", "auxclk0_ck"),
-	DT_CLK(NULL, "auxclkreq0_ck", "auxclkreq0_ck"),
-	DT_CLK(NULL, "auxclk1_src_ck", "auxclk1_src_ck"),
-	DT_CLK(NULL, "auxclk1_ck", "auxclk1_ck"),
-	DT_CLK(NULL, "auxclkreq1_ck", "auxclkreq1_ck"),
-	DT_CLK(NULL, "auxclk2_src_ck", "auxclk2_src_ck"),
-	DT_CLK(NULL, "auxclk2_ck", "auxclk2_ck"),
-	DT_CLK(NULL, "auxclkreq2_ck", "auxclkreq2_ck"),
-	DT_CLK(NULL, "auxclk3_src_ck", "auxclk3_src_ck"),
-	DT_CLK(NULL, "auxclk3_ck", "auxclk3_ck"),
-	DT_CLK(NULL, "auxclkreq3_ck", "auxclkreq3_ck"),
-	DT_CLK(NULL, "auxclk4_src_ck", "auxclk4_src_ck"),
-	DT_CLK(NULL, "auxclk4_ck", "auxclk4_ck"),
-	DT_CLK(NULL, "auxclkreq4_ck", "auxclkreq4_ck"),
-	DT_CLK(NULL, "auxclk5_src_ck", "auxclk5_src_ck"),
-	DT_CLK(NULL, "auxclk5_ck", "auxclk5_ck"),
-	DT_CLK(NULL, "auxclkreq5_ck", "auxclkreq5_ck"),
 	DT_CLK("omap_i2c.1", "ick", "dummy_ck"),
 	DT_CLK("omap_i2c.2", "ick", "dummy_ck"),
 	DT_CLK("omap_i2c.3", "ick", "dummy_ck"),
@@ -263,9 +80,6 @@ static struct ti_dt_clk omap44xx_clks[] = {
 	DT_CLK("4013c000.timer", "timer_sys_ck", "syc_clk_div_ck"),
 	DT_CLK("4013e000.timer", "timer_sys_ck", "syc_clk_div_ck"),
 	DT_CLK(NULL, "cpufreq_ck", "dpll_mpu_ck"),
-	DT_CLK(NULL, "bandgap_fclk", "bandgap_fclk"),
-	DT_CLK(NULL, "div_ts_ck", "div_ts_ck"),
-	DT_CLK(NULL, "bandgap_ts_fclk", "bandgap_ts_fclk"),
 	{ .node_name = NULL },
 };
 
@@ -278,6 +92,8 @@ int __init omap4xxx_dt_clk_init(void)
 
 	omap2_clk_disable_autoidle_all();
 
+	ti_clk_add_aliases();
+
 	/*
 	 * Lock USB DPLL on OMAP4 devices so that the L3INIT power
 	 * domain can transition to retention state when not in use.
-- 
cgit v1.2.3


From c91f07801f144920f8467486a1e36e42ed9d9ff2 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Mon, 30 Jan 2017 16:01:36 +0200
Subject: clk: ti: drop unnecessary MEMMAP_ADDRESSING flag

This has been superceded by the usage of ti_clk_ll_ops for now.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/apll.c      | 1 -
 drivers/clk/ti/dpll.c      | 2 --
 drivers/clk/ti/gate.c      | 4 +---
 drivers/clk/ti/interface.c | 1 -
 include/linux/clk/ti.h     | 2 --
 5 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c
index 62b5db7b8fe9..5cba28c6ab35 100644
--- a/drivers/clk/ti/apll.c
+++ b/drivers/clk/ti/apll.c
@@ -194,7 +194,6 @@ static void __init of_dra7_apll_setup(struct device_node *node)
 
 	clk_hw->dpll_data = ad;
 	clk_hw->hw.init = init;
-	clk_hw->flags = MEMMAP_ADDRESSING;
 
 	init->name = node->name;
 	init->ops = &apll_ck_ops;
diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c
index 37624e16cf04..c149bd169f43 100644
--- a/drivers/clk/ti/dpll.c
+++ b/drivers/clk/ti/dpll.c
@@ -248,7 +248,6 @@ struct clk *ti_clk_register_dpll(struct ti_clk *setup)
 	clk_hw->dpll_data = dd;
 	clk_hw->ops = &clkhwops_omap3_dpll;
 	clk_hw->hw.init = &init;
-	clk_hw->flags = MEMMAP_ADDRESSING;
 
 	init.name = setup->name;
 	init.ops = ops;
@@ -380,7 +379,6 @@ static void __init of_ti_dpll_setup(struct device_node *node,
 	clk_hw->dpll_data = dd;
 	clk_hw->ops = &clkhwops_omap3_dpll;
 	clk_hw->hw.init = init;
-	clk_hw->flags = MEMMAP_ADDRESSING;
 
 	init->name = node->name;
 	init->ops = ops;
diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c
index b3291dbca5ed..5ff62e2c63d0 100644
--- a/drivers/clk/ti/gate.c
+++ b/drivers/clk/ti/gate.c
@@ -113,7 +113,7 @@ static struct clk *_register_gate(struct device *dev, const char *name,
 	clk_hw->enable_bit = bit_idx;
 	clk_hw->ops = hw_ops;
 
-	clk_hw->flags = MEMMAP_ADDRESSING | clk_gate_flags;
+	clk_hw->flags = clk_gate_flags;
 
 	init.parent_names = &parent_name;
 	init.num_parents = 1;
@@ -203,7 +203,6 @@ struct clk_hw *ti_clk_build_component_gate(struct ti_clk_gate *setup)
 		ops = &clkhwops_iclk_wait;
 
 	gate->ops = ops;
-	gate->flags = MEMMAP_ADDRESSING;
 
 	return &gate->hw;
 }
@@ -269,7 +268,6 @@ _of_ti_composite_gate_clk_setup(struct device_node *node,
 
 	gate->enable_bit = val;
 	gate->ops = hw_ops;
-	gate->flags = MEMMAP_ADDRESSING;
 
 	if (!ti_clk_add_component(node, &gate->hw, CLK_COMPONENT_TYPE_GATE))
 		return;
diff --git a/drivers/clk/ti/interface.c b/drivers/clk/ti/interface.c
index 7927e1a2ba02..42d9fd4f5f6a 100644
--- a/drivers/clk/ti/interface.c
+++ b/drivers/clk/ti/interface.c
@@ -47,7 +47,6 @@ static struct clk *_register_interface(struct device *dev, const char *name,
 
 	clk_hw->hw.init = &init;
 	clk_hw->ops = ops;
-	clk_hw->flags = MEMMAP_ADDRESSING;
 	clk_hw->enable_reg = reg;
 	clk_hw->enable_bit = bit_idx;
 
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 626ae94b7444..affdabd0b6a1 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -168,7 +168,6 @@ struct clk_hw_omap {
  *     should be used.  This is a temporary solution - a better approach
  *     would be to associate clock type-specific data with the clock,
  *     similar to the struct dpll_data approach.
- * MEMMAP_ADDRESSING: Use memmap addressing to access clock registers.
  */
 #define ENABLE_REG_32BIT	(1 << 0)	/* Use 32-bit access */
 #define CLOCK_IDLE_CONTROL	(1 << 1)
@@ -176,7 +175,6 @@ struct clk_hw_omap {
 #define ENABLE_ON_INIT		(1 << 3)	/* Enable upon framework init */
 #define INVERT_ENABLE		(1 << 4)	/* 0 enables, 1 disables */
 #define CLOCK_CLKOUTX2		(1 << 5)
-#define MEMMAP_ADDRESSING	(1 << 6)
 
 /* CM_CLKEN_PLL*.EN* bit values - not all are available for every DPLL */
 #define DPLL_LOW_POWER_STOP	0x1
-- 
cgit v1.2.3


From d83bc5b69f2f86ac8354cdb8bbf1b56f34c4ddee Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Thu, 9 Feb 2017 14:40:40 +0200
Subject: clk: ti: mux: convert TI mux clock to use its internal data
 representation

Instead of using the generic clock driver data struct, use one internal
for the TI clock driver itself. This allows modifying the register access
parts in subsequent patch.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/clock.h | 11 +++++++++++
 drivers/clk/ti/mux.c   | 10 +++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index cb906a199e66..41913bfa63a5 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -16,6 +16,17 @@
 #ifndef __DRIVERS_CLK_TI_CLOCK__
 #define __DRIVERS_CLK_TI_CLOCK__
 
+struct clk_omap_mux {
+	struct clk_hw		hw;
+	void __iomem		*reg;
+	u32			*table;
+	u32			mask;
+	u8			shift;
+	u8			flags;
+};
+
+#define to_clk_omap_mux(_hw) container_of(_hw, struct clk_omap_mux, hw)
+
 enum {
 	TI_CLK_FIXED,
 	TI_CLK_MUX,
diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c
index 3cc6db4b31fb..daa2dee6bafe 100644
--- a/drivers/clk/ti/mux.c
+++ b/drivers/clk/ti/mux.c
@@ -28,7 +28,7 @@
 
 static u8 ti_clk_mux_get_parent(struct clk_hw *hw)
 {
-	struct clk_mux *mux = to_clk_mux(hw);
+	struct clk_omap_mux *mux = to_clk_omap_mux(hw);
 	int num_parents = clk_hw_get_num_parents(hw);
 	u32 val;
 
@@ -65,7 +65,7 @@ static u8 ti_clk_mux_get_parent(struct clk_hw *hw)
 
 static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index)
 {
-	struct clk_mux *mux = to_clk_mux(hw);
+	struct clk_omap_mux *mux = to_clk_omap_mux(hw);
 	u32 val;
 
 	if (mux->table) {
@@ -102,7 +102,7 @@ static struct clk *_register_mux(struct device *dev, const char *name,
 				 void __iomem *reg, u8 shift, u32 mask,
 				 u8 clk_mux_flags, u32 *table)
 {
-	struct clk_mux *mux;
+	struct clk_omap_mux *mux;
 	struct clk *clk;
 	struct clk_init_data init;
 
@@ -229,7 +229,7 @@ CLK_OF_DECLARE(mux_clk, "ti,mux-clock", of_mux_clk_setup);
 
 struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup)
 {
-	struct clk_mux *mux;
+	struct clk_omap_mux *mux;
 	struct clk_omap_reg *reg;
 	int num_parents;
 
@@ -260,7 +260,7 @@ struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup)
 
 static void __init of_ti_composite_mux_clk_setup(struct device_node *node)
 {
-	struct clk_mux *mux;
+	struct clk_omap_mux *mux;
 	unsigned int num_parents;
 	u32 val;
 
-- 
cgit v1.2.3


From 6dbde94756adb14fe1e3516249cb8eee725bd5e0 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Thu, 9 Feb 2017 14:45:45 +0200
Subject: clk: ti: divider: convert TI divider clock to use its own data
 representation

Instead of using the generic clock driver data struct, use one internal
for the TI clock driver itself. This allows modifying the register access
parts in subsequent patch.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/clock.h   | 11 +++++++++++
 drivers/clk/ti/divider.c | 22 +++++++++++-----------
 drivers/clk/ti/gate.c    |  4 ++--
 3 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index 41913bfa63a5..11d3f6a9da08 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -16,6 +16,17 @@
 #ifndef __DRIVERS_CLK_TI_CLOCK__
 #define __DRIVERS_CLK_TI_CLOCK__
 
+struct clk_omap_divider {
+	struct clk_hw		hw;
+	void __iomem		*reg;
+	u8			shift;
+	u8			width;
+	u8			flags;
+	const struct clk_div_table	*table;
+};
+
+#define to_clk_omap_divider(_hw) container_of(_hw, struct clk_omap_divider, hw)
+
 struct clk_omap_mux {
 	struct clk_hw		hw;
 	void __iomem		*reg;
diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c
index 0e8c1363879e..a07652e58a17 100644
--- a/drivers/clk/ti/divider.c
+++ b/drivers/clk/ti/divider.c
@@ -39,7 +39,7 @@ static unsigned int _get_table_maxdiv(const struct clk_div_table *table)
 	return maxdiv;
 }
 
-static unsigned int _get_maxdiv(struct clk_divider *divider)
+static unsigned int _get_maxdiv(struct clk_omap_divider *divider)
 {
 	if (divider->flags & CLK_DIVIDER_ONE_BASED)
 		return div_mask(divider);
@@ -61,7 +61,7 @@ static unsigned int _get_table_div(const struct clk_div_table *table,
 	return 0;
 }
 
-static unsigned int _get_div(struct clk_divider *divider, unsigned int val)
+static unsigned int _get_div(struct clk_omap_divider *divider, unsigned int val)
 {
 	if (divider->flags & CLK_DIVIDER_ONE_BASED)
 		return val;
@@ -83,7 +83,7 @@ static unsigned int _get_table_val(const struct clk_div_table *table,
 	return 0;
 }
 
-static unsigned int _get_val(struct clk_divider *divider, u8 div)
+static unsigned int _get_val(struct clk_omap_divider *divider, u8 div)
 {
 	if (divider->flags & CLK_DIVIDER_ONE_BASED)
 		return div;
@@ -97,7 +97,7 @@ static unsigned int _get_val(struct clk_divider *divider, u8 div)
 static unsigned long ti_clk_divider_recalc_rate(struct clk_hw *hw,
 						unsigned long parent_rate)
 {
-	struct clk_divider *divider = to_clk_divider(hw);
+	struct clk_omap_divider *divider = to_clk_omap_divider(hw);
 	unsigned int div, val;
 
 	val = ti_clk_ll_ops->clk_readl(divider->reg) >> divider->shift;
@@ -131,7 +131,7 @@ static bool _is_valid_table_div(const struct clk_div_table *table,
 	return false;
 }
 
-static bool _is_valid_div(struct clk_divider *divider, unsigned int div)
+static bool _is_valid_div(struct clk_omap_divider *divider, unsigned int div)
 {
 	if (divider->flags & CLK_DIVIDER_POWER_OF_TWO)
 		return is_power_of_2(div);
@@ -172,7 +172,7 @@ static int _div_round(const struct clk_div_table *table,
 static int ti_clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
 				  unsigned long *best_parent_rate)
 {
-	struct clk_divider *divider = to_clk_divider(hw);
+	struct clk_omap_divider *divider = to_clk_omap_divider(hw);
 	int i, bestdiv = 0;
 	unsigned long parent_rate, best = 0, now, maxdiv;
 	unsigned long parent_rate_saved = *best_parent_rate;
@@ -239,14 +239,14 @@ static long ti_clk_divider_round_rate(struct clk_hw *hw, unsigned long rate,
 static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 				   unsigned long parent_rate)
 {
-	struct clk_divider *divider;
+	struct clk_omap_divider *divider;
 	unsigned int div, value;
 	u32 val;
 
 	if (!hw || !rate)
 		return -EINVAL;
 
-	divider = to_clk_divider(hw);
+	divider = to_clk_omap_divider(hw);
 
 	div = DIV_ROUND_UP(parent_rate, rate);
 	value = _get_val(divider, div);
@@ -278,7 +278,7 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 				     u8 shift, u8 width, u8 clk_divider_flags,
 				     const struct clk_div_table *table)
 {
-	struct clk_divider *div;
+	struct clk_omap_divider *div;
 	struct clk *clk;
 	struct clk_init_data init;
 
@@ -379,7 +379,7 @@ _get_div_table_from_setup(struct ti_clk_divider *setup, u8 *width)
 
 struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup)
 {
-	struct clk_divider *div;
+	struct clk_omap_divider *div;
 	struct clk_omap_reg *reg;
 
 	if (!setup)
@@ -617,7 +617,7 @@ CLK_OF_DECLARE(divider_clk, "ti,divider-clock", of_ti_divider_clk_setup);
 
 static void __init of_ti_composite_divider_clk_setup(struct device_node *node)
 {
-	struct clk_divider *div;
+	struct clk_omap_divider *div;
 	u32 val;
 
 	div = kzalloc(sizeof(*div), GFP_KERNEL);
diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c
index 5ff62e2c63d0..a65f0827f605 100644
--- a/drivers/clk/ti/gate.c
+++ b/drivers/clk/ti/gate.c
@@ -62,7 +62,7 @@ static const struct clk_ops omap_gate_clk_hsdiv_restore_ops = {
  */
 static int omap36xx_gate_clk_enable_with_hsdiv_restore(struct clk_hw *hw)
 {
-	struct clk_divider *parent;
+	struct clk_omap_divider *parent;
 	struct clk_hw *parent_hw;
 	u32 dummy_v, orig_v;
 	int ret;
@@ -72,7 +72,7 @@ static int omap36xx_gate_clk_enable_with_hsdiv_restore(struct clk_hw *hw)
 
 	/* Parent is the x2 node, get parent of parent for the m2 div */
 	parent_hw = clk_hw_get_parent(clk_hw_get_parent(hw));
-	parent = to_clk_divider(parent_hw);
+	parent = to_clk_omap_divider(parent_hw);
 
 	/* Restore the dividers */
 	if (!ret) {
-- 
cgit v1.2.3


From 4f6be5655dc95a83882e5e6f743baf603fcc1fbd Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Thu, 9 Feb 2017 14:46:53 +0200
Subject: clk: ti: divider: add driver internal API for parsing divider data

This can be used from the divider itself, and also from the clkctrl
clocks once this is introduced.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/clock.h   |  4 +++
 drivers/clk/ti/divider.c | 63 +++++++++++++++++++++++++++++++-----------------
 2 files changed, 45 insertions(+), 22 deletions(-)

diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index 11d3f6a9da08..bdfdf26c9ebd 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -220,6 +220,10 @@ struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup);
 struct clk_hw *ti_clk_build_component_gate(struct ti_clk_gate *setup);
 struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup);
 
+int ti_clk_parse_divider_data(int *div_table, int num_dividers, int max_div,
+			      u8 flags, u8 *width,
+			      const struct clk_div_table **table);
+
 void ti_clk_patch_legacy_clks(struct ti_clk **patch);
 struct clk *ti_clk_register_clk(struct ti_clk *setup);
 int ti_clk_register_legacy_clks(struct ti_clk_alias *clks);
diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c
index a07652e58a17..1cc0242d823f 100644
--- a/drivers/clk/ti/divider.c
+++ b/drivers/clk/ti/divider.c
@@ -319,20 +319,17 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 	return clk;
 }
 
-static struct clk_div_table *
-_get_div_table_from_setup(struct ti_clk_divider *setup, u8 *width)
+int ti_clk_parse_divider_data(int *div_table, int num_dividers, int max_div,
+			      u8 flags, u8 *width,
+			      const struct clk_div_table **table)
 {
 	int valid_div = 0;
-	struct clk_div_table *table;
-	int i;
-	int div;
 	u32 val;
-	u8 flags;
-
-	if (!setup->num_dividers) {
-		/* Clk divider table not provided, determine min/max divs */
-		flags = setup->flags;
+	int div;
+	int i;
+	struct clk_div_table *tmp;
 
+	if (!div_table) {
 		if (flags & CLKF_INDEX_STARTS_AT_ONE)
 			val = 1;
 		else
@@ -340,7 +337,7 @@ _get_div_table_from_setup(struct ti_clk_divider *setup, u8 *width)
 
 		div = 1;
 
-		while (div < setup->max_div) {
+		while (div < max_div) {
 			if (flags & CLKF_INDEX_POWER_OF_TWO)
 				div <<= 1;
 			else
@@ -349,30 +346,52 @@ _get_div_table_from_setup(struct ti_clk_divider *setup, u8 *width)
 		}
 
 		*width = fls(val);
+		*table = NULL;
 
-		return NULL;
+		return 0;
 	}
 
-	for (i = 0; i < setup->num_dividers; i++)
-		if (setup->dividers[i])
+	i = 0;
+
+	while (!num_dividers || i < num_dividers) {
+		if (div_table[i] == -1)
+			break;
+		if (div_table[i])
 			valid_div++;
+		i++;
+	}
 
-	table = kzalloc(sizeof(*table) * (valid_div + 1), GFP_KERNEL);
-	if (!table)
-		return ERR_PTR(-ENOMEM);
+	num_dividers = i;
+
+	tmp = kzalloc(sizeof(*tmp) * (valid_div + 1), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
 
 	valid_div = 0;
 	*width = 0;
 
-	for (i = 0; i < setup->num_dividers; i++)
-		if (setup->dividers[i]) {
-			table[valid_div].div = setup->dividers[i];
-			table[valid_div].val = i;
+	for (i = 0; i < num_dividers; i++)
+		if (div_table[i] > 0) {
+			tmp[valid_div].div = div_table[i];
+			tmp[valid_div].val = i;
 			valid_div++;
 			*width = i;
 		}
 
 	*width = fls(*width);
+	*table = tmp;
+
+	return 0;
+}
+
+static const struct clk_div_table *
+_get_div_table_from_setup(struct ti_clk_divider *setup, u8 *width)
+{
+	const struct clk_div_table *table = NULL;
+
+	ti_clk_parse_divider_data(setup->dividers, setup->num_dividers,
+				  setup->max_div, setup->flags, width,
+				  &table);
 
 	return table;
 }
@@ -414,7 +433,7 @@ struct clk *ti_clk_register_divider(struct ti_clk *setup)
 	u8 width;
 	u32 flags = 0;
 	u8 div_flags = 0;
-	struct clk_div_table *table;
+	const struct clk_div_table *table;
 	struct clk *clk;
 
 	div = setup->data;
-- 
cgit v1.2.3


From 9a00fa6843953cff1e3420002a454a61edfdd5f0 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Thu, 9 Feb 2017 11:10:19 +0200
Subject: clk: ti: gate: export gate_clk_ops locally

These are going to be used by the clkctrl support that will be introduced
later.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/clock.h | 1 +
 drivers/clk/ti/gate.c  | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index bdfdf26c9ebd..437ea768f837 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -253,6 +253,7 @@ extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait;
 
 extern const struct clk_ops ti_clk_divider_ops;
 extern const struct clk_ops ti_clk_mux_ops;
+extern const struct clk_ops omap_gate_clk_ops;
 
 void omap2_init_clk_clkdm(struct clk_hw *hw);
 int omap2_clkops_enable_clkdm(struct clk_hw *hw);
diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c
index a65f0827f605..77f0920e12f1 100644
--- a/drivers/clk/ti/gate.c
+++ b/drivers/clk/ti/gate.c
@@ -35,7 +35,7 @@ static const struct clk_ops omap_gate_clkdm_clk_ops = {
 	.disable	= &omap2_clkops_disable_clkdm,
 };
 
-static const struct clk_ops omap_gate_clk_ops = {
+const struct clk_ops omap_gate_clk_ops = {
 	.init		= &omap2_init_clk_clkdm,
 	.enable		= &omap2_dflt_clk_enable,
 	.disable	= &omap2_dflt_clk_disable,
-- 
cgit v1.2.3


From 473adbf4e02857a6b78dfb3d9fcf752638bbadb9 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Thu, 9 Feb 2017 11:25:28 +0200
Subject: clk: ti: dpll44xx: fix clksel register initialization

clksel register pointer should be used for the DPLL-MX autoidle handling.
Currently this is not setup at all. Fix by adding proper handling for the
register.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/dpll.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c
index c149bd169f43..778bc90955b9 100644
--- a/drivers/clk/ti/dpll.c
+++ b/drivers/clk/ti/dpll.c
@@ -319,6 +319,7 @@ static void _register_dpll_x2(struct device_node *node,
 	struct clk_hw_omap *clk_hw;
 	const char *name = node->name;
 	const char *parent_name;
+	int ret;
 
 	parent_name = of_clk_get_parent_name(node, 0);
 	if (!parent_name) {
@@ -338,6 +339,20 @@ static void _register_dpll_x2(struct device_node *node,
 	init.parent_names = &parent_name;
 	init.num_parents = 1;
 
+	if (hw_ops == &clkhwops_omap4_dpllmx) {
+		/* Check if register defined, if not, drop hw-ops */
+		ret = of_property_count_elems_of_size(node, "reg", 1);
+		if (ret <= 0) {
+			hw_ops = NULL;
+		} else {
+			clk_hw->clksel_reg = ti_clk_get_reg_addr(node, 0);
+			if (IS_ERR(clk_hw->clksel_reg)) {
+				kfree(clk_hw);
+				return;
+			}
+		}
+	}
+
 	/* register the clock */
 	clk = ti_clk_register(NULL, &clk_hw->hw, name);
 
-- 
cgit v1.2.3


From 6c0afb503937a12a8d20a805fcf263e31afa9871 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Thu, 9 Feb 2017 11:24:37 +0200
Subject: clk: ti: convert to use proper register definition for all accesses

Currently, TI clock driver uses an encapsulated struct that is cast into
a void pointer to store all register addresses. This can be considered
as rather nasty hackery, and prevents from expanding the register
address field also. Instead, replace all the code to use proper struct
in place for this, which contains all the previously used data.

This patch is rather large as it is touching multiple files, but this
can't be split up as we need to avoid any boot breakage.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/clkt2xxx_dpllcore.c |  3 +-
 arch/arm/mach-omap2/clock.c             |  2 +-
 arch/arm/mach-omap2/clock.h             |  2 ++
 arch/arm/mach-omap2/cm.h                |  5 +--
 arch/arm/mach-omap2/cm2xxx.c            |  9 ++---
 arch/arm/mach-omap2/cm3xxx.c            | 10 ++----
 arch/arm/mach-omap2/cm_common.c         |  2 +-
 drivers/clk/ti/apll.c                   | 47 ++++++++++++-------------
 drivers/clk/ti/autoidle.c               | 18 +++++-----
 drivers/clk/ti/clk-3xxx.c               | 55 +++++++++++++++--------------
 drivers/clk/ti/clk.c                    | 47 ++++++++++++-------------
 drivers/clk/ti/clkt_dflt.c              | 61 ++++++++++++---------------------
 drivers/clk/ti/clkt_dpll.c              |  6 ++--
 drivers/clk/ti/clkt_iclk.c              | 29 ++++++++--------
 drivers/clk/ti/clock.h                  | 11 +++---
 drivers/clk/ti/clockdomain.c            |  8 -----
 drivers/clk/ti/divider.c                | 24 +++++++------
 drivers/clk/ti/dpll.c                   | 48 ++++++++++----------------
 drivers/clk/ti/dpll3xxx.c               | 38 ++++++++++----------
 drivers/clk/ti/dpll44xx.c               | 14 ++++----
 drivers/clk/ti/gate.c                   | 32 ++++++++---------
 drivers/clk/ti/interface.c              | 22 ++++++------
 drivers/clk/ti/mux.c                    | 41 +++++++++-------------
 include/linux/clk/ti.h                  | 46 +++++++++++++------------
 24 files changed, 264 insertions(+), 316 deletions(-)

diff --git a/arch/arm/mach-omap2/clkt2xxx_dpllcore.c b/arch/arm/mach-omap2/clkt2xxx_dpllcore.c
index 59cf310bc1e9..e8d417309f33 100644
--- a/arch/arm/mach-omap2/clkt2xxx_dpllcore.c
+++ b/arch/arm/mach-omap2/clkt2xxx_dpllcore.c
@@ -138,7 +138,8 @@ int omap2_reprogram_dpllcore(struct clk_hw *hw, unsigned long rate,
 		if (!dd)
 			return -EINVAL;
 
-		tmpset.cm_clksel1_pll = readl_relaxed(dd->mult_div1_reg);
+		tmpset.cm_clksel1_pll =
+			omap_clk_ll_ops.clk_readl(&dd->mult_div1_reg);
 		tmpset.cm_clksel1_pll &= ~(dd->mult_mask |
 					   dd->div1_mask);
 		div = ((curr_prcm_set->xtal_speed / 1000000) - 1);
diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c
index ae5b23c19b83..42881f21cede 100644
--- a/arch/arm/mach-omap2/clock.c
+++ b/arch/arm/mach-omap2/clock.c
@@ -54,7 +54,7 @@ u16 cpu_mask;
 #define OMAP3PLUS_DPLL_FINT_MIN		32000
 #define OMAP3PLUS_DPLL_FINT_MAX		52000000
 
-static struct ti_clk_ll_ops omap_clk_ll_ops = {
+struct ti_clk_ll_ops omap_clk_ll_ops = {
 	.clkdm_clk_enable = clkdm_clk_enable,
 	.clkdm_clk_disable = clkdm_clk_disable,
 	.clkdm_lookup = clkdm_lookup,
diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h
index 4e66295dca25..cf45550197e6 100644
--- a/arch/arm/mach-omap2/clock.h
+++ b/arch/arm/mach-omap2/clock.h
@@ -64,6 +64,8 @@
 #define OMAP4XXX_EN_DPLL_FRBYPASS		0x6
 #define OMAP4XXX_EN_DPLL_LOCKED			0x7
 
+extern struct ti_clk_ll_ops omap_clk_ll_ops;
+
 extern u16 cpu_mask;
 
 extern const struct clkops clkops_omap2_dflt_wait;
diff --git a/arch/arm/mach-omap2/cm.h b/arch/arm/mach-omap2/cm.h
index 1fe3e6b833d2..de75cbcdc9d1 100644
--- a/arch/arm/mach-omap2/cm.h
+++ b/arch/arm/mach-omap2/cm.h
@@ -23,6 +23,7 @@
 #define MAX_MODULE_READY_TIME		2000
 
 # ifndef __ASSEMBLER__
+#include <linux/clk/ti.h>
 extern void __iomem *cm_base;
 extern void __iomem *cm2_base;
 extern void omap2_set_globals_cm(void __iomem *cm, void __iomem *cm2);
@@ -50,7 +51,7 @@ extern void omap2_set_globals_cm(void __iomem *cm, void __iomem *cm2);
  * @module_disable: ptr to the SoC CM-specific module_disable impl
  */
 struct cm_ll_data {
-	int (*split_idlest_reg)(void __iomem *idlest_reg, s16 *prcm_inst,
+	int (*split_idlest_reg)(struct clk_omap_reg *idlest_reg, s16 *prcm_inst,
 				u8 *idlest_reg_id);
 	int (*wait_module_ready)(u8 part, s16 prcm_mod, u16 idlest_reg,
 				 u8 idlest_shift);
@@ -60,7 +61,7 @@ struct cm_ll_data {
 	void (*module_disable)(u8 part, u16 inst, u16 clkctrl_offs);
 };
 
-extern int cm_split_idlest_reg(void __iomem *idlest_reg, s16 *prcm_inst,
+extern int cm_split_idlest_reg(struct clk_omap_reg *idlest_reg, s16 *prcm_inst,
 			       u8 *idlest_reg_id);
 int omap_cm_wait_module_ready(u8 part, s16 prcm_mod, u16 idlest_reg,
 			      u8 idlest_shift);
diff --git a/arch/arm/mach-omap2/cm2xxx.c b/arch/arm/mach-omap2/cm2xxx.c
index 3e5fd3587eb1..cd90b4c6a06b 100644
--- a/arch/arm/mach-omap2/cm2xxx.c
+++ b/arch/arm/mach-omap2/cm2xxx.c
@@ -204,7 +204,7 @@ void omap2xxx_cm_apll96_disable(void)
  * XXX This function is only needed until absolute register addresses are
  * removed from the OMAP struct clk records.
  */
-static int omap2xxx_cm_split_idlest_reg(void __iomem *idlest_reg,
+static int omap2xxx_cm_split_idlest_reg(struct clk_omap_reg *idlest_reg,
 					s16 *prcm_inst,
 					u8 *idlest_reg_id)
 {
@@ -212,10 +212,7 @@ static int omap2xxx_cm_split_idlest_reg(void __iomem *idlest_reg,
 	u8 idlest_offs;
 	int i;
 
-	if (idlest_reg < cm_base || idlest_reg > (cm_base + 0x0fff))
-		return -EINVAL;
-
-	idlest_offs = (unsigned long)idlest_reg & 0xff;
+	idlest_offs = idlest_reg->offset & 0xff;
 	for (i = 0; i < ARRAY_SIZE(omap2xxx_cm_idlest_offs); i++) {
 		if (idlest_offs == omap2xxx_cm_idlest_offs[i]) {
 			*idlest_reg_id = i + 1;
@@ -226,7 +223,7 @@ static int omap2xxx_cm_split_idlest_reg(void __iomem *idlest_reg,
 	if (i == ARRAY_SIZE(omap2xxx_cm_idlest_offs))
 		return -EINVAL;
 
-	offs = idlest_reg - cm_base;
+	offs = idlest_reg->offset;
 	offs &= 0xff00;
 	*prcm_inst = offs;
 
diff --git a/arch/arm/mach-omap2/cm3xxx.c b/arch/arm/mach-omap2/cm3xxx.c
index d91ae8206d1e..55b046a719dc 100644
--- a/arch/arm/mach-omap2/cm3xxx.c
+++ b/arch/arm/mach-omap2/cm3xxx.c
@@ -118,7 +118,7 @@ static int omap3xxx_cm_wait_module_ready(u8 part, s16 prcm_mod, u16 idlest_id,
  * XXX This function is only needed until absolute register addresses are
  * removed from the OMAP struct clk records.
  */
-static int omap3xxx_cm_split_idlest_reg(void __iomem *idlest_reg,
+static int omap3xxx_cm_split_idlest_reg(struct clk_omap_reg *idlest_reg,
 					s16 *prcm_inst,
 					u8 *idlest_reg_id)
 {
@@ -126,11 +126,7 @@ static int omap3xxx_cm_split_idlest_reg(void __iomem *idlest_reg,
 	u8 idlest_offs;
 	int i;
 
-	if (idlest_reg < (cm_base + OMAP3430_IVA2_MOD) ||
-	    idlest_reg > (cm_base + 0x1ffff))
-		return -EINVAL;
-
-	idlest_offs = (unsigned long)idlest_reg & 0xff;
+	idlest_offs = idlest_reg->offset & 0xff;
 	for (i = 0; i < ARRAY_SIZE(omap3xxx_cm_idlest_offs); i++) {
 		if (idlest_offs == omap3xxx_cm_idlest_offs[i]) {
 			*idlest_reg_id = i + 1;
@@ -141,7 +137,7 @@ static int omap3xxx_cm_split_idlest_reg(void __iomem *idlest_reg,
 	if (i == ARRAY_SIZE(omap3xxx_cm_idlest_offs))
 		return -EINVAL;
 
-	offs = idlest_reg - cm_base;
+	offs = idlest_reg->offset;
 	offs &= 0xff00;
 	*prcm_inst = offs;
 
diff --git a/arch/arm/mach-omap2/cm_common.c b/arch/arm/mach-omap2/cm_common.c
index 23e8bcec34e3..bbe41f4c9dc8 100644
--- a/arch/arm/mach-omap2/cm_common.c
+++ b/arch/arm/mach-omap2/cm_common.c
@@ -65,7 +65,7 @@ void __init omap2_set_globals_cm(void __iomem *cm, void __iomem *cm2)
  * or 0 upon success.  XXX This function is only needed until absolute
  * register addresses are removed from the OMAP struct clk records.
  */
-int cm_split_idlest_reg(void __iomem *idlest_reg, s16 *prcm_inst,
+int cm_split_idlest_reg(struct clk_omap_reg *idlest_reg, s16 *prcm_inst,
 			u8 *idlest_reg_id)
 {
 	if (!cm_ll_data->split_idlest_reg) {
diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c
index 5cba28c6ab35..06f486b3488c 100644
--- a/drivers/clk/ti/apll.c
+++ b/drivers/clk/ti/apll.c
@@ -55,20 +55,20 @@ static int dra7_apll_enable(struct clk_hw *hw)
 	state <<= __ffs(ad->idlest_mask);
 
 	/* Check is already locked */
-	v = ti_clk_ll_ops->clk_readl(ad->idlest_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->idlest_reg);
 
 	if ((v & ad->idlest_mask) == state)
 		return r;
 
-	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->control_reg);
 	v &= ~ad->enable_mask;
 	v |= APLL_FORCE_LOCK << __ffs(ad->enable_mask);
-	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+	ti_clk_ll_ops->clk_writel(v, &ad->control_reg);
 
 	state <<= __ffs(ad->idlest_mask);
 
 	while (1) {
-		v = ti_clk_ll_ops->clk_readl(ad->idlest_reg);
+		v = ti_clk_ll_ops->clk_readl(&ad->idlest_reg);
 		if ((v & ad->idlest_mask) == state)
 			break;
 		if (i > MAX_APLL_WAIT_TRIES)
@@ -99,10 +99,10 @@ static void dra7_apll_disable(struct clk_hw *hw)
 
 	state <<= __ffs(ad->idlest_mask);
 
-	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->control_reg);
 	v &= ~ad->enable_mask;
 	v |= APLL_AUTO_IDLE << __ffs(ad->enable_mask);
-	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+	ti_clk_ll_ops->clk_writel(v, &ad->control_reg);
 }
 
 static int dra7_apll_is_enabled(struct clk_hw *hw)
@@ -113,7 +113,7 @@ static int dra7_apll_is_enabled(struct clk_hw *hw)
 
 	ad = clk->dpll_data;
 
-	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->control_reg);
 	v &= ad->enable_mask;
 
 	v >>= __ffs(ad->enable_mask);
@@ -185,6 +185,7 @@ static void __init of_dra7_apll_setup(struct device_node *node)
 	struct clk_hw_omap *clk_hw = NULL;
 	struct clk_init_data *init = NULL;
 	const char **parent_names = NULL;
+	int ret;
 
 	ad = kzalloc(sizeof(*ad), GFP_KERNEL);
 	clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
@@ -212,10 +213,10 @@ static void __init of_dra7_apll_setup(struct device_node *node)
 
 	init->parent_names = parent_names;
 
-	ad->control_reg = ti_clk_get_reg_addr(node, 0);
-	ad->idlest_reg = ti_clk_get_reg_addr(node, 1);
+	ret = ti_clk_get_reg_addr(node, 0, &ad->control_reg);
+	ret |= ti_clk_get_reg_addr(node, 1, &ad->idlest_reg);
 
-	if (IS_ERR(ad->control_reg) || IS_ERR(ad->idlest_reg))
+	if (ret)
 		goto cleanup;
 
 	ad->idlest_mask = 0x1;
@@ -241,7 +242,7 @@ static int omap2_apll_is_enabled(struct clk_hw *hw)
 	struct dpll_data *ad = clk->dpll_data;
 	u32 v;
 
-	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->control_reg);
 	v &= ad->enable_mask;
 
 	v >>= __ffs(ad->enable_mask);
@@ -267,13 +268,13 @@ static int omap2_apll_enable(struct clk_hw *hw)
 	u32 v;
 	int i = 0;
 
-	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->control_reg);
 	v &= ~ad->enable_mask;
 	v |= OMAP2_EN_APLL_LOCKED << __ffs(ad->enable_mask);
-	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+	ti_clk_ll_ops->clk_writel(v, &ad->control_reg);
 
 	while (1) {
-		v = ti_clk_ll_ops->clk_readl(ad->idlest_reg);
+		v = ti_clk_ll_ops->clk_readl(&ad->idlest_reg);
 		if (v & ad->idlest_mask)
 			break;
 		if (i > MAX_APLL_WAIT_TRIES)
@@ -297,10 +298,10 @@ static void omap2_apll_disable(struct clk_hw *hw)
 	struct dpll_data *ad = clk->dpll_data;
 	u32 v;
 
-	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->control_reg);
 	v &= ~ad->enable_mask;
 	v |= OMAP2_EN_APLL_STOPPED << __ffs(ad->enable_mask);
-	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+	ti_clk_ll_ops->clk_writel(v, &ad->control_reg);
 }
 
 static struct clk_ops omap2_apll_ops = {
@@ -315,10 +316,10 @@ static void omap2_apll_set_autoidle(struct clk_hw_omap *clk, u32 val)
 	struct dpll_data *ad = clk->dpll_data;
 	u32 v;
 
-	v = ti_clk_ll_ops->clk_readl(ad->autoidle_reg);
+	v = ti_clk_ll_ops->clk_readl(&ad->autoidle_reg);
 	v &= ~ad->autoidle_mask;
 	v |= val << __ffs(ad->autoidle_mask);
-	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+	ti_clk_ll_ops->clk_writel(v, &ad->control_reg);
 }
 
 #define OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP	0x3
@@ -347,6 +348,7 @@ static void __init of_omap2_apll_setup(struct device_node *node)
 	struct clk *clk;
 	const char *parent_name;
 	u32 val;
+	int ret;
 
 	ad = kzalloc(sizeof(*ad), GFP_KERNEL);
 	clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
@@ -392,12 +394,11 @@ static void __init of_omap2_apll_setup(struct device_node *node)
 
 	ad->idlest_mask = 1 << val;
 
-	ad->control_reg = ti_clk_get_reg_addr(node, 0);
-	ad->autoidle_reg = ti_clk_get_reg_addr(node, 1);
-	ad->idlest_reg = ti_clk_get_reg_addr(node, 2);
+	ret = ti_clk_get_reg_addr(node, 0, &ad->control_reg);
+	ret |= ti_clk_get_reg_addr(node, 1, &ad->autoidle_reg);
+	ret |= ti_clk_get_reg_addr(node, 2, &ad->idlest_reg);
 
-	if (IS_ERR(ad->control_reg) || IS_ERR(ad->autoidle_reg) ||
-	    IS_ERR(ad->idlest_reg))
+	if (ret)
 		goto cleanup;
 
 	clk = clk_register(NULL, &clk_hw->hw);
diff --git a/drivers/clk/ti/autoidle.c b/drivers/clk/ti/autoidle.c
index 345af43465f0..7bb9afbe4058 100644
--- a/drivers/clk/ti/autoidle.c
+++ b/drivers/clk/ti/autoidle.c
@@ -25,7 +25,7 @@
 #include "clock.h"
 
 struct clk_ti_autoidle {
-	void __iomem		*reg;
+	struct clk_omap_reg	reg;
 	u8			shift;
 	u8			flags;
 	const char		*name;
@@ -73,28 +73,28 @@ static void _allow_autoidle(struct clk_ti_autoidle *clk)
 {
 	u32 val;
 
-	val = ti_clk_ll_ops->clk_readl(clk->reg);
+	val = ti_clk_ll_ops->clk_readl(&clk->reg);
 
 	if (clk->flags & AUTOIDLE_LOW)
 		val &= ~(1 << clk->shift);
 	else
 		val |= (1 << clk->shift);
 
-	ti_clk_ll_ops->clk_writel(val, clk->reg);
+	ti_clk_ll_ops->clk_writel(val, &clk->reg);
 }
 
 static void _deny_autoidle(struct clk_ti_autoidle *clk)
 {
 	u32 val;
 
-	val = ti_clk_ll_ops->clk_readl(clk->reg);
+	val = ti_clk_ll_ops->clk_readl(&clk->reg);
 
 	if (clk->flags & AUTOIDLE_LOW)
 		val |= (1 << clk->shift);
 	else
 		val &= ~(1 << clk->shift);
 
-	ti_clk_ll_ops->clk_writel(val, clk->reg);
+	ti_clk_ll_ops->clk_writel(val, &clk->reg);
 }
 
 /**
@@ -140,6 +140,7 @@ int __init of_ti_clk_autoidle_setup(struct device_node *node)
 {
 	u32 shift;
 	struct clk_ti_autoidle *clk;
+	int ret;
 
 	/* Check if this clock has autoidle support or not */
 	if (of_property_read_u32(node, "ti,autoidle-shift", &shift))
@@ -152,11 +153,10 @@ int __init of_ti_clk_autoidle_setup(struct device_node *node)
 
 	clk->shift = shift;
 	clk->name = node->name;
-	clk->reg = ti_clk_get_reg_addr(node, 0);
-
-	if (IS_ERR(clk->reg)) {
+	ret = ti_clk_get_reg_addr(node, 0, &clk->reg);
+	if (ret) {
 		kfree(clk);
-		return -EINVAL;
+		return ret;
 	}
 
 	if (of_property_read_bool(node, "ti,invert-autoidle-bit"))
diff --git a/drivers/clk/ti/clk-3xxx.c b/drivers/clk/ti/clk-3xxx.c
index 11d8aa3ec186..b1251cae98b8 100644
--- a/drivers/clk/ti/clk-3xxx.c
+++ b/drivers/clk/ti/clk-3xxx.c
@@ -52,14 +52,13 @@
  * @idlest_reg and @idlest_bit.  No return value.
  */
 static void omap3430es2_clk_ssi_find_idlest(struct clk_hw_omap *clk,
-					    void __iomem **idlest_reg,
+					    struct clk_omap_reg *idlest_reg,
 					    u8 *idlest_bit,
 					    u8 *idlest_val)
 {
-	u32 r;
-
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
+	idlest_reg->offset &= ~0xf0;
+	idlest_reg->offset |= 0x20;
 	*idlest_bit = OMAP3430ES2_ST_SSI_IDLE_SHIFT;
 	*idlest_val = OMAP34XX_CM_IDLEST_VAL;
 }
@@ -85,15 +84,15 @@ const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_ssi_wait = {
  * default find_idlest code assumes that they are at the same
  * position.)  No return value.
  */
-static void omap3430es2_clk_dss_usbhost_find_idlest(struct clk_hw_omap *clk,
-						    void __iomem **idlest_reg,
-						    u8 *idlest_bit,
-						    u8 *idlest_val)
+static void
+omap3430es2_clk_dss_usbhost_find_idlest(struct clk_hw_omap *clk,
+					struct clk_omap_reg *idlest_reg,
+					u8 *idlest_bit, u8 *idlest_val)
 {
-	u32 r;
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
 
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
+	idlest_reg->offset &= ~0xf0;
+	idlest_reg->offset |= 0x20;
 	/* USBHOST_IDLE has same shift */
 	*idlest_bit = OMAP3430ES2_ST_DSS_IDLE_SHIFT;
 	*idlest_val = OMAP34XX_CM_IDLEST_VAL;
@@ -122,15 +121,15 @@ const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_dss_usbhost_wait = {
  * shift from the CM_{I,F}CLKEN bit.  Pass back the correct info via
  * @idlest_reg and @idlest_bit.  No return value.
  */
-static void omap3430es2_clk_hsotgusb_find_idlest(struct clk_hw_omap *clk,
-						 void __iomem **idlest_reg,
-						 u8 *idlest_bit,
-						 u8 *idlest_val)
+static void
+omap3430es2_clk_hsotgusb_find_idlest(struct clk_hw_omap *clk,
+				     struct clk_omap_reg *idlest_reg,
+				     u8 *idlest_bit,
+				     u8 *idlest_val)
 {
-	u32 r;
-
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
+	idlest_reg->offset &= ~0xf0;
+	idlest_reg->offset |= 0x20;
 	*idlest_bit = OMAP3430ES2_ST_HSOTGUSB_IDLE_SHIFT;
 	*idlest_val = OMAP34XX_CM_IDLEST_VAL;
 }
@@ -154,11 +153,11 @@ const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_hsotgusb_wait = {
  * bit. A value of 1 indicates that clock is enabled.
  */
 static void am35xx_clk_find_idlest(struct clk_hw_omap *clk,
-				   void __iomem **idlest_reg,
+				   struct clk_omap_reg *idlest_reg,
 				   u8 *idlest_bit,
 				   u8 *idlest_val)
 {
-	*idlest_reg = (__force void __iomem *)(clk->enable_reg);
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
 	*idlest_bit = clk->enable_bit + AM35XX_IPSS_ICK_EN_ACK_OFFSET;
 	*idlest_val = AM35XX_IPSS_CLK_IDLEST_VAL;
 }
@@ -178,10 +177,10 @@ static void am35xx_clk_find_idlest(struct clk_hw_omap *clk,
  * avoid this issue, and remove the casts.  No return value.
  */
 static void am35xx_clk_find_companion(struct clk_hw_omap *clk,
-				      void __iomem **other_reg,
+				      struct clk_omap_reg *other_reg,
 				      u8 *other_bit)
 {
-	*other_reg = (__force void __iomem *)(clk->enable_reg);
+	memcpy(other_reg, &clk->enable_reg, sizeof(*other_reg));
 	if (clk->enable_bit & AM35XX_IPSS_ICK_MASK)
 		*other_bit = clk->enable_bit + AM35XX_IPSS_ICK_FCK_OFFSET;
 	else
@@ -205,14 +204,14 @@ const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait = {
  * and @idlest_bit.  No return value.
  */
 static void am35xx_clk_ipss_find_idlest(struct clk_hw_omap *clk,
-					void __iomem **idlest_reg,
+					struct clk_omap_reg *idlest_reg,
 					u8 *idlest_bit,
 					u8 *idlest_val)
 {
-	u32 r;
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
 
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
+	idlest_reg->offset &= ~0xf0;
+	idlest_reg->offset |= 0x20;
 	*idlest_bit = AM35XX_ST_IPSS_SHIFT;
 	*idlest_val = OMAP34XX_CM_IDLEST_VAL;
 }
diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index 024123f421d6..ddbad7e8d7c9 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -43,27 +43,29 @@ struct clk_iomap {
 
 static struct clk_iomap *clk_memmaps[CLK_MAX_MEMMAPS];
 
-static void clk_memmap_writel(u32 val, void __iomem *reg)
+static void clk_memmap_writel(u32 val, const struct clk_omap_reg *reg)
 {
-	struct clk_omap_reg *r = (struct clk_omap_reg *)&reg;
-	struct clk_iomap *io = clk_memmaps[r->index];
+	struct clk_iomap *io = clk_memmaps[reg->index];
 
-	if (io->regmap)
-		regmap_write(io->regmap, r->offset, val);
+	if (reg->ptr)
+		writel_relaxed(val, reg->ptr);
+	else if (io->regmap)
+		regmap_write(io->regmap, reg->offset, val);
 	else
-		writel_relaxed(val, io->mem + r->offset);
+		writel_relaxed(val, io->mem + reg->offset);
 }
 
-static u32 clk_memmap_readl(void __iomem *reg)
+static u32 clk_memmap_readl(const struct clk_omap_reg *reg)
 {
 	u32 val;
-	struct clk_omap_reg *r = (struct clk_omap_reg *)&reg;
-	struct clk_iomap *io = clk_memmaps[r->index];
+	struct clk_iomap *io = clk_memmaps[reg->index];
 
-	if (io->regmap)
-		regmap_read(io->regmap, r->offset, &val);
+	if (reg->ptr)
+		val = readl_relaxed(reg->ptr);
+	else if (io->regmap)
+		regmap_read(io->regmap, reg->offset, &val);
 	else
-		val = readl_relaxed(io->mem + r->offset);
+		val = readl_relaxed(io->mem + reg->offset);
 
 	return val;
 }
@@ -162,20 +164,18 @@ int __init ti_clk_retry_init(struct device_node *node, struct clk_hw *hw,
  * ti_clk_get_reg_addr - get register address for a clock register
  * @node: device node for the clock
  * @index: register index from the clock node
+ * @reg: pointer to target register struct
  *
- * Builds clock register address from device tree information. This
- * is a struct of type clk_omap_reg. Returns a pointer to the register
- * address, or a pointer error value in failure.
+ * Builds clock register address from device tree information, and returns
+ * the data via the provided output pointer @reg. Returns 0 on success,
+ * negative error value on failure.
  */
-void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index)
+int ti_clk_get_reg_addr(struct device_node *node, int index,
+			struct clk_omap_reg *reg)
 {
-	struct clk_omap_reg *reg;
 	u32 val;
-	u32 tmp;
 	int i;
 
-	reg = (struct clk_omap_reg *)&tmp;
-
 	for (i = 0; i < CLK_MAX_MEMMAPS; i++) {
 		if (clocks_node_ptr[i] == node->parent)
 			break;
@@ -183,19 +183,20 @@ void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index)
 
 	if (i == CLK_MAX_MEMMAPS) {
 		pr_err("clk-provider not found for %s!\n", node->name);
-		return IOMEM_ERR_PTR(-ENOENT);
+		return -ENOENT;
 	}
 
 	reg->index = i;
 
 	if (of_property_read_u32_index(node, "reg", index, &val)) {
 		pr_err("%s must have reg[%d]!\n", node->name, index);
-		return IOMEM_ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	reg->offset = val;
+	reg->ptr = NULL;
 
-	return (__force void __iomem *)tmp;
+	return 0;
 }
 
 /**
diff --git a/drivers/clk/ti/clkt_dflt.c b/drivers/clk/ti/clkt_dflt.c
index c6ae563801d7..91751dd26b16 100644
--- a/drivers/clk/ti/clkt_dflt.c
+++ b/drivers/clk/ti/clkt_dflt.c
@@ -55,7 +55,8 @@
  * elapsed.  XXX Deprecated - should be moved into drivers for the
  * individual IP block that the IDLEST register exists in.
  */
-static int _wait_idlest_generic(struct clk_hw_omap *clk, void __iomem *reg,
+static int _wait_idlest_generic(struct clk_hw_omap *clk,
+				struct clk_omap_reg *reg,
 				u32 mask, u8 idlest, const char *name)
 {
 	int i = 0, ena = 0;
@@ -91,7 +92,7 @@ static int _wait_idlest_generic(struct clk_hw_omap *clk, void __iomem *reg,
  */
 static void _omap2_module_wait_ready(struct clk_hw_omap *clk)
 {
-	void __iomem *companion_reg, *idlest_reg;
+	struct clk_omap_reg companion_reg, idlest_reg;
 	u8 other_bit, idlest_bit, idlest_val, idlest_reg_id;
 	s16 prcm_mod;
 	int r;
@@ -99,17 +100,17 @@ static void _omap2_module_wait_ready(struct clk_hw_omap *clk)
 	/* Not all modules have multiple clocks that their IDLEST depends on */
 	if (clk->ops->find_companion) {
 		clk->ops->find_companion(clk, &companion_reg, &other_bit);
-		if (!(ti_clk_ll_ops->clk_readl(companion_reg) &
+		if (!(ti_clk_ll_ops->clk_readl(&companion_reg) &
 		      (1 << other_bit)))
 			return;
 	}
 
 	clk->ops->find_idlest(clk, &idlest_reg, &idlest_bit, &idlest_val);
-	r = ti_clk_ll_ops->cm_split_idlest_reg(idlest_reg, &prcm_mod,
+	r = ti_clk_ll_ops->cm_split_idlest_reg(&idlest_reg, &prcm_mod,
 					       &idlest_reg_id);
 	if (r) {
 		/* IDLEST register not in the CM module */
-		_wait_idlest_generic(clk, idlest_reg, (1 << idlest_bit),
+		_wait_idlest_generic(clk, &idlest_reg, (1 << idlest_bit),
 				     idlest_val, clk_hw_get_name(&clk->hw));
 	} else {
 		ti_clk_ll_ops->cm_wait_module_ready(0, prcm_mod, idlest_reg_id,
@@ -139,17 +140,17 @@ static void _omap2_module_wait_ready(struct clk_hw_omap *clk)
  * avoid this issue, and remove the casts.  No return value.
  */
 void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk,
-				   void __iomem **other_reg, u8 *other_bit)
+				   struct clk_omap_reg *other_reg,
+				   u8 *other_bit)
 {
-	u32 r;
+	memcpy(other_reg, &clk->enable_reg, sizeof(*other_reg));
 
 	/*
 	 * Convert CM_ICLKEN* <-> CM_FCLKEN*.  This conversion assumes
 	 * it's just a matter of XORing the bits.
 	 */
-	r = ((__force u32)clk->enable_reg ^ (CM_FCLKEN ^ CM_ICLKEN));
+	other_reg->offset ^= (CM_FCLKEN ^ CM_ICLKEN);
 
-	*other_reg = (__force void __iomem *)r;
 	*other_bit = clk->enable_bit;
 }
 
@@ -168,13 +169,14 @@ void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk,
  * CM_IDLEST2).  This is not true for all modules.  No return value.
  */
 void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk,
-				void __iomem **idlest_reg, u8 *idlest_bit,
+				struct clk_omap_reg *idlest_reg, u8 *idlest_bit,
 				u8 *idlest_val)
 {
-	u32 r;
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
+
+	idlest_reg->offset &= ~0xf0;
+	idlest_reg->offset |= 0x20;
 
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
 	*idlest_bit = clk->enable_bit;
 
 	/*
@@ -222,31 +224,19 @@ int omap2_dflt_clk_enable(struct clk_hw *hw)
 		}
 	}
 
-	if (IS_ERR(clk->enable_reg)) {
-		pr_err("%s: %s missing enable_reg\n", __func__,
-		       clk_hw_get_name(hw));
-		ret = -EINVAL;
-		goto err;
-	}
-
 	/* FIXME should not have INVERT_ENABLE bit here */
-	v = ti_clk_ll_ops->clk_readl(clk->enable_reg);
+	v = ti_clk_ll_ops->clk_readl(&clk->enable_reg);
 	if (clk->flags & INVERT_ENABLE)
 		v &= ~(1 << clk->enable_bit);
 	else
 		v |= (1 << clk->enable_bit);
-	ti_clk_ll_ops->clk_writel(v, clk->enable_reg);
-	v = ti_clk_ll_ops->clk_readl(clk->enable_reg); /* OCP barrier */
+	ti_clk_ll_ops->clk_writel(v, &clk->enable_reg);
+	v = ti_clk_ll_ops->clk_readl(&clk->enable_reg); /* OCP barrier */
 
 	if (clk->ops && clk->ops->find_idlest)
 		_omap2_module_wait_ready(clk);
 
 	return 0;
-
-err:
-	if (clkdm_control && clk->clkdm)
-		ti_clk_ll_ops->clkdm_clk_disable(clk->clkdm, hw->clk);
-	return ret;
 }
 
 /**
@@ -264,22 +254,13 @@ void omap2_dflt_clk_disable(struct clk_hw *hw)
 	u32 v;
 
 	clk = to_clk_hw_omap(hw);
-	if (IS_ERR(clk->enable_reg)) {
-		/*
-		 * 'independent' here refers to a clock which is not
-		 * controlled by its parent.
-		 */
-		pr_err("%s: independent clock %s has no enable_reg\n",
-		       __func__, clk_hw_get_name(hw));
-		return;
-	}
 
-	v = ti_clk_ll_ops->clk_readl(clk->enable_reg);
+	v = ti_clk_ll_ops->clk_readl(&clk->enable_reg);
 	if (clk->flags & INVERT_ENABLE)
 		v |= (1 << clk->enable_bit);
 	else
 		v &= ~(1 << clk->enable_bit);
-	ti_clk_ll_ops->clk_writel(v, clk->enable_reg);
+	ti_clk_ll_ops->clk_writel(v, &clk->enable_reg);
 	/* No OCP barrier needed here since it is a disable operation */
 
 	if (!(ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) &&
@@ -300,7 +281,7 @@ int omap2_dflt_clk_is_enabled(struct clk_hw *hw)
 	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
 	u32 v;
 
-	v = ti_clk_ll_ops->clk_readl(clk->enable_reg);
+	v = ti_clk_ll_ops->clk_readl(&clk->enable_reg);
 
 	if (clk->flags & INVERT_ENABLE)
 		v ^= BIT(clk->enable_bit);
diff --git a/drivers/clk/ti/clkt_dpll.c b/drivers/clk/ti/clkt_dpll.c
index b919fdfe8256..ce98da2c10be 100644
--- a/drivers/clk/ti/clkt_dpll.c
+++ b/drivers/clk/ti/clkt_dpll.c
@@ -213,7 +213,7 @@ u8 omap2_init_dpll_parent(struct clk_hw *hw)
 	if (!dd)
 		return -EINVAL;
 
-	v = ti_clk_ll_ops->clk_readl(dd->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
 	v &= dd->enable_mask;
 	v >>= __ffs(dd->enable_mask);
 
@@ -249,14 +249,14 @@ unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk)
 		return 0;
 
 	/* Return bypass rate if DPLL is bypassed */
-	v = ti_clk_ll_ops->clk_readl(dd->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
 	v &= dd->enable_mask;
 	v >>= __ffs(dd->enable_mask);
 
 	if (_omap2_dpll_is_in_bypass(v))
 		return clk_hw_get_rate(dd->clk_bypass);
 
-	v = ti_clk_ll_ops->clk_readl(dd->mult_div1_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->mult_div1_reg);
 	dpll_mult = v & dd->mult_mask;
 	dpll_mult >>= __ffs(dd->mult_mask);
 	dpll_div = v & dd->div1_mask;
diff --git a/drivers/clk/ti/clkt_iclk.c b/drivers/clk/ti/clkt_iclk.c
index 38c36908cf88..60b583d7db33 100644
--- a/drivers/clk/ti/clkt_iclk.c
+++ b/drivers/clk/ti/clkt_iclk.c
@@ -31,28 +31,29 @@
 void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk)
 {
 	u32 v;
-	void __iomem *r;
+	struct clk_omap_reg r;
 
-	r = (__force void __iomem *)
-		((__force u32)clk->enable_reg ^ (CM_AUTOIDLE ^ CM_ICLKEN));
+	memcpy(&r, &clk->enable_reg, sizeof(r));
+	r.offset ^= (CM_AUTOIDLE ^ CM_ICLKEN);
 
-	v = ti_clk_ll_ops->clk_readl(r);
+	v = ti_clk_ll_ops->clk_readl(&r);
 	v |= (1 << clk->enable_bit);
-	ti_clk_ll_ops->clk_writel(v, r);
+	ti_clk_ll_ops->clk_writel(v, &r);
 }
 
 /* XXX */
 void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk)
 {
 	u32 v;
-	void __iomem *r;
+	struct clk_omap_reg r;
 
-	r = (__force void __iomem *)
-		((__force u32)clk->enable_reg ^ (CM_AUTOIDLE ^ CM_ICLKEN));
+	memcpy(&r, &clk->enable_reg, sizeof(r));
 
-	v = ti_clk_ll_ops->clk_readl(r);
+	r.offset ^= (CM_AUTOIDLE ^ CM_ICLKEN);
+
+	v = ti_clk_ll_ops->clk_readl(&r);
 	v &= ~(1 << clk->enable_bit);
-	ti_clk_ll_ops->clk_writel(v, r);
+	ti_clk_ll_ops->clk_writel(v, &r);
 }
 
 /**
@@ -68,14 +69,12 @@ void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk)
  * modules.  No return value.
  */
 static void omap2430_clk_i2chs_find_idlest(struct clk_hw_omap *clk,
-					   void __iomem **idlest_reg,
+					   struct clk_omap_reg *idlest_reg,
 					   u8 *idlest_bit,
 					   u8 *idlest_val)
 {
-	u32 r;
-
-	r = ((__force u32)clk->enable_reg ^ (OMAP24XX_CM_FCLKEN2 ^ CM_IDLEST));
-	*idlest_reg = (__force void __iomem *)r;
+	memcpy(idlest_reg, &clk->enable_reg, sizeof(*idlest_reg));
+	idlest_reg->offset ^= (OMAP24XX_CM_FCLKEN2 ^ CM_IDLEST);
 	*idlest_bit = clk->enable_bit;
 	*idlest_val = OMAP24XX_CM_IDLEST_VAL;
 }
diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index 437ea768f837..3f7b26540be8 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -18,7 +18,7 @@
 
 struct clk_omap_divider {
 	struct clk_hw		hw;
-	void __iomem		*reg;
+	struct clk_omap_reg	reg;
 	u8			shift;
 	u8			width;
 	u8			flags;
@@ -29,7 +29,7 @@ struct clk_omap_divider {
 
 struct clk_omap_mux {
 	struct clk_hw		hw;
-	void __iomem		*reg;
+	struct clk_omap_reg	reg;
 	u32			*table;
 	u32			mask;
 	u8			shift;
@@ -228,7 +228,8 @@ void ti_clk_patch_legacy_clks(struct ti_clk **patch);
 struct clk *ti_clk_register_clk(struct ti_clk *setup);
 int ti_clk_register_legacy_clks(struct ti_clk_alias *clks);
 
-void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index);
+int ti_clk_get_reg_addr(struct device_node *node, int index,
+			struct clk_omap_reg *reg);
 void ti_dt_clocks_register(struct ti_dt_clk *oclks);
 int ti_clk_retry_init(struct device_node *node, struct clk_hw *hw,
 		      ti_of_clk_init_cb_t func);
@@ -263,10 +264,10 @@ int omap2_dflt_clk_enable(struct clk_hw *hw);
 void omap2_dflt_clk_disable(struct clk_hw *hw);
 int omap2_dflt_clk_is_enabled(struct clk_hw *hw);
 void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk,
-				   void __iomem **other_reg,
+				   struct clk_omap_reg *other_reg,
 				   u8 *other_bit);
 void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk,
-				void __iomem **idlest_reg,
+				struct clk_omap_reg *idlest_reg,
 				u8 *idlest_bit, u8 *idlest_val);
 
 void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk);
diff --git a/drivers/clk/ti/clockdomain.c b/drivers/clk/ti/clockdomain.c
index 704157d8c0b7..fbedc6a9fed0 100644
--- a/drivers/clk/ti/clockdomain.c
+++ b/drivers/clk/ti/clockdomain.c
@@ -52,10 +52,6 @@ int omap2_clkops_enable_clkdm(struct clk_hw *hw)
 		return -EINVAL;
 	}
 
-	if (unlikely(clk->enable_reg))
-		pr_err("%s: %s: should use dflt_clk_enable ?!\n", __func__,
-		       clk_hw_get_name(hw));
-
 	if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) {
 		pr_err("%s: %s: clkfw-based clockdomain control disabled ?!\n",
 		       __func__, clk_hw_get_name(hw));
@@ -90,10 +86,6 @@ void omap2_clkops_disable_clkdm(struct clk_hw *hw)
 		return;
 	}
 
-	if (unlikely(clk->enable_reg))
-		pr_err("%s: %s: should use dflt_clk_disable ?!\n", __func__,
-		       clk_hw_get_name(hw));
-
 	if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) {
 		pr_err("%s: %s: clkfw-based clockdomain control disabled ?!\n",
 		       __func__, clk_hw_get_name(hw));
diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c
index 1cc0242d823f..d6dcb283b72b 100644
--- a/drivers/clk/ti/divider.c
+++ b/drivers/clk/ti/divider.c
@@ -100,7 +100,7 @@ static unsigned long ti_clk_divider_recalc_rate(struct clk_hw *hw,
 	struct clk_omap_divider *divider = to_clk_omap_divider(hw);
 	unsigned int div, val;
 
-	val = ti_clk_ll_ops->clk_readl(divider->reg) >> divider->shift;
+	val = ti_clk_ll_ops->clk_readl(&divider->reg) >> divider->shift;
 	val &= div_mask(divider);
 
 	div = _get_div(divider, val);
@@ -257,11 +257,11 @@ static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 	if (divider->flags & CLK_DIVIDER_HIWORD_MASK) {
 		val = div_mask(divider) << (divider->shift + 16);
 	} else {
-		val = ti_clk_ll_ops->clk_readl(divider->reg);
+		val = ti_clk_ll_ops->clk_readl(&divider->reg);
 		val &= ~(div_mask(divider) << divider->shift);
 	}
 	val |= value << divider->shift;
-	ti_clk_ll_ops->clk_writel(val, divider->reg);
+	ti_clk_ll_ops->clk_writel(val, &divider->reg);
 
 	return 0;
 }
@@ -274,7 +274,8 @@ const struct clk_ops ti_clk_divider_ops = {
 
 static struct clk *_register_divider(struct device *dev, const char *name,
 				     const char *parent_name,
-				     unsigned long flags, void __iomem *reg,
+				     unsigned long flags,
+				     struct clk_omap_reg *reg,
 				     u8 shift, u8 width, u8 clk_divider_flags,
 				     const struct clk_div_table *table)
 {
@@ -303,7 +304,7 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 	init.num_parents = (parent_name ? 1 : 0);
 
 	/* struct clk_divider assignments */
-	div->reg = reg;
+	memcpy(&div->reg, reg, sizeof(*reg));
 	div->shift = shift;
 	div->width = width;
 	div->flags = clk_divider_flags;
@@ -561,14 +562,15 @@ static int _get_divider_width(struct device_node *node,
 }
 
 static int __init ti_clk_divider_populate(struct device_node *node,
-	void __iomem **reg, const struct clk_div_table **table,
+	struct clk_omap_reg *reg, const struct clk_div_table **table,
 	u32 *flags, u8 *div_flags, u8 *width, u8 *shift)
 {
 	u32 val;
+	int ret;
 
-	*reg = ti_clk_get_reg_addr(node, 0);
-	if (IS_ERR(*reg))
-		return PTR_ERR(*reg);
+	ret = ti_clk_get_reg_addr(node, 0, reg);
+	if (ret)
+		return ret;
 
 	if (!of_property_read_u32(node, "ti,bit-shift", &val))
 		*shift = val;
@@ -607,7 +609,7 @@ static void __init of_ti_divider_clk_setup(struct device_node *node)
 {
 	struct clk *clk;
 	const char *parent_name;
-	void __iomem *reg;
+	struct clk_omap_reg reg;
 	u8 clk_divider_flags = 0;
 	u8 width = 0;
 	u8 shift = 0;
@@ -620,7 +622,7 @@ static void __init of_ti_divider_clk_setup(struct device_node *node)
 				    &clk_divider_flags, &width, &shift))
 		goto cleanup;
 
-	clk = _register_divider(NULL, node->name, parent_name, flags, reg,
+	clk = _register_divider(NULL, node->name, parent_name, flags, &reg,
 				shift, width, clk_divider_flags, table);
 
 	if (!IS_ERR(clk)) {
diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c
index 778bc90955b9..96d84888c6c5 100644
--- a/drivers/clk/ti/dpll.c
+++ b/drivers/clk/ti/dpll.c
@@ -203,17 +203,10 @@ cleanup:
 }
 
 #if defined(CONFIG_ARCH_OMAP3) && defined(CONFIG_ATAGS)
-static void __iomem *_get_reg(u8 module, u16 offset)
+void _get_reg(u8 module, u16 offset, struct clk_omap_reg *reg)
 {
-	u32 reg;
-	struct clk_omap_reg *reg_setup;
-
-	reg_setup = (struct clk_omap_reg *)&reg;
-
-	reg_setup->index = module;
-	reg_setup->offset = offset;
-
-	return (void __iomem *)reg;
+	reg->index = module;
+	reg->offset = offset;
 }
 
 struct clk *ti_clk_register_dpll(struct ti_clk *setup)
@@ -255,10 +248,10 @@ struct clk *ti_clk_register_dpll(struct ti_clk *setup)
 	init.num_parents = dpll->num_parents;
 	init.parent_names = dpll->parents;
 
-	dd->control_reg = _get_reg(dpll->module, dpll->control_reg);
-	dd->idlest_reg = _get_reg(dpll->module, dpll->idlest_reg);
-	dd->mult_div1_reg = _get_reg(dpll->module, dpll->mult_div1_reg);
-	dd->autoidle_reg = _get_reg(dpll->module, dpll->autoidle_reg);
+	_get_reg(dpll->module, dpll->control_reg, &dd->control_reg);
+	_get_reg(dpll->module, dpll->idlest_reg, &dd->idlest_reg);
+	_get_reg(dpll->module, dpll->mult_div1_reg, &dd->mult_div1_reg);
+	_get_reg(dpll->module, dpll->autoidle_reg, &dd->autoidle_reg);
 
 	dd->modes = dpll->modes;
 	dd->div1_mask = dpll->div1_mask;
@@ -344,12 +337,9 @@ static void _register_dpll_x2(struct device_node *node,
 		ret = of_property_count_elems_of_size(node, "reg", 1);
 		if (ret <= 0) {
 			hw_ops = NULL;
-		} else {
-			clk_hw->clksel_reg = ti_clk_get_reg_addr(node, 0);
-			if (IS_ERR(clk_hw->clksel_reg)) {
-				kfree(clk_hw);
-				return;
-			}
+		} else if (ti_clk_get_reg_addr(node, 0, &clk_hw->clksel_reg)) {
+			kfree(clk_hw);
+			return;
 		}
 	}
 
@@ -412,7 +402,8 @@ static void __init of_ti_dpll_setup(struct device_node *node,
 
 	init->parent_names = parent_names;
 
-	dd->control_reg = ti_clk_get_reg_addr(node, 0);
+	if (ti_clk_get_reg_addr(node, 0, &dd->control_reg))
+		goto cleanup;
 
 	/*
 	 * Special case for OMAP2 DPLL, register order is different due to
@@ -420,25 +411,22 @@ static void __init of_ti_dpll_setup(struct device_node *node,
 	 * missing idlest_mask.
 	 */
 	if (!dd->idlest_mask) {
-		dd->mult_div1_reg = ti_clk_get_reg_addr(node, 1);
+		if (ti_clk_get_reg_addr(node, 1, &dd->mult_div1_reg))
+			goto cleanup;
 #ifdef CONFIG_ARCH_OMAP2
 		clk_hw->ops = &clkhwops_omap2xxx_dpll;
 		omap2xxx_clkt_dpllcore_init(&clk_hw->hw);
 #endif
 	} else {
-		dd->idlest_reg = ti_clk_get_reg_addr(node, 1);
-		if (IS_ERR(dd->idlest_reg))
+		if (ti_clk_get_reg_addr(node, 1, &dd->idlest_reg))
 			goto cleanup;
 
-		dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2);
+		if (ti_clk_get_reg_addr(node, 2, &dd->mult_div1_reg))
+			goto cleanup;
 	}
 
-	if (IS_ERR(dd->control_reg) || IS_ERR(dd->mult_div1_reg))
-		goto cleanup;
-
 	if (dd->autoidle_mask) {
-		dd->autoidle_reg = ti_clk_get_reg_addr(node, 3);
-		if (IS_ERR(dd->autoidle_reg))
+		if (ti_clk_get_reg_addr(node, 3, &dd->autoidle_reg))
 			goto cleanup;
 	}
 
diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c
index 4cdd28a25584..4534de2ef455 100644
--- a/drivers/clk/ti/dpll3xxx.c
+++ b/drivers/clk/ti/dpll3xxx.c
@@ -54,10 +54,10 @@ static void _omap3_dpll_write_clken(struct clk_hw_omap *clk, u8 clken_bits)
 
 	dd = clk->dpll_data;
 
-	v = ti_clk_ll_ops->clk_readl(dd->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
 	v &= ~dd->enable_mask;
 	v |= clken_bits << __ffs(dd->enable_mask);
-	ti_clk_ll_ops->clk_writel(v, dd->control_reg);
+	ti_clk_ll_ops->clk_writel(v, &dd->control_reg);
 }
 
 /* _omap3_wait_dpll_status: wait for a DPLL to enter a specific state */
@@ -73,7 +73,7 @@ static int _omap3_wait_dpll_status(struct clk_hw_omap *clk, u8 state)
 
 	state <<= __ffs(dd->idlest_mask);
 
-	while (((ti_clk_ll_ops->clk_readl(dd->idlest_reg) & dd->idlest_mask)
+	while (((ti_clk_ll_ops->clk_readl(&dd->idlest_reg) & dd->idlest_mask)
 		!= state) && i < MAX_DPLL_WAIT_TRIES) {
 		i++;
 		udelay(1);
@@ -151,7 +151,7 @@ static int _omap3_noncore_dpll_lock(struct clk_hw_omap *clk)
 	state <<= __ffs(dd->idlest_mask);
 
 	/* Check if already locked */
-	if ((ti_clk_ll_ops->clk_readl(dd->idlest_reg) & dd->idlest_mask) ==
+	if ((ti_clk_ll_ops->clk_readl(&dd->idlest_reg) & dd->idlest_mask) ==
 	    state)
 		goto done;
 
@@ -317,14 +317,14 @@ static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel)
 	 * only since freqsel field is no longer present on other devices.
 	 */
 	if (ti_clk_get_features()->flags & TI_CLK_DPLL_HAS_FREQSEL) {
-		v = ti_clk_ll_ops->clk_readl(dd->control_reg);
+		v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
 		v &= ~dd->freqsel_mask;
 		v |= freqsel << __ffs(dd->freqsel_mask);
-		ti_clk_ll_ops->clk_writel(v, dd->control_reg);
+		ti_clk_ll_ops->clk_writel(v, &dd->control_reg);
 	}
 
 	/* Set DPLL multiplier, divider */
-	v = ti_clk_ll_ops->clk_readl(dd->mult_div1_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->mult_div1_reg);
 
 	/* Handle Duty Cycle Correction */
 	if (dd->dcc_mask) {
@@ -370,11 +370,11 @@ static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel)
 		}
 	}
 
-	ti_clk_ll_ops->clk_writel(v, dd->mult_div1_reg);
+	ti_clk_ll_ops->clk_writel(v, &dd->mult_div1_reg);
 
 	/* Set 4X multiplier and low-power mode */
 	if (dd->m4xen_mask || dd->lpmode_mask) {
-		v = ti_clk_ll_ops->clk_readl(dd->control_reg);
+		v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
 
 		if (dd->m4xen_mask) {
 			if (dd->last_rounded_m4xen)
@@ -390,7 +390,7 @@ static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel)
 				v &= ~dd->lpmode_mask;
 		}
 
-		ti_clk_ll_ops->clk_writel(v, dd->control_reg);
+		ti_clk_ll_ops->clk_writel(v, &dd->control_reg);
 	}
 
 	/* We let the clock framework set the other output dividers later */
@@ -652,10 +652,10 @@ static u32 omap3_dpll_autoidle_read(struct clk_hw_omap *clk)
 
 	dd = clk->dpll_data;
 
-	if (!dd->autoidle_reg)
+	if (!dd->autoidle_mask)
 		return -EINVAL;
 
-	v = ti_clk_ll_ops->clk_readl(dd->autoidle_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->autoidle_reg);
 	v &= dd->autoidle_mask;
 	v >>= __ffs(dd->autoidle_mask);
 
@@ -681,7 +681,7 @@ static void omap3_dpll_allow_idle(struct clk_hw_omap *clk)
 
 	dd = clk->dpll_data;
 
-	if (!dd->autoidle_reg)
+	if (!dd->autoidle_mask)
 		return;
 
 	/*
@@ -689,10 +689,10 @@ static void omap3_dpll_allow_idle(struct clk_hw_omap *clk)
 	 * by writing 0x5 instead of 0x1.  Add some mechanism to
 	 * optionally enter this mode.
 	 */
-	v = ti_clk_ll_ops->clk_readl(dd->autoidle_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->autoidle_reg);
 	v &= ~dd->autoidle_mask;
 	v |= DPLL_AUTOIDLE_LOW_POWER_STOP << __ffs(dd->autoidle_mask);
-	ti_clk_ll_ops->clk_writel(v, dd->autoidle_reg);
+	ti_clk_ll_ops->clk_writel(v, &dd->autoidle_reg);
 }
 
 /**
@@ -711,13 +711,13 @@ static void omap3_dpll_deny_idle(struct clk_hw_omap *clk)
 
 	dd = clk->dpll_data;
 
-	if (!dd->autoidle_reg)
+	if (!dd->autoidle_mask)
 		return;
 
-	v = ti_clk_ll_ops->clk_readl(dd->autoidle_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->autoidle_reg);
 	v &= ~dd->autoidle_mask;
 	v |= DPLL_AUTOIDLE_DISABLE << __ffs(dd->autoidle_mask);
-	ti_clk_ll_ops->clk_writel(v, dd->autoidle_reg);
+	ti_clk_ll_ops->clk_writel(v, &dd->autoidle_reg);
 }
 
 /* Clock control for DPLL outputs */
@@ -773,7 +773,7 @@ unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw,
 
 	WARN_ON(!dd->enable_mask);
 
-	v = ti_clk_ll_ops->clk_readl(dd->control_reg) & dd->enable_mask;
+	v = ti_clk_ll_ops->clk_readl(&dd->control_reg) & dd->enable_mask;
 	v >>= __ffs(dd->enable_mask);
 	if ((v != OMAP3XXX_EN_DPLL_LOCKED) || (dd->flags & DPLL_J_TYPE))
 		rate = parent_rate;
diff --git a/drivers/clk/ti/dpll44xx.c b/drivers/clk/ti/dpll44xx.c
index 82c05b55a7be..d7a3f7ec8d77 100644
--- a/drivers/clk/ti/dpll44xx.c
+++ b/drivers/clk/ti/dpll44xx.c
@@ -42,17 +42,17 @@ static void omap4_dpllmx_allow_gatectrl(struct clk_hw_omap *clk)
 	u32 v;
 	u32 mask;
 
-	if (!clk || !clk->clksel_reg)
+	if (!clk)
 		return;
 
 	mask = clk->flags & CLOCK_CLKOUTX2 ?
 			OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK :
 			OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK;
 
-	v = ti_clk_ll_ops->clk_readl(clk->clksel_reg);
+	v = ti_clk_ll_ops->clk_readl(&clk->clksel_reg);
 	/* Clear the bit to allow gatectrl */
 	v &= ~mask;
-	ti_clk_ll_ops->clk_writel(v, clk->clksel_reg);
+	ti_clk_ll_ops->clk_writel(v, &clk->clksel_reg);
 }
 
 static void omap4_dpllmx_deny_gatectrl(struct clk_hw_omap *clk)
@@ -60,17 +60,17 @@ static void omap4_dpllmx_deny_gatectrl(struct clk_hw_omap *clk)
 	u32 v;
 	u32 mask;
 
-	if (!clk || !clk->clksel_reg)
+	if (!clk)
 		return;
 
 	mask = clk->flags & CLOCK_CLKOUTX2 ?
 			OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK :
 			OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK;
 
-	v = ti_clk_ll_ops->clk_readl(clk->clksel_reg);
+	v = ti_clk_ll_ops->clk_readl(&clk->clksel_reg);
 	/* Set the bit to deny gatectrl */
 	v |= mask;
-	ti_clk_ll_ops->clk_writel(v, clk->clksel_reg);
+	ti_clk_ll_ops->clk_writel(v, &clk->clksel_reg);
 }
 
 const struct clk_hw_omap_ops clkhwops_omap4_dpllmx = {
@@ -128,7 +128,7 @@ unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw,
 	rate = omap2_get_dpll_rate(clk);
 
 	/* regm4xen adds a multiplier of 4 to DPLL calculations */
-	v = ti_clk_ll_ops->clk_readl(dd->control_reg);
+	v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
 	if (v & OMAP4430_DPLL_REGM4XEN_MASK)
 		rate *= OMAP4430_REGM4XEN_MULT;
 
diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c
index 77f0920e12f1..7151ec3a1b07 100644
--- a/drivers/clk/ti/gate.c
+++ b/drivers/clk/ti/gate.c
@@ -76,15 +76,15 @@ static int omap36xx_gate_clk_enable_with_hsdiv_restore(struct clk_hw *hw)
 
 	/* Restore the dividers */
 	if (!ret) {
-		orig_v = ti_clk_ll_ops->clk_readl(parent->reg);
+		orig_v = ti_clk_ll_ops->clk_readl(&parent->reg);
 		dummy_v = orig_v;
 
 		/* Write any other value different from the Read value */
 		dummy_v ^= (1 << parent->shift);
-		ti_clk_ll_ops->clk_writel(dummy_v, parent->reg);
+		ti_clk_ll_ops->clk_writel(dummy_v, &parent->reg);
 
 		/* Write the original divider */
-		ti_clk_ll_ops->clk_writel(orig_v, parent->reg);
+		ti_clk_ll_ops->clk_writel(orig_v, &parent->reg);
 	}
 
 	return ret;
@@ -92,7 +92,7 @@ static int omap36xx_gate_clk_enable_with_hsdiv_restore(struct clk_hw *hw)
 
 static struct clk *_register_gate(struct device *dev, const char *name,
 				  const char *parent_name, unsigned long flags,
-				  void __iomem *reg, u8 bit_idx,
+				  struct clk_omap_reg *reg, u8 bit_idx,
 				  u8 clk_gate_flags, const struct clk_ops *ops,
 				  const struct clk_hw_omap_ops *hw_ops)
 {
@@ -109,7 +109,7 @@ static struct clk *_register_gate(struct device *dev, const char *name,
 	init.name = name;
 	init.ops = ops;
 
-	clk_hw->enable_reg = reg;
+	memcpy(&clk_hw->enable_reg, reg, sizeof(*reg));
 	clk_hw->enable_bit = bit_idx;
 	clk_hw->ops = hw_ops;
 
@@ -133,8 +133,7 @@ struct clk *ti_clk_register_gate(struct ti_clk *setup)
 {
 	const struct clk_ops *ops = &omap_gate_clk_ops;
 	const struct clk_hw_omap_ops *hw_ops = NULL;
-	u32 reg;
-	struct clk_omap_reg *reg_setup;
+	struct clk_omap_reg reg;
 	u32 flags = 0;
 	u8 clk_gate_flags = 0;
 	struct ti_clk_gate *gate;
@@ -144,8 +143,6 @@ struct clk *ti_clk_register_gate(struct ti_clk *setup)
 	if (gate->flags & CLKF_INTERFACE)
 		return ti_clk_register_interface(setup);
 
-	reg_setup = (struct clk_omap_reg *)&reg;
-
 	if (gate->flags & CLKF_SET_RATE_PARENT)
 		flags |= CLK_SET_RATE_PARENT;
 
@@ -169,11 +166,12 @@ struct clk *ti_clk_register_gate(struct ti_clk *setup)
 	if (gate->flags & CLKF_AM35XX)
 		hw_ops = &clkhwops_am35xx_ipss_module_wait;
 
-	reg_setup->index = gate->module;
-	reg_setup->offset = gate->reg;
+	reg.index = gate->module;
+	reg.offset = gate->reg;
+	reg.ptr = NULL;
 
 	return _register_gate(NULL, setup->name, gate->parent, flags,
-			      (void __iomem *)reg, gate->bit_shift,
+			      &reg, gate->bit_shift,
 			      clk_gate_flags, ops, hw_ops);
 }
 
@@ -214,15 +212,14 @@ static void __init _of_ti_gate_clk_setup(struct device_node *node,
 {
 	struct clk *clk;
 	const char *parent_name;
-	void __iomem *reg = NULL;
+	struct clk_omap_reg reg;
 	u8 enable_bit = 0;
 	u32 val;
 	u32 flags = 0;
 	u8 clk_gate_flags = 0;
 
 	if (ops != &omap_gate_clkdm_clk_ops) {
-		reg = ti_clk_get_reg_addr(node, 0);
-		if (IS_ERR(reg))
+		if (ti_clk_get_reg_addr(node, 0, &reg))
 			return;
 
 		if (!of_property_read_u32(node, "ti,bit-shift", &val))
@@ -242,7 +239,7 @@ static void __init _of_ti_gate_clk_setup(struct device_node *node,
 	if (of_property_read_bool(node, "ti,set-bit-to-disable"))
 		clk_gate_flags |= INVERT_ENABLE;
 
-	clk = _register_gate(NULL, node->name, parent_name, flags, reg,
+	clk = _register_gate(NULL, node->name, parent_name, flags, &reg,
 			     enable_bit, clk_gate_flags, ops, hw_ops);
 
 	if (!IS_ERR(clk))
@@ -260,8 +257,7 @@ _of_ti_composite_gate_clk_setup(struct device_node *node,
 	if (!gate)
 		return;
 
-	gate->enable_reg = ti_clk_get_reg_addr(node, 0);
-	if (IS_ERR(gate->enable_reg))
+	if (ti_clk_get_reg_addr(node, 0, &gate->enable_reg))
 		goto cleanup;
 
 	of_property_read_u32(node, "ti,bit-shift", &val);
diff --git a/drivers/clk/ti/interface.c b/drivers/clk/ti/interface.c
index 42d9fd4f5f6a..62cf50c1e1e3 100644
--- a/drivers/clk/ti/interface.c
+++ b/drivers/clk/ti/interface.c
@@ -34,7 +34,7 @@ static const struct clk_ops ti_interface_clk_ops = {
 
 static struct clk *_register_interface(struct device *dev, const char *name,
 				       const char *parent_name,
-				       void __iomem *reg, u8 bit_idx,
+				       struct clk_omap_reg *reg, u8 bit_idx,
 				       const struct clk_hw_omap_ops *ops)
 {
 	struct clk_init_data init = { NULL };
@@ -47,7 +47,7 @@ static struct clk *_register_interface(struct device *dev, const char *name,
 
 	clk_hw->hw.init = &init;
 	clk_hw->ops = ops;
-	clk_hw->enable_reg = reg;
+	memcpy(&clk_hw->enable_reg, reg, sizeof(*reg));
 	clk_hw->enable_bit = bit_idx;
 
 	init.name = name;
@@ -71,14 +71,13 @@ static struct clk *_register_interface(struct device *dev, const char *name,
 struct clk *ti_clk_register_interface(struct ti_clk *setup)
 {
 	const struct clk_hw_omap_ops *ops = &clkhwops_iclk_wait;
-	u32 reg;
-	struct clk_omap_reg *reg_setup;
+	struct clk_omap_reg reg;
 	struct ti_clk_gate *gate;
 
 	gate = setup->data;
-	reg_setup = (struct clk_omap_reg *)&reg;
-	reg_setup->index = gate->module;
-	reg_setup->offset = gate->reg;
+	reg.index = gate->module;
+	reg.offset = gate->reg;
+	reg.ptr = NULL;
 
 	if (gate->flags & CLKF_NO_WAIT)
 		ops = &clkhwops_iclk;
@@ -96,7 +95,7 @@ struct clk *ti_clk_register_interface(struct ti_clk *setup)
 		ops = &clkhwops_am35xx_ipss_wait;
 
 	return _register_interface(NULL, setup->name, gate->parent,
-				   (void __iomem *)reg, gate->bit_shift, ops);
+				   &reg, gate->bit_shift, ops);
 }
 #endif
 
@@ -105,12 +104,11 @@ static void __init _of_ti_interface_clk_setup(struct device_node *node,
 {
 	struct clk *clk;
 	const char *parent_name;
-	void __iomem *reg;
+	struct clk_omap_reg reg;
 	u8 enable_bit = 0;
 	u32 val;
 
-	reg = ti_clk_get_reg_addr(node, 0);
-	if (IS_ERR(reg))
+	if (ti_clk_get_reg_addr(node, 0, &reg))
 		return;
 
 	if (!of_property_read_u32(node, "ti,bit-shift", &val))
@@ -122,7 +120,7 @@ static void __init _of_ti_interface_clk_setup(struct device_node *node,
 		return;
 	}
 
-	clk = _register_interface(NULL, node->name, parent_name, reg,
+	clk = _register_interface(NULL, node->name, parent_name, &reg,
 				  enable_bit, ops);
 
 	if (!IS_ERR(clk))
diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c
index daa2dee6bafe..18c267b38461 100644
--- a/drivers/clk/ti/mux.c
+++ b/drivers/clk/ti/mux.c
@@ -39,7 +39,7 @@ static u8 ti_clk_mux_get_parent(struct clk_hw *hw)
 	 * OTOH, pmd_trace_clk_mux_ck uses a separate bit for each clock, so
 	 * val = 0x4 really means "bit 2, index starts at bit 0"
 	 */
-	val = ti_clk_ll_ops->clk_readl(mux->reg) >> mux->shift;
+	val = ti_clk_ll_ops->clk_readl(&mux->reg) >> mux->shift;
 	val &= mux->mask;
 
 	if (mux->table) {
@@ -81,11 +81,11 @@ static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index)
 	if (mux->flags & CLK_MUX_HIWORD_MASK) {
 		val = mux->mask << (mux->shift + 16);
 	} else {
-		val = ti_clk_ll_ops->clk_readl(mux->reg);
+		val = ti_clk_ll_ops->clk_readl(&mux->reg);
 		val &= ~(mux->mask << mux->shift);
 	}
 	val |= index << mux->shift;
-	ti_clk_ll_ops->clk_writel(val, mux->reg);
+	ti_clk_ll_ops->clk_writel(val, &mux->reg);
 
 	return 0;
 }
@@ -99,7 +99,7 @@ const struct clk_ops ti_clk_mux_ops = {
 static struct clk *_register_mux(struct device *dev, const char *name,
 				 const char * const *parent_names,
 				 u8 num_parents, unsigned long flags,
-				 void __iomem *reg, u8 shift, u32 mask,
+				 struct clk_omap_reg *reg, u8 shift, u32 mask,
 				 u8 clk_mux_flags, u32 *table)
 {
 	struct clk_omap_mux *mux;
@@ -120,7 +120,7 @@ static struct clk *_register_mux(struct device *dev, const char *name,
 	init.num_parents = num_parents;
 
 	/* struct clk_mux assignments */
-	mux->reg = reg;
+	memcpy(&mux->reg, reg, sizeof(*reg));
 	mux->shift = shift;
 	mux->mask = mask;
 	mux->flags = clk_mux_flags;
@@ -140,12 +140,9 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup)
 	struct ti_clk_mux *mux;
 	u32 flags;
 	u8 mux_flags = 0;
-	struct clk_omap_reg *reg_setup;
-	u32 reg;
+	struct clk_omap_reg reg;
 	u32 mask;
 
-	reg_setup = (struct clk_omap_reg *)&reg;
-
 	mux = setup->data;
 	flags = CLK_SET_RATE_NO_REPARENT;
 
@@ -154,8 +151,9 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup)
 		mask--;
 
 	mask = (1 << fls(mask)) - 1;
-	reg_setup->index = mux->module;
-	reg_setup->offset = mux->reg;
+	reg.index = mux->module;
+	reg.offset = mux->reg;
+	reg.ptr = NULL;
 
 	if (mux->flags & CLKF_INDEX_STARTS_AT_ONE)
 		mux_flags |= CLK_MUX_INDEX_ONE;
@@ -164,7 +162,7 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup)
 		flags |= CLK_SET_RATE_PARENT;
 
 	return _register_mux(NULL, setup->name, mux->parents, mux->num_parents,
-			     flags, (void __iomem *)reg, mux->bit_shift, mask,
+			     flags, &reg, mux->bit_shift, mask,
 			     mux_flags, NULL);
 }
 
@@ -177,7 +175,7 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup)
 static void of_mux_clk_setup(struct device_node *node)
 {
 	struct clk *clk;
-	void __iomem *reg;
+	struct clk_omap_reg reg;
 	unsigned int num_parents;
 	const char **parent_names;
 	u8 clk_mux_flags = 0;
@@ -196,9 +194,7 @@ static void of_mux_clk_setup(struct device_node *node)
 
 	of_clk_parent_fill(node, parent_names, num_parents);
 
-	reg = ti_clk_get_reg_addr(node, 0);
-
-	if (IS_ERR(reg))
+	if (ti_clk_get_reg_addr(node, 0, &reg))
 		goto cleanup;
 
 	of_property_read_u32(node, "ti,bit-shift", &shift);
@@ -217,7 +213,7 @@ static void of_mux_clk_setup(struct device_node *node)
 	mask = (1 << fls(mask)) - 1;
 
 	clk = _register_mux(NULL, node->name, parent_names, num_parents,
-			    flags, reg, shift, mask, clk_mux_flags, NULL);
+			    flags, &reg, shift, mask, clk_mux_flags, NULL);
 
 	if (!IS_ERR(clk))
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
@@ -230,7 +226,6 @@ CLK_OF_DECLARE(mux_clk, "ti,mux-clock", of_mux_clk_setup);
 struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup)
 {
 	struct clk_omap_mux *mux;
-	struct clk_omap_reg *reg;
 	int num_parents;
 
 	if (!setup)
@@ -240,12 +235,10 @@ struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup)
 	if (!mux)
 		return ERR_PTR(-ENOMEM);
 
-	reg = (struct clk_omap_reg *)&mux->reg;
-
 	mux->shift = setup->bit_shift;
 
-	reg->index = setup->module;
-	reg->offset = setup->reg;
+	mux->reg.index = setup->module;
+	mux->reg.offset = setup->reg;
 
 	if (setup->flags & CLKF_INDEX_STARTS_AT_ONE)
 		mux->flags |= CLK_MUX_INDEX_ONE;
@@ -268,9 +261,7 @@ static void __init of_ti_composite_mux_clk_setup(struct device_node *node)
 	if (!mux)
 		return;
 
-	mux->reg = ti_clk_get_reg_addr(node, 0);
-
-	if (IS_ERR(mux->reg))
+	if (ti_clk_get_reg_addr(node, 0, &mux->reg))
 		goto cleanup;
 
 	if (!of_property_read_u32(node, "ti,bit-shift", &val))
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index affdabd0b6a1..d18da839b810 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -18,6 +18,18 @@
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
 
+/**
+ * struct clk_omap_reg - OMAP register declaration
+ * @offset: offset from the master IP module base address
+ * @index: index of the master IP module
+ */
+struct clk_omap_reg {
+	void __iomem *ptr;
+	u16 offset;
+	u8 index;
+	u8 flags;
+};
+
 /**
  * struct dpll_data - DPLL registers and integration data
  * @mult_div1_reg: register containing the DPLL M and N bitfields
@@ -67,12 +79,12 @@
  * can be placed into read-only space.
  */
 struct dpll_data {
-	void __iomem		*mult_div1_reg;
+	struct clk_omap_reg	mult_div1_reg;
 	u32			mult_mask;
 	u32			div1_mask;
 	struct clk_hw		*clk_bypass;
 	struct clk_hw		*clk_ref;
-	void __iomem		*control_reg;
+	struct clk_omap_reg	control_reg;
 	u32			enable_mask;
 	unsigned long		last_rounded_rate;
 	u16			last_rounded_m;
@@ -84,8 +96,8 @@ struct dpll_data {
 	u16			max_divider;
 	unsigned long		max_rate;
 	u8			modes;
-	void __iomem		*autoidle_reg;
-	void __iomem		*idlest_reg;
+	struct clk_omap_reg	autoidle_reg;
+	struct clk_omap_reg	idlest_reg;
 	u32			autoidle_mask;
 	u32			freqsel_mask;
 	u32			idlest_mask;
@@ -113,10 +125,10 @@ struct clk_hw_omap;
  */
 struct clk_hw_omap_ops {
 	void	(*find_idlest)(struct clk_hw_omap *oclk,
-			       void __iomem **idlest_reg,
+			       struct clk_omap_reg *idlest_reg,
 			       u8 *idlest_bit, u8 *idlest_val);
 	void	(*find_companion)(struct clk_hw_omap *oclk,
-				  void __iomem **other_reg,
+				  struct clk_omap_reg *other_reg,
 				  u8 *other_bit);
 	void	(*allow_idle)(struct clk_hw_omap *oclk);
 	void	(*deny_idle)(struct clk_hw_omap *oclk);
@@ -139,10 +151,10 @@ struct clk_hw_omap {
 	struct list_head	node;
 	unsigned long		fixed_rate;
 	u8			fixed_div;
-	void __iomem		*enable_reg;
+	struct clk_omap_reg	enable_reg;
 	u8			enable_bit;
 	u8			flags;
-	void __iomem		*clksel_reg;
+	struct clk_omap_reg	clksel_reg;
 	struct dpll_data	*dpll_data;
 	const char		*clkdm_name;
 	struct clockdomain	*clkdm;
@@ -195,16 +207,6 @@ enum {
 	CLK_MAX_MEMMAPS
 };
 
-/**
- * struct clk_omap_reg - OMAP register declaration
- * @offset: offset from the master IP module base address
- * @index: index of the master IP module
- */
-struct clk_omap_reg {
-	u16 offset;
-	u16 index;
-};
-
 /**
  * struct ti_clk_ll_ops - low-level ops for clocks
  * @clk_readl: pointer to register read function
@@ -222,16 +224,16 @@ struct clk_omap_reg {
  * operations not provided directly by clock drivers.
  */
 struct ti_clk_ll_ops {
-	u32	(*clk_readl)(void __iomem *reg);
-	void	(*clk_writel)(u32 val, void __iomem *reg);
+	u32	(*clk_readl)(const struct clk_omap_reg *reg);
+	void	(*clk_writel)(u32 val, const struct clk_omap_reg *reg);
 	int	(*clkdm_clk_enable)(struct clockdomain *clkdm, struct clk *clk);
 	int	(*clkdm_clk_disable)(struct clockdomain *clkdm,
 				     struct clk *clk);
 	struct clockdomain * (*clkdm_lookup)(const char *name);
 	int	(*cm_wait_module_ready)(u8 part, s16 prcm_mod, u16 idlest_reg,
 					u8 idlest_shift);
-	int	(*cm_split_idlest_reg)(void __iomem *idlest_reg, s16 *prcm_inst,
-				       u8 *idlest_reg_id);
+	int	(*cm_split_idlest_reg)(struct clk_omap_reg *idlest_reg,
+				       s16 *prcm_inst, u8 *idlest_reg_id);
 };
 
 #define to_clk_hw_omap(_hw) container_of(_hw, struct clk_hw_omap, hw)
-- 
cgit v1.2.3


From b1eaa950f7e905aaffca0454aa05101ce4f6446a Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 11 Jan 2017 12:07:46 +0200
Subject: ovl: lockdep annotate of nested stacked overlayfs inode lock

An overlayfs instance can be the lower layer of another overlayfs
instance. This setup triggers a lockdep splat of possible recursive
locking of sb->s_type->i_mutex_key in iterate_dir(). Trimmed snip:

 [ INFO: possible recursive locking detected ]
 bash/2468 is trying to acquire lock:
  &sb->s_type->i_mutex_key#14, at: iterate_dir+0x7d/0x15c
 but task is already holding lock:
  &sb->s_type->i_mutex_key#14, at: iterate_dir+0x7d/0x15c

One problem observed with this splat is that ovl_new_inode()
does not call lockdep_annotate_inode_mutex_key() to annotate
the dir inode lock as &sb->s_type->i_mutex_dir_key like other
fs do.

The other problem is that the 2 nested levels of overlayfs inode
lock are annotated using the same key, which is the cause of the
false positive lockdep warning.

Fix this by annotating overlayfs inode lock in ovl_fill_inode()
according to stack level of the super block instance and use
different key for dir vs. non-dir like other fs do.

Here is an edited snip from /proc/lockdep_chains after
iterate_dir() of nested overlayfs:

 [...] &ovl_i_mutex_dir_key[depth]   (stack_depth=2)
 [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
 [...] &type->i_mutex_dir_key        (stack_depth=0)

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index f8fe6bf2036d..17b8418358ed 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -303,6 +303,41 @@ static const struct inode_operations ovl_symlink_inode_operations = {
 	.update_time	= ovl_update_time,
 };
 
+/*
+ * It is possible to stack overlayfs instance on top of another
+ * overlayfs instance as lower layer. We need to annonate the
+ * stackable i_mutex locks according to stack level of the super
+ * block instance. An overlayfs instance can never be in stack
+ * depth 0 (there is always a real fs below it).  An overlayfs
+ * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth].
+ *
+ * For example, here is a snip from /proc/lockdep_chains after
+ * dir_iterate of nested overlayfs:
+ *
+ * [...] &ovl_i_mutex_dir_key[depth]   (stack_depth=2)
+ * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
+ * [...] &type->i_mutex_dir_key        (stack_depth=0)
+ */
+#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
+
+static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
+{
+#ifdef CONFIG_LOCKDEP
+	static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
+	static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
+
+	int depth = inode->i_sb->s_stack_depth - 1;
+
+	if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
+		depth = 0;
+
+	if (S_ISDIR(inode->i_mode))
+		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
+	else
+		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
+#endif
+}
+
 static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
 {
 	inode->i_ino = get_next_ino();
@@ -312,6 +347,8 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
 	inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
 #endif
 
+	ovl_lockdep_annotate_inode_mutex_key(inode);
+
 	switch (mode & S_IFMT) {
 	case S_IFREG:
 		inode->i_op = &ovl_file_inode_operations;
-- 
cgit v1.2.3


From cbe435a182d15d82a10a6b56a96406d957ceb35c Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 24 May 2016 16:56:22 +0200
Subject: mtd: nand: Get rid of the mtd parameter in all auto-detection
 functions

Now that struct nand_chip embeds an mtd_info object we can get rid of the
mtd parameter and extract it from the chip parameter with the nand_to_mtd()
helper.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Richard Weinberger <richard@nod.at>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/mtd/nand/nand_base.c | 54 ++++++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index b0524f8accb6..f7969e0d59fc 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3464,9 +3464,10 @@ static u16 onfi_crc16(u16 crc, u8 const *p, size_t len)
 }
 
 /* Parse the Extended Parameter Page. */
-static int nand_flash_detect_ext_param_page(struct mtd_info *mtd,
-		struct nand_chip *chip, struct nand_onfi_params *p)
+static int nand_flash_detect_ext_param_page(struct nand_chip *chip,
+					    struct nand_onfi_params *p)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct onfi_ext_param_page *ep;
 	struct onfi_ext_section *s;
 	struct onfi_ext_ecc_info *ecc;
@@ -3561,9 +3562,9 @@ static void nand_onfi_detect_micron(struct nand_chip *chip,
 /*
  * Check if the NAND chip is ONFI compliant, returns 1 if it is, 0 otherwise.
  */
-static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip,
-					int *busw)
+static int nand_flash_detect_onfi(struct nand_chip *chip, int *busw)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_onfi_params *p = &chip->onfi_params;
 	int i, j;
 	int val;
@@ -3653,7 +3654,7 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip,
 			chip->cmdfunc = nand_command_lp;
 
 		/* The Extended Parameter Page is supported since ONFI 2.1. */
-		if (nand_flash_detect_ext_param_page(mtd, chip, p))
+		if (nand_flash_detect_ext_param_page(chip, p))
 			pr_warn("Failed to detect ONFI extended param page\n");
 	} else {
 		pr_warn("Could not retrieve ONFI ECC requirements\n");
@@ -3668,9 +3669,9 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip,
 /*
  * Check if the NAND chip is JEDEC compliant, returns 1 if it is, 0 otherwise.
  */
-static int nand_flash_detect_jedec(struct mtd_info *mtd, struct nand_chip *chip,
-					int *busw)
+static int nand_flash_detect_jedec(struct nand_chip *chip, int *busw)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_jedec_params *p = &chip->jedec_params;
 	struct jedec_ecc_info *ecc;
 	int val;
@@ -3820,9 +3821,10 @@ static int nand_get_bits_per_cell(u8 cellinfo)
  * chip. The rest of the parameters must be decoded according to generic or
  * manufacturer-specific "extended ID" decoding patterns.
  */
-static void nand_decode_ext_id(struct mtd_info *mtd, struct nand_chip *chip,
-				u8 id_data[8], int *busw)
+static void nand_decode_ext_id(struct nand_chip *chip, u8 id_data[8],
+			       int *busw)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int extid, id_len;
 	/* The 3rd id byte holds MLC / multichip data */
 	chip->bits_per_cell = nand_get_bits_per_cell(id_data[2]);
@@ -3953,10 +3955,10 @@ static void nand_decode_ext_id(struct mtd_info *mtd, struct nand_chip *chip,
  * decodes a matching ID table entry and assigns the MTD size parameters for
  * the chip.
  */
-static void nand_decode_id(struct mtd_info *mtd, struct nand_chip *chip,
-				struct nand_flash_dev *type, u8 id_data[8],
-				int *busw)
+static void nand_decode_id(struct nand_chip *chip, struct nand_flash_dev *type,
+			   u8 id_data[8], int *busw)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int maf_id = id_data[0];
 
 	mtd->erasesize = type->erasesize;
@@ -3986,9 +3988,9 @@ static void nand_decode_id(struct mtd_info *mtd, struct nand_chip *chip,
  * heuristic patterns using various detected parameters (e.g., manufacturer,
  * page size, cell-type information).
  */
-static void nand_decode_bbm_options(struct mtd_info *mtd,
-				    struct nand_chip *chip, u8 id_data[8])
+static void nand_decode_bbm_options(struct nand_chip *chip, u8 id_data[8])
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int maf_id = id_data[0];
 
 	/* Set the bad block position */
@@ -4023,9 +4025,12 @@ static inline bool is_full_id_nand(struct nand_flash_dev *type)
 	return type->id_len;
 }
 
-static bool find_full_id_nand(struct mtd_info *mtd, struct nand_chip *chip,
-		   struct nand_flash_dev *type, u8 *id_data, int *busw)
+static bool find_full_id_nand(struct nand_chip *chip,
+			      struct nand_flash_dev *type, u8 *id_data,
+			      int *busw)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	if (!strncmp(type->id, id_data, type->id_len)) {
 		mtd->writesize = type->pagesize;
 		mtd->erasesize = type->erasesize;
@@ -4052,10 +4057,11 @@ static bool find_full_id_nand(struct mtd_info *mtd, struct nand_chip *chip,
 /*
  * Get the flash and manufacturer id and lookup if the type is supported.
  */
-static int nand_get_flash_type(struct mtd_info *mtd, struct nand_chip *chip,
+static int nand_get_flash_type(struct nand_chip *chip,
 			       int *maf_id, int *dev_id,
 			       struct nand_flash_dev *type)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int busw;
 	int i, maf_idx;
 	u8 id_data[8];
@@ -4100,7 +4106,7 @@ static int nand_get_flash_type(struct mtd_info *mtd, struct nand_chip *chip,
 
 	for (; type->name != NULL; type++) {
 		if (is_full_id_nand(type)) {
-			if (find_full_id_nand(mtd, chip, type, id_data, &busw))
+			if (find_full_id_nand(chip, type, id_data, &busw))
 				goto ident_done;
 		} else if (*dev_id == type->dev_id) {
 			break;
@@ -4110,11 +4116,11 @@ static int nand_get_flash_type(struct mtd_info *mtd, struct nand_chip *chip,
 	chip->onfi_version = 0;
 	if (!type->name || !type->pagesize) {
 		/* Check if the chip is ONFI compliant */
-		if (nand_flash_detect_onfi(mtd, chip, &busw))
+		if (nand_flash_detect_onfi(chip, &busw))
 			goto ident_done;
 
 		/* Check if the chip is JEDEC compliant */
-		if (nand_flash_detect_jedec(mtd, chip, &busw))
+		if (nand_flash_detect_jedec(chip, &busw))
 			goto ident_done;
 	}
 
@@ -4128,9 +4134,9 @@ static int nand_get_flash_type(struct mtd_info *mtd, struct nand_chip *chip,
 
 	if (!type->pagesize) {
 		/* Decode parameters from extended ID */
-		nand_decode_ext_id(mtd, chip, id_data, &busw);
+		nand_decode_ext_id(chip, id_data, &busw);
 	} else {
-		nand_decode_id(mtd, chip, type, id_data, &busw);
+		nand_decode_id(chip, type, id_data, &busw);
 	}
 	/* Get chip options */
 	chip->options |= type->options;
@@ -4167,7 +4173,7 @@ ident_done:
 		return -EINVAL;
 	}
 
-	nand_decode_bbm_options(mtd, chip, id_data);
+	nand_decode_bbm_options(chip, id_data);
 
 	/* Calculate the address shift from the page size */
 	chip->page_shift = ffs(mtd->writesize) - 1;
@@ -4394,7 +4400,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	nand_set_defaults(chip, chip->options & NAND_BUSWIDTH_16);
 
 	/* Read the flash type */
-	ret = nand_get_flash_type(mtd, chip, &nand_maf_id, &nand_dev_id, table);
+	ret = nand_get_flash_type(chip, &nand_maf_id, &nand_dev_id, table);
 	if (ret) {
 		if (!(chip->options & NAND_SCAN_SILENT_NODEV))
 			pr_warn("No NAND device found\n");
-- 
cgit v1.2.3


From 7f501f0a72036dc29ad9a53811474c393634b401 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 24 May 2016 19:20:05 +0200
Subject: mtd: nand: Store nand ID in struct nand_chip

Store the NAND ID in struct nand_chip to avoid passing id_data and id_len
as function parameters.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Richard Weinberger <richard@nod.at>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/mtd/nand/nand_base.c | 55 ++++++++++++++++++++++++--------------------
 include/linux/mtd/nand.h     | 13 +++++++++++
 2 files changed, 43 insertions(+), 25 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index f7969e0d59fc..6f3ae626cabc 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3821,18 +3821,16 @@ static int nand_get_bits_per_cell(u8 cellinfo)
  * chip. The rest of the parameters must be decoded according to generic or
  * manufacturer-specific "extended ID" decoding patterns.
  */
-static void nand_decode_ext_id(struct nand_chip *chip, u8 id_data[8],
-			       int *busw)
+static void nand_decode_ext_id(struct nand_chip *chip, int *busw)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
-	int extid, id_len;
+	int extid, id_len = chip->id.len;
+	u8 *id_data = chip->id.data;
 	/* The 3rd id byte holds MLC / multichip data */
 	chip->bits_per_cell = nand_get_bits_per_cell(id_data[2]);
 	/* The 4th id byte is the important one */
 	extid = id_data[3];
 
-	id_len = nand_id_len(id_data, 8);
-
 	/*
 	 * Field definitions are in the following datasheets:
 	 * Old style (4,5 byte ID): Samsung K9GAG08U0M (p.32)
@@ -3956,9 +3954,10 @@ static void nand_decode_ext_id(struct nand_chip *chip, u8 id_data[8],
  * the chip.
  */
 static void nand_decode_id(struct nand_chip *chip, struct nand_flash_dev *type,
-			   u8 id_data[8], int *busw)
+			   int *busw)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
+	u8 *id_data = chip->id.data;
 	int maf_id = id_data[0];
 
 	mtd->erasesize = type->erasesize;
@@ -3988,9 +3987,10 @@ static void nand_decode_id(struct nand_chip *chip, struct nand_flash_dev *type,
  * heuristic patterns using various detected parameters (e.g., manufacturer,
  * page size, cell-type information).
  */
-static void nand_decode_bbm_options(struct nand_chip *chip, u8 id_data[8])
+static void nand_decode_bbm_options(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
+	u8 *id_data = chip->id.data;
 	int maf_id = id_data[0];
 
 	/* Set the bad block position */
@@ -4026,10 +4026,10 @@ static inline bool is_full_id_nand(struct nand_flash_dev *type)
 }
 
 static bool find_full_id_nand(struct nand_chip *chip,
-			      struct nand_flash_dev *type, u8 *id_data,
-			      int *busw)
+			      struct nand_flash_dev *type, int *busw)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
+	u8 *id_data = chip->id.data;
 
 	if (!strncmp(type->id, id_data, type->id_len)) {
 		mtd->writesize = type->pagesize;
@@ -4058,13 +4058,13 @@ static bool find_full_id_nand(struct nand_chip *chip,
  * Get the flash and manufacturer id and lookup if the type is supported.
  */
 static int nand_get_flash_type(struct nand_chip *chip,
-			       int *maf_id, int *dev_id,
 			       struct nand_flash_dev *type)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int busw;
 	int i, maf_idx;
-	u8 id_data[8];
+	u8 *id_data = chip->id.data;
+	u8 maf_id, dev_id;
 
 	/*
 	 * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx)
@@ -4079,8 +4079,8 @@ static int nand_get_flash_type(struct nand_chip *chip,
 	chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
 
 	/* Read manufacturer and device IDs */
-	*maf_id = chip->read_byte(mtd);
-	*dev_id = chip->read_byte(mtd);
+	maf_id = chip->read_byte(mtd);
+	dev_id = chip->read_byte(mtd);
 
 	/*
 	 * Try again to make sure, as some systems the bus-hold or other
@@ -4095,20 +4095,22 @@ static int nand_get_flash_type(struct nand_chip *chip,
 	for (i = 0; i < 8; i++)
 		id_data[i] = chip->read_byte(mtd);
 
-	if (id_data[0] != *maf_id || id_data[1] != *dev_id) {
+	if (id_data[0] != maf_id || id_data[1] != dev_id) {
 		pr_info("second ID read did not match %02x,%02x against %02x,%02x\n",
-			*maf_id, *dev_id, id_data[0], id_data[1]);
+			maf_id, dev_id, id_data[0], id_data[1]);
 		return -ENODEV;
 	}
 
+	chip->id.len = nand_id_len(id_data, 8);
+
 	if (!type)
 		type = nand_flash_ids;
 
 	for (; type->name != NULL; type++) {
 		if (is_full_id_nand(type)) {
-			if (find_full_id_nand(chip, type, id_data, &busw))
+			if (find_full_id_nand(chip, type, &busw))
 				goto ident_done;
-		} else if (*dev_id == type->dev_id) {
+		} else if (dev_id == type->dev_id) {
 			break;
 		}
 	}
@@ -4134,9 +4136,9 @@ static int nand_get_flash_type(struct nand_chip *chip,
 
 	if (!type->pagesize) {
 		/* Decode parameters from extended ID */
-		nand_decode_ext_id(chip, id_data, &busw);
+		nand_decode_ext_id(chip, &busw);
 	} else {
-		nand_decode_id(chip, type, id_data, &busw);
+		nand_decode_id(chip, type, &busw);
 	}
 	/* Get chip options */
 	chip->options |= type->options;
@@ -4145,13 +4147,13 @@ static int nand_get_flash_type(struct nand_chip *chip,
 	 * Check if chip is not a Samsung device. Do not clear the
 	 * options for chips which do not have an extended id.
 	 */
-	if (*maf_id != NAND_MFR_SAMSUNG && !type->pagesize)
+	if (maf_id != NAND_MFR_SAMSUNG && !type->pagesize)
 		chip->options &= ~NAND_SAMSUNG_LP_OPTIONS;
 ident_done:
 
 	/* Try to identify manufacturer */
 	for (maf_idx = 0; nand_manuf_ids[maf_idx].id != 0x0; maf_idx++) {
-		if (nand_manuf_ids[maf_idx].id == *maf_id)
+		if (nand_manuf_ids[maf_idx].id == maf_id)
 			break;
 	}
 
@@ -4165,7 +4167,7 @@ ident_done:
 		 * chip correct!
 		 */
 		pr_info("device found, Manufacturer ID: 0x%02x, Chip ID: 0x%02x\n",
-			*maf_id, *dev_id);
+			maf_id, dev_id);
 		pr_info("%s %s\n", nand_manuf_ids[maf_idx].name, mtd->name);
 		pr_warn("bus width %d instead %d bit\n",
 			   (chip->options & NAND_BUSWIDTH_16) ? 16 : 8,
@@ -4173,7 +4175,7 @@ ident_done:
 		return -EINVAL;
 	}
 
-	nand_decode_bbm_options(chip, id_data);
+	nand_decode_bbm_options(chip);
 
 	/* Calculate the address shift from the page size */
 	chip->page_shift = ffs(mtd->writesize) - 1;
@@ -4197,7 +4199,7 @@ ident_done:
 		chip->cmdfunc = nand_command_lp;
 
 	pr_info("device found, Manufacturer ID: 0x%02x, Chip ID: 0x%02x\n",
-		*maf_id, *dev_id);
+		maf_id, dev_id);
 
 	if (chip->onfi_version)
 		pr_info("%s %s\n", nand_manuf_ids[maf_idx].name,
@@ -4400,7 +4402,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	nand_set_defaults(chip, chip->options & NAND_BUSWIDTH_16);
 
 	/* Read the flash type */
-	ret = nand_get_flash_type(chip, &nand_maf_id, &nand_dev_id, table);
+	ret = nand_get_flash_type(chip, table);
 	if (ret) {
 		if (!(chip->options & NAND_SCAN_SILENT_NODEV))
 			pr_warn("No NAND device found\n");
@@ -4425,6 +4427,9 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	if (ret)
 		return ret;
 
+	nand_maf_id = chip->id.data[0];
+	nand_dev_id = chip->id.data[1];
+
 	chip->select_chip(mtd, -1);
 
 	/* Check for a chip array */
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 9591e0fbe5bd..e2c11351b1bd 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -464,6 +464,17 @@ struct nand_jedec_params {
 	__le16 crc;
 } __packed;
 
+/**
+ * struct nand_id - NAND id structure
+ * @data: buffer containing the id bytes. Currently 8 bytes large, but can
+ *	  be extended if required.
+ * @len: ID length.
+ */
+struct nand_id {
+	u8 data[8];
+	int len;
+};
+
 /**
  * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices
  * @lock:               protection lock
@@ -793,6 +804,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
  * @pagebuf_bitflips:	[INTERN] holds the bitflip count for the page which is
  *			currently in data_buf.
  * @subpagesize:	[INTERN] holds the subpagesize
+ * @id:			[INTERN] holds NAND ID
  * @onfi_version:	[INTERN] holds the chip ONFI version (BCD encoded),
  *			non 0 if ONFI supported.
  * @jedec_version:	[INTERN] holds the chip JEDEC version (BCD encoded),
@@ -881,6 +893,7 @@ struct nand_chip {
 	int badblockpos;
 	int badblockbits;
 
+	struct nand_id id;
 	int onfi_version;
 	int jedec_version;
 	union {
-- 
cgit v1.2.3


From 29a198a1592d83f2bc1be3b2631b3bcf3d5b380f Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 24 May 2016 20:17:48 +0200
Subject: mtd: nand: Get rid of busw parameter

Auto-detection functions are passed a busw parameter to retrieve the actual
NAND bus width and eventually set the correct value in chip->options.
Rework the nand_get_flash_type() function to get rid of this extra
parameter and let detection code directly set the NAND_BUSWIDTH_16 flag in
chip->options if needed.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Richard Weinberger <richard@nod.at>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/mtd/nand/nand_base.c | 68 ++++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 6f3ae626cabc..16b082fb89f4 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3385,8 +3385,10 @@ static void nand_shutdown(struct mtd_info *mtd)
 }
 
 /* Set default functions */
-static void nand_set_defaults(struct nand_chip *chip, int busw)
+static void nand_set_defaults(struct nand_chip *chip)
 {
+	unsigned int busw = chip->options & NAND_BUSWIDTH_16;
+
 	/* check for proper chip_delay setup, set 20us if not */
 	if (!chip->chip_delay)
 		chip->chip_delay = 20;
@@ -3562,7 +3564,7 @@ static void nand_onfi_detect_micron(struct nand_chip *chip,
 /*
  * Check if the NAND chip is ONFI compliant, returns 1 if it is, 0 otherwise.
  */
-static int nand_flash_detect_onfi(struct nand_chip *chip, int *busw)
+static int nand_flash_detect_onfi(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_onfi_params *p = &chip->onfi_params;
@@ -3634,9 +3636,7 @@ static int nand_flash_detect_onfi(struct nand_chip *chip, int *busw)
 	chip->blocks_per_die = le32_to_cpu(p->blocks_per_lun);
 
 	if (onfi_feature(chip) & ONFI_FEATURE_16_BIT_BUS)
-		*busw = NAND_BUSWIDTH_16;
-	else
-		*busw = 0;
+		chip->options |= NAND_BUSWIDTH_16;
 
 	if (p->ecc_bits != 0xff) {
 		chip->ecc_strength_ds = p->ecc_bits;
@@ -3669,7 +3669,7 @@ static int nand_flash_detect_onfi(struct nand_chip *chip, int *busw)
 /*
  * Check if the NAND chip is JEDEC compliant, returns 1 if it is, 0 otherwise.
  */
-static int nand_flash_detect_jedec(struct nand_chip *chip, int *busw)
+static int nand_flash_detect_jedec(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_jedec_params *p = &chip->jedec_params;
@@ -3730,9 +3730,7 @@ static int nand_flash_detect_jedec(struct nand_chip *chip, int *busw)
 	chip->bits_per_cell = p->bits_per_cell;
 
 	if (jedec_feature(chip) & JEDEC_FEATURE_16_BIT_BUS)
-		*busw = NAND_BUSWIDTH_16;
-	else
-		*busw = 0;
+		chip->options |= NAND_BUSWIDTH_16;
 
 	/* ECC info */
 	ecc = &p->ecc_info[0];
@@ -3821,7 +3819,7 @@ static int nand_get_bits_per_cell(u8 cellinfo)
  * chip. The rest of the parameters must be decoded according to generic or
  * manufacturer-specific "extended ID" decoding patterns.
  */
-static void nand_decode_ext_id(struct nand_chip *chip, int *busw)
+static void nand_decode_ext_id(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int extid, id_len = chip->id.len;
@@ -3874,7 +3872,6 @@ static void nand_decode_ext_id(struct nand_chip *chip, int *busw)
 		/* Calc blocksize */
 		mtd->erasesize = (128 * 1024) <<
 			(((extid >> 1) & 0x04) | (extid & 0x03));
-		*busw = 0;
 	} else if (id_len == 6 && id_data[0] == NAND_MFR_HYNIX &&
 			!nand_is_slc(chip)) {
 		unsigned int tmp;
@@ -3915,7 +3912,6 @@ static void nand_decode_ext_id(struct nand_chip *chip, int *busw)
 			mtd->erasesize = 768 * 1024;
 		else
 			mtd->erasesize = (64 * 1024) << tmp;
-		*busw = 0;
 	} else {
 		/* Calc pagesize */
 		mtd->writesize = 1024 << (extid & 0x03);
@@ -3928,7 +3924,8 @@ static void nand_decode_ext_id(struct nand_chip *chip, int *busw)
 		mtd->erasesize = (64 * 1024) << (extid & 0x03);
 		extid >>= 2;
 		/* Get buswidth information */
-		*busw = (extid & 0x01) ? NAND_BUSWIDTH_16 : 0;
+		if (extid & 0x1)
+			chip->options |= NAND_BUSWIDTH_16;
 
 		/*
 		 * Toshiba 24nm raw SLC (i.e., not BENAND) have 32B OOB per
@@ -3953,8 +3950,7 @@ static void nand_decode_ext_id(struct nand_chip *chip, int *busw)
  * decodes a matching ID table entry and assigns the MTD size parameters for
  * the chip.
  */
-static void nand_decode_id(struct nand_chip *chip, struct nand_flash_dev *type,
-			   int *busw)
+static void nand_decode_id(struct nand_chip *chip, struct nand_flash_dev *type)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	u8 *id_data = chip->id.data;
@@ -3963,7 +3959,6 @@ static void nand_decode_id(struct nand_chip *chip, struct nand_flash_dev *type,
 	mtd->erasesize = type->erasesize;
 	mtd->writesize = type->pagesize;
 	mtd->oobsize = mtd->writesize / 32;
-	*busw = type->options & NAND_BUSWIDTH_16;
 
 	/* All legacy ID NAND are small-page, SLC */
 	chip->bits_per_cell = 1;
@@ -4026,7 +4021,7 @@ static inline bool is_full_id_nand(struct nand_flash_dev *type)
 }
 
 static bool find_full_id_nand(struct nand_chip *chip,
-			      struct nand_flash_dev *type, int *busw)
+			      struct nand_flash_dev *type)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	u8 *id_data = chip->id.data;
@@ -4044,8 +4039,6 @@ static bool find_full_id_nand(struct nand_chip *chip,
 		chip->onfi_timing_mode_default =
 					type->onfi_timing_mode_default;
 
-		*busw = type->options & NAND_BUSWIDTH_16;
-
 		if (!mtd->name)
 			mtd->name = type->name;
 
@@ -4106,9 +4099,24 @@ static int nand_get_flash_type(struct nand_chip *chip,
 	if (!type)
 		type = nand_flash_ids;
 
+	/*
+	 * Save the NAND_BUSWIDTH_16 flag before letting auto-detection logic
+	 * override it.
+	 * This is required to make sure initial NAND bus width set by the
+	 * NAND controller driver is coherent with the real NAND bus width
+	 * (extracted by auto-detection code).
+	 */
+	busw = chip->options & NAND_BUSWIDTH_16;
+
+	/*
+	 * The flag is only set (never cleared), reset it to its default value
+	 * before starting auto-detection.
+	 */
+	chip->options &= ~NAND_BUSWIDTH_16;
+
 	for (; type->name != NULL; type++) {
 		if (is_full_id_nand(type)) {
-			if (find_full_id_nand(chip, type, &busw))
+			if (find_full_id_nand(chip, type))
 				goto ident_done;
 		} else if (dev_id == type->dev_id) {
 			break;
@@ -4118,11 +4126,11 @@ static int nand_get_flash_type(struct nand_chip *chip,
 	chip->onfi_version = 0;
 	if (!type->name || !type->pagesize) {
 		/* Check if the chip is ONFI compliant */
-		if (nand_flash_detect_onfi(chip, &busw))
+		if (nand_flash_detect_onfi(chip))
 			goto ident_done;
 
 		/* Check if the chip is JEDEC compliant */
-		if (nand_flash_detect_jedec(chip, &busw))
+		if (nand_flash_detect_jedec(chip))
 			goto ident_done;
 	}
 
@@ -4136,9 +4144,9 @@ static int nand_get_flash_type(struct nand_chip *chip,
 
 	if (!type->pagesize) {
 		/* Decode parameters from extended ID */
-		nand_decode_ext_id(chip, &busw);
+		nand_decode_ext_id(chip);
 	} else {
-		nand_decode_id(chip, type, &busw);
+		nand_decode_id(chip, type);
 	}
 	/* Get chip options */
 	chip->options |= type->options;
@@ -4158,9 +4166,8 @@ ident_done:
 	}
 
 	if (chip->options & NAND_BUSWIDTH_AUTO) {
-		WARN_ON(chip->options & NAND_BUSWIDTH_16);
-		chip->options |= busw;
-		nand_set_defaults(chip, busw);
+		WARN_ON(busw & NAND_BUSWIDTH_16);
+		nand_set_defaults(chip);
 	} else if (busw != (chip->options & NAND_BUSWIDTH_16)) {
 		/*
 		 * Check, if buswidth is correct. Hardware drivers should set
@@ -4169,9 +4176,8 @@ ident_done:
 		pr_info("device found, Manufacturer ID: 0x%02x, Chip ID: 0x%02x\n",
 			maf_id, dev_id);
 		pr_info("%s %s\n", nand_manuf_ids[maf_idx].name, mtd->name);
-		pr_warn("bus width %d instead %d bit\n",
-			   (chip->options & NAND_BUSWIDTH_16) ? 16 : 8,
-			   busw ? 16 : 8);
+		pr_warn("bus width %d instead of %d bits\n", busw ? 16 : 8,
+			(chip->options & NAND_BUSWIDTH_16) ? 16 : 8);
 		return -EINVAL;
 	}
 
@@ -4399,7 +4405,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 		return -EINVAL;
 	}
 	/* Set the default functions */
-	nand_set_defaults(chip, chip->options & NAND_BUSWIDTH_16);
+	nand_set_defaults(chip);
 
 	/* Read the flash type */
 	ret = nand_get_flash_type(chip, table);
-- 
cgit v1.2.3


From 7bb427990ee36482afcae859b1883e5fa1244ef2 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 24 May 2016 20:55:33 +0200
Subject: mtd: nand: Rename nand_get_flash_type() into nand_detect()

Since commit 4722c0e958e6 ("mtd: nand: change return type of
nand_get_flash_type() to int"), nand_get_flash_type() no longer returns
a nand_flash_dev object.
Rename the function to match this new behavior.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/nand/nand_base.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 16b082fb89f4..b1eb99e84044 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -4050,8 +4050,7 @@ static bool find_full_id_nand(struct nand_chip *chip,
 /*
  * Get the flash and manufacturer id and lookup if the type is supported.
  */
-static int nand_get_flash_type(struct nand_chip *chip,
-			       struct nand_flash_dev *type)
+static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int busw;
@@ -4408,7 +4407,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	nand_set_defaults(chip);
 
 	/* Read the flash type */
-	ret = nand_get_flash_type(chip, table);
+	ret = nand_detect(chip, table);
 	if (ret) {
 		if (!(chip->options & NAND_SCAN_SILENT_NODEV))
 			pr_warn("No NAND device found\n");
-- 
cgit v1.2.3


From 8cfb9ab68f90703d419870fce7ac21ac401399f2 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Sat, 7 Jan 2017 15:15:57 +0100
Subject: mtd: nand: Rename the nand_manufacturers struct

Drop the 's' at the end of nand_manufacturers since the struct is actually
describing a single manufacturer, not a manufacturer table.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_ids.c | 2 +-
 include/linux/mtd/nand.h    | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 4a2f75b0c200..3f80cfcb5e37 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -169,7 +169,7 @@ struct nand_flash_dev nand_flash_ids[] = {
 };
 
 /* Manufacturer IDs */
-struct nand_manufacturers nand_manuf_ids[] = {
+struct nand_manufacturer nand_manuf_ids[] = {
 	{NAND_MFR_TOSHIBA, "Toshiba"},
 	{NAND_MFR_ESMT, "ESMT"},
 	{NAND_MFR_SAMSUNG, "Samsung"},
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index e2c11351b1bd..9c679e8bde42 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -1062,17 +1062,17 @@ struct nand_flash_dev {
 };
 
 /**
- * struct nand_manufacturers - NAND Flash Manufacturer ID Structure
+ * struct nand_manufacturer - NAND Flash Manufacturer structure
  * @name:	Manufacturer name
  * @id:		manufacturer ID code of device.
 */
-struct nand_manufacturers {
+struct nand_manufacturer {
 	int id;
 	char *name;
 };
 
 extern struct nand_flash_dev nand_flash_ids[];
-extern struct nand_manufacturers nand_manuf_ids[];
+extern struct nand_manufacturer nand_manuf_ids[];
 
 int nand_default_bbt(struct mtd_info *mtd);
 int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
-- 
cgit v1.2.3


From f16bd7ca045729e1104a9353dfd792ea98931b80 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 24 May 2016 23:07:46 +0200
Subject: mtd: nand: Kill the MTD_NAND_IDS Kconfig option

MTD_NAND_IDS is selected by MTD_NAND, which makes it useless. Remove
the Kconfig option and link nand_ids.o into the nand.o object file.
Doing that also prevents creating an extra nand_ids.ko module when
MTD_NAND is activated as a module.

Since nand_ids.c is no longer compiled as a standalone module and the
nand_manuf_ids/nand_flash_ids symbols are only used in nand_base.c, we
can get rid of the MODULE_XXX() and EXPORT_SYMBOL() definitions.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 arch/cris/arch-v32/drivers/Kconfig | 1 -
 drivers/mtd/nand/Kconfig           | 4 ----
 drivers/mtd/nand/Makefile          | 3 +--
 drivers/mtd/nand/nand_ids.c        | 7 -------
 4 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index 2735eb7671a5..b7cd6b9209a9 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -136,7 +136,6 @@ config ETRAX_NANDFLASH
 	bool "NAND flash support"
 	depends on ETRAX_ARCH_V32
 	select MTD_NAND
-	select MTD_NAND_IDS
 	help
 	  This option enables MTD mapping of NAND flash devices.  Needed to use
 	  NAND flash memories.  If unsure, say Y.
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 6d4d5672d1d8..1ac7f321203e 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -13,7 +13,6 @@ config MTD_NAND_ECC_SMC
 menuconfig MTD_NAND
 	tristate "NAND Device Support"
 	depends on MTD
-	select MTD_NAND_IDS
 	select MTD_NAND_ECC
 	help
 	  This enables support for accessing all type of NAND flash
@@ -109,9 +108,6 @@ config MTD_NAND_OMAP_BCH
 config MTD_NAND_OMAP_BCH_BUILD
 	def_tristate MTD_NAND_OMAP2 && MTD_NAND_OMAP_BCH
 
-config MTD_NAND_IDS
-	tristate
-
 config MTD_NAND_RICOH
 	tristate "Ricoh xD card reader"
 	default n
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 19a66e404d5b..bfd5d12b9ade 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -5,7 +5,6 @@
 obj-$(CONFIG_MTD_NAND)			+= nand.o
 obj-$(CONFIG_MTD_NAND_ECC)		+= nand_ecc.o
 obj-$(CONFIG_MTD_NAND_BCH)		+= nand_bch.o
-obj-$(CONFIG_MTD_NAND_IDS)		+= nand_ids.o
 obj-$(CONFIG_MTD_SM_COMMON) 		+= sm_common.o
 
 obj-$(CONFIG_MTD_NAND_CAFE)		+= cafe_nand.o
@@ -61,4 +60,4 @@ obj-$(CONFIG_MTD_NAND_BRCMNAND)		+= brcmnand/
 obj-$(CONFIG_MTD_NAND_QCOM)		+= qcom_nandc.o
 obj-$(CONFIG_MTD_NAND_MTK)		+= mtk_nand.o mtk_ecc.o
 
-nand-objs := nand_base.o nand_bbt.o nand_timings.o
+nand-objs := nand_base.o nand_bbt.o nand_timings.o nand_ids.o
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 3f80cfcb5e37..bd267ade0742 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -188,10 +188,3 @@ struct nand_manufacturer nand_manuf_ids[] = {
 	{NAND_MFR_WINBOND, "Winbond"},
 	{0x0, "Unknown"}
 };
-
-EXPORT_SYMBOL(nand_manuf_ids);
-EXPORT_SYMBOL(nand_flash_ids);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Thomas Gleixner <tglx@linutronix.de>");
-MODULE_DESCRIPTION("Nand device & manufacturer IDs");
-- 
cgit v1.2.3


From bcc678c2d7a0e0af14cb3d858ebd367be378c172 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Sat, 7 Jan 2017 15:48:25 +0100
Subject: mtd: nand: Do not expose the NAND manufacturer table directly

There is no reason to expose the NAND manufacturer table. Provide an
helper function to find manufacturers by their id.

We also turn the nand_manufacturers table into a const array, since its
members are not modified after the initial assignment.

Finally, we remove the sentinel manufacturer entry from the manufacturers
table (we already have the array size information given by ARRAY_SIZE()),
and add the nand_manufacturer_name() helper to handle the "Unknown" case
properly.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 23 +++++++++++------------
 drivers/mtd/nand/nand_ids.c  | 22 ++++++++++++++++++++--
 include/linux/mtd/nand.h     |  9 ++++++++-
 3 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index b1eb99e84044..0120252e0710 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -4052,9 +4052,10 @@ static bool find_full_id_nand(struct nand_chip *chip,
  */
 static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 {
+	const struct nand_manufacturer *manufacturer;
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int busw;
-	int i, maf_idx;
+	int i;
 	u8 *id_data = chip->id.data;
 	u8 maf_id, dev_id;
 
@@ -4159,10 +4160,7 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 ident_done:
 
 	/* Try to identify manufacturer */
-	for (maf_idx = 0; nand_manuf_ids[maf_idx].id != 0x0; maf_idx++) {
-		if (nand_manuf_ids[maf_idx].id == maf_id)
-			break;
-	}
+	manufacturer = nand_get_manufacturer(maf_id);
 
 	if (chip->options & NAND_BUSWIDTH_AUTO) {
 		WARN_ON(busw & NAND_BUSWIDTH_16);
@@ -4174,7 +4172,8 @@ ident_done:
 		 */
 		pr_info("device found, Manufacturer ID: 0x%02x, Chip ID: 0x%02x\n",
 			maf_id, dev_id);
-		pr_info("%s %s\n", nand_manuf_ids[maf_idx].name, mtd->name);
+		pr_info("%s %s\n", nand_manufacturer_name(manufacturer),
+			mtd->name);
 		pr_warn("bus width %d instead of %d bits\n", busw ? 16 : 8,
 			(chip->options & NAND_BUSWIDTH_16) ? 16 : 8);
 		return -EINVAL;
@@ -4207,14 +4206,14 @@ ident_done:
 		maf_id, dev_id);
 
 	if (chip->onfi_version)
-		pr_info("%s %s\n", nand_manuf_ids[maf_idx].name,
-				chip->onfi_params.model);
+		pr_info("%s %s\n", nand_manufacturer_name(manufacturer),
+			chip->onfi_params.model);
 	else if (chip->jedec_version)
-		pr_info("%s %s\n", nand_manuf_ids[maf_idx].name,
-				chip->jedec_params.model);
+		pr_info("%s %s\n", nand_manufacturer_name(manufacturer),
+			chip->jedec_params.model);
 	else
-		pr_info("%s %s\n", nand_manuf_ids[maf_idx].name,
-				type->name);
+		pr_info("%s %s\n", nand_manufacturer_name(manufacturer),
+			type->name);
 
 	pr_info("%d MiB, %s, erase size: %d KiB, page size: %d, OOB size: %d\n",
 		(int)(chip->chipsize >> 20), nand_is_slc(chip) ? "SLC" : "MLC",
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index bd267ade0742..06f59a6b74ff 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -169,7 +169,7 @@ struct nand_flash_dev nand_flash_ids[] = {
 };
 
 /* Manufacturer IDs */
-struct nand_manufacturer nand_manuf_ids[] = {
+static const struct nand_manufacturer nand_manufacturers[] = {
 	{NAND_MFR_TOSHIBA, "Toshiba"},
 	{NAND_MFR_ESMT, "ESMT"},
 	{NAND_MFR_SAMSUNG, "Samsung"},
@@ -186,5 +186,23 @@ struct nand_manufacturer nand_manuf_ids[] = {
 	{NAND_MFR_INTEL, "Intel"},
 	{NAND_MFR_ATO, "ATO"},
 	{NAND_MFR_WINBOND, "Winbond"},
-	{0x0, "Unknown"}
 };
+
+/**
+ * nand_get_manufacturer - Get manufacturer information from the manufacturer
+ *			   ID
+ * @id: manufacturer ID
+ *
+ * Returns a pointer a nand_manufacturer object if the manufacturer is defined
+ * in the NAND manufacturers database, NULL otherwise.
+ */
+const struct nand_manufacturer *nand_get_manufacturer(u8 id)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(nand_manufacturers); i++)
+		if (nand_manufacturers[i].id == id)
+			return &nand_manufacturers[i];
+
+	return NULL;
+}
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 9c679e8bde42..6415aa16043c 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -1071,8 +1071,15 @@ struct nand_manufacturer {
 	char *name;
 };
 
+const struct nand_manufacturer *nand_get_manufacturer(u8 id);
+
+static inline const char *
+nand_manufacturer_name(const struct nand_manufacturer *manufacturer)
+{
+	return manufacturer ? manufacturer->name : "Unknown";
+}
+
 extern struct nand_flash_dev nand_flash_ids[];
-extern struct nand_manufacturer nand_manuf_ids[];
 
 int nand_default_bbt(struct mtd_info *mtd);
 int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
-- 
cgit v1.2.3


From abbe26d144ec22bb067fa414d717b9f7ca2e12bd Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 8 Jun 2016 09:32:55 +0200
Subject: mtd: nand: Add manufacturer specific initialization/detection steps

A lot of NANDs are implementing generic features in a non-generic way,
or are providing advanced auto-detection logic where the NAND ID bytes
meaning changes with the NAND generation.

Providing this vendor specific initialization step will allow us to get
rid of full-id entries in the nand_ids table or all the vendor specific
cases added over the time in the generic NAND ID decoding logic.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 75 ++++++++++++++++++++++++++++++++++++++------
 include/linux/mtd/nand.h     | 35 +++++++++++++++++++++
 2 files changed, 100 insertions(+), 10 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 0120252e0710..92ff6adbd7c9 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3819,7 +3819,7 @@ static int nand_get_bits_per_cell(u8 cellinfo)
  * chip. The rest of the parameters must be decoded according to generic or
  * manufacturer-specific "extended ID" decoding patterns.
  */
-static void nand_decode_ext_id(struct nand_chip *chip)
+void nand_decode_ext_id(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int extid, id_len = chip->id.len;
@@ -3944,6 +3944,7 @@ static void nand_decode_ext_id(struct nand_chip *chip)
 
 	}
 }
+EXPORT_SYMBOL_GPL(nand_decode_ext_id);
 
 /*
  * Old devices have chip data hardcoded in the device ID table. nand_decode_id
@@ -4047,6 +4048,53 @@ static bool find_full_id_nand(struct nand_chip *chip,
 	return false;
 }
 
+/*
+ * Manufacturer detection. Only used when the NAND is not ONFI or JEDEC
+ * compliant and does not have a full-id or legacy-id entry in the nand_ids
+ * table.
+ */
+static void nand_manufacturer_detect(struct nand_chip *chip)
+{
+	/*
+	 * Try manufacturer detection if available and use
+	 * nand_decode_ext_id() otherwise.
+	 */
+	if (chip->manufacturer.desc && chip->manufacturer.desc->ops &&
+	    chip->manufacturer.desc->ops->detect)
+		chip->manufacturer.desc->ops->detect(chip);
+	else
+		nand_decode_ext_id(chip);
+}
+
+/*
+ * Manufacturer initialization. This function is called for all NANDs including
+ * ONFI and JEDEC compliant ones.
+ * Manufacturer drivers should put all their specific initialization code in
+ * their ->init() hook.
+ */
+static int nand_manufacturer_init(struct nand_chip *chip)
+{
+	if (!chip->manufacturer.desc || !chip->manufacturer.desc->ops ||
+	    !chip->manufacturer.desc->ops->init)
+		return 0;
+
+	return chip->manufacturer.desc->ops->init(chip);
+}
+
+/*
+ * Manufacturer cleanup. This function is called for all NANDs including
+ * ONFI and JEDEC compliant ones.
+ * Manufacturer drivers should put all their specific cleanup code in their
+ * ->cleanup() hook.
+ */
+static void nand_manufacturer_cleanup(struct nand_chip *chip)
+{
+	/* Release manufacturer private data */
+	if (chip->manufacturer.desc && chip->manufacturer.desc->ops &&
+	    chip->manufacturer.desc->ops->cleanup)
+		chip->manufacturer.desc->ops->cleanup(chip);
+}
+
 /*
  * Get the flash and manufacturer id and lookup if the type is supported.
  */
@@ -4055,7 +4103,7 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 	const struct nand_manufacturer *manufacturer;
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int busw;
-	int i;
+	int i, ret;
 	u8 *id_data = chip->id.data;
 	u8 maf_id, dev_id;
 
@@ -4096,6 +4144,10 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 
 	chip->id.len = nand_id_len(id_data, 8);
 
+	/* Try to identify manufacturer */
+	manufacturer = nand_get_manufacturer(maf_id);
+	chip->manufacturer.desc = manufacturer;
+
 	if (!type)
 		type = nand_flash_ids;
 
@@ -4142,12 +4194,11 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 
 	chip->chipsize = (uint64_t)type->chipsize << 20;
 
-	if (!type->pagesize) {
-		/* Decode parameters from extended ID */
-		nand_decode_ext_id(chip);
-	} else {
+	if (!type->pagesize)
+		nand_manufacturer_detect(chip);
+	else
 		nand_decode_id(chip, type);
-	}
+
 	/* Get chip options */
 	chip->options |= type->options;
 
@@ -4159,9 +4210,6 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 		chip->options &= ~NAND_SAMSUNG_LP_OPTIONS;
 ident_done:
 
-	/* Try to identify manufacturer */
-	manufacturer = nand_get_manufacturer(maf_id);
-
 	if (chip->options & NAND_BUSWIDTH_AUTO) {
 		WARN_ON(busw & NAND_BUSWIDTH_16);
 		nand_set_defaults(chip);
@@ -4202,6 +4250,10 @@ ident_done:
 	if (mtd->writesize > 512 && chip->cmdfunc == nand_command)
 		chip->cmdfunc = nand_command_lp;
 
+	ret = nand_manufacturer_init(chip);
+	if (ret)
+		return ret;
+
 	pr_info("device found, Manufacturer ID: 0x%02x, Chip ID: 0x%02x\n",
 		maf_id, dev_id);
 
@@ -4947,6 +4999,9 @@ void nand_cleanup(struct nand_chip *chip)
 	if (chip->badblock_pattern && chip->badblock_pattern->options
 			& NAND_BBT_DYNAMICSTRUCT)
 		kfree(chip->badblock_pattern);
+
+	/* Free manufacturer priv data. */
+	nand_manufacturer_cleanup(chip);
 }
 EXPORT_SYMBOL_GPL(nand_cleanup);
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 6415aa16043c..ee9a19f42293 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -731,6 +731,20 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
 	return &conf->timings.sdr;
 }
 
+/**
+ * struct nand_manufacturer_ops - NAND Manufacturer operations
+ * @detect: detect the NAND memory organization and capabilities
+ * @init: initialize all vendor specific fields (like the ->read_retry()
+ *	  implementation) if any.
+ * @cleanup: the ->init() function may have allocated resources, ->cleanup()
+ *	     is here to let vendor specific code release those resources.
+ */
+struct nand_manufacturer_ops {
+	void (*detect)(struct nand_chip *chip);
+	int (*init)(struct nand_chip *chip);
+	void (*cleanup)(struct nand_chip *chip);
+};
+
 /**
  * struct nand_chip - NAND Private Flash Chip Data
  * @mtd:		MTD device registered to the MTD framework
@@ -835,6 +849,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
  *			additional error status checks (determine if errors are
  *			correctable).
  * @write_page:		[REPLACEABLE] High-level page write function
+ * @manufacturer:	[INTERN] Contains manufacturer information
  */
 
 struct nand_chip {
@@ -923,6 +938,11 @@ struct nand_chip {
 	struct nand_bbt_descr *badblock_pattern;
 
 	void *priv;
+
+	struct {
+		const struct nand_manufacturer *desc;
+		void *priv;
+	} manufacturer;
 };
 
 extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops;
@@ -959,6 +979,17 @@ static inline void nand_set_controller_data(struct nand_chip *chip, void *priv)
 	chip->priv = priv;
 }
 
+static inline void nand_set_manufacturer_data(struct nand_chip *chip,
+					      void *priv)
+{
+	chip->manufacturer.priv = priv;
+}
+
+static inline void *nand_get_manufacturer_data(struct nand_chip *chip)
+{
+	return chip->manufacturer.priv;
+}
+
 /*
  * NAND Flash Manufacturer ID Codes
  */
@@ -1065,10 +1096,12 @@ struct nand_flash_dev {
  * struct nand_manufacturer - NAND Flash Manufacturer structure
  * @name:	Manufacturer name
  * @id:		manufacturer ID code of device.
+ * @ops:	manufacturer operations
 */
 struct nand_manufacturer {
 	int id;
 	char *name;
+	const struct nand_manufacturer_ops *ops;
 };
 
 const struct nand_manufacturer *nand_get_manufacturer(u8 id);
@@ -1246,4 +1279,6 @@ int nand_reset(struct nand_chip *chip, int chipnr);
 /* Free resources held by the NAND device */
 void nand_cleanup(struct nand_chip *chip);
 
+/* Default extended ID decoding function */
+void nand_decode_ext_id(struct nand_chip *chip);
 #endif /* __LINUX_MTD_NAND_H */
-- 
cgit v1.2.3


From c51d0ac59f24200dfdccc897ff7c3c9446c7599a Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 8 Jun 2016 10:22:19 +0200
Subject: mtd: nand: Move Samsung specific init/detection logic in
 nand_samsung.c

Move Samsung specific initialization and detection logic into
nand_samsung.c. This is part of the "separate vendor specific code from
core" cleanup process.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/nand/Makefile       |  1 +
 drivers/mtd/nand/nand_base.c    | 52 ++---------------------
 drivers/mtd/nand/nand_ids.c     |  4 +-
 drivers/mtd/nand/nand_samsung.c | 92 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/nand.h        |  2 +
 5 files changed, 101 insertions(+), 50 deletions(-)
 create mode 100644 drivers/mtd/nand/nand_samsung.c

diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index bfd5d12b9ade..d4b90b0f879e 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -61,3 +61,4 @@ obj-$(CONFIG_MTD_NAND_QCOM)		+= qcom_nandc.o
 obj-$(CONFIG_MTD_NAND_MTK)		+= mtk_nand.o mtk_ecc.o
 
 nand-objs := nand_base.o nand_bbt.o nand_timings.o nand_ids.o
+nand-objs += nand_samsung.o
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 92ff6adbd7c9..fd38d59d33a6 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3832,48 +3832,13 @@ void nand_decode_ext_id(struct nand_chip *chip)
 	/*
 	 * Field definitions are in the following datasheets:
 	 * Old style (4,5 byte ID): Samsung K9GAG08U0M (p.32)
-	 * New Samsung (6 byte ID): Samsung K9GAG08U0F (p.44)
 	 * Hynix MLC   (6 byte ID): Hynix H27UBG8T2B (p.22)
 	 *
 	 * Check for ID length, non-zero 6th byte, cell type, and Hynix/Samsung
 	 * ID to decide what to do.
 	 */
-	if (id_len == 6 && id_data[0] == NAND_MFR_SAMSUNG &&
-			!nand_is_slc(chip) && id_data[5] != 0x00) {
-		/* Calc pagesize */
-		mtd->writesize = 2048 << (extid & 0x03);
-		extid >>= 2;
-		/* Calc oobsize */
-		switch (((extid >> 2) & 0x04) | (extid & 0x03)) {
-		case 1:
-			mtd->oobsize = 128;
-			break;
-		case 2:
-			mtd->oobsize = 218;
-			break;
-		case 3:
-			mtd->oobsize = 400;
-			break;
-		case 4:
-			mtd->oobsize = 436;
-			break;
-		case 5:
-			mtd->oobsize = 512;
-			break;
-		case 6:
-			mtd->oobsize = 640;
-			break;
-		case 7:
-		default: /* Other cases are "reserved" (unknown) */
-			mtd->oobsize = 1024;
-			break;
-		}
-		extid >>= 2;
-		/* Calc blocksize */
-		mtd->erasesize = (128 * 1024) <<
-			(((extid >> 1) & 0x04) | (extid & 0x03));
-	} else if (id_len == 6 && id_data[0] == NAND_MFR_HYNIX &&
-			!nand_is_slc(chip)) {
+	if (id_len == 6 && id_data[0] == NAND_MFR_HYNIX &&
+	    !nand_is_slc(chip)) {
 		unsigned int tmp;
 
 		/* Calc pagesize */
@@ -4001,13 +3966,10 @@ static void nand_decode_bbm_options(struct nand_chip *chip)
 	 * Micron devices with 2KiB pages and on SLC Samsung, Hynix, Toshiba,
 	 * AMD/Spansion, and Macronix.  All others scan only the first page.
 	 */
-	if (!nand_is_slc(chip) &&
-			(maf_id == NAND_MFR_SAMSUNG ||
-			 maf_id == NAND_MFR_HYNIX))
+	if (!nand_is_slc(chip) && maf_id == NAND_MFR_HYNIX)
 		chip->bbt_options |= NAND_BBT_SCANLASTPAGE;
 	else if ((nand_is_slc(chip) &&
-				(maf_id == NAND_MFR_SAMSUNG ||
-				 maf_id == NAND_MFR_HYNIX ||
+				(maf_id == NAND_MFR_HYNIX ||
 				 maf_id == NAND_MFR_TOSHIBA ||
 				 maf_id == NAND_MFR_AMD ||
 				 maf_id == NAND_MFR_MACRONIX)) ||
@@ -4202,12 +4164,6 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 	/* Get chip options */
 	chip->options |= type->options;
 
-	/*
-	 * Check if chip is not a Samsung device. Do not clear the
-	 * options for chips which do not have an extended id.
-	 */
-	if (maf_id != NAND_MFR_SAMSUNG && !type->pagesize)
-		chip->options &= ~NAND_SAMSUNG_LP_OPTIONS;
 ident_done:
 
 	if (chip->options & NAND_BUSWIDTH_AUTO) {
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 06f59a6b74ff..8eba7dfe7c1f 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -10,7 +10,7 @@
 #include <linux/mtd/nand.h>
 #include <linux/sizes.h>
 
-#define LP_OPTIONS NAND_SAMSUNG_LP_OPTIONS
+#define LP_OPTIONS 0
 #define LP_OPTIONS16 (LP_OPTIONS | NAND_BUSWIDTH_16)
 
 #define SP_OPTIONS NAND_NEED_READRDY
@@ -172,7 +172,7 @@ struct nand_flash_dev nand_flash_ids[] = {
 static const struct nand_manufacturer nand_manufacturers[] = {
 	{NAND_MFR_TOSHIBA, "Toshiba"},
 	{NAND_MFR_ESMT, "ESMT"},
-	{NAND_MFR_SAMSUNG, "Samsung"},
+	{NAND_MFR_SAMSUNG, "Samsung", &samsung_nand_manuf_ops},
 	{NAND_MFR_FUJITSU, "Fujitsu"},
 	{NAND_MFR_NATIONAL, "National"},
 	{NAND_MFR_RENESAS, "Renesas"},
diff --git a/drivers/mtd/nand/nand_samsung.c b/drivers/mtd/nand/nand_samsung.c
new file mode 100644
index 000000000000..5c259dd5b652
--- /dev/null
+++ b/drivers/mtd/nand/nand_samsung.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Copyright (C) 2017 NextThing Co
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mtd/nand.h>
+
+static void samsung_nand_decode_id(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	/* New Samsung (6 byte ID): Samsung K9GAG08U0F (p.44) */
+	if (chip->id.len == 6 && !nand_is_slc(chip) &&
+	    chip->id.data[5] != 0x00) {
+		u8 extid = chip->id.data[3];
+
+		/* Get pagesize */
+		mtd->writesize = 2048 << (extid & 0x03);
+
+		extid >>= 2;
+
+		/* Get oobsize */
+		switch (((extid >> 2) & 0x4) | (extid & 0x3)) {
+		case 1:
+			mtd->oobsize = 128;
+			break;
+		case 2:
+			mtd->oobsize = 218;
+			break;
+		case 3:
+			mtd->oobsize = 400;
+			break;
+		case 4:
+			mtd->oobsize = 436;
+			break;
+		case 5:
+			mtd->oobsize = 512;
+			break;
+		case 6:
+			mtd->oobsize = 640;
+			break;
+		default:
+			/*
+			 * We should never reach this case, but if that
+			 * happens, this probably means Samsung decided to use
+			 * a different extended ID format, and we should find
+			 * a way to support it.
+			 */
+			WARN(1, "Invalid OOB size value");
+			break;
+		}
+
+		/* Get blocksize */
+		extid >>= 2;
+		mtd->erasesize = (128 * 1024) <<
+				 (((extid >> 1) & 0x04) | (extid & 0x03));
+	} else {
+		nand_decode_ext_id(chip);
+	}
+}
+
+static int samsung_nand_init(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	if (mtd->writesize > 512)
+		chip->options |= NAND_SAMSUNG_LP_OPTIONS;
+
+	if (!nand_is_slc(chip))
+		chip->bbt_options |= NAND_BBT_SCANLASTPAGE;
+	else
+		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
+
+	return 0;
+}
+
+const struct nand_manufacturer_ops samsung_nand_manuf_ops = {
+	.detect = samsung_nand_decode_id,
+	.init = samsung_nand_init,
+};
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index ee9a19f42293..2f83cb55392f 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -1114,6 +1114,8 @@ nand_manufacturer_name(const struct nand_manufacturer *manufacturer)
 
 extern struct nand_flash_dev nand_flash_ids[];
 
+extern const struct nand_manufacturer_ops samsung_nand_manuf_ops;
+
 int nand_default_bbt(struct mtd_info *mtd);
 int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
 int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs);
-- 
cgit v1.2.3


From 01389b6bd2f4f7649cdbb4a99a15d9e0c05d6f8c Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 8 Jun 2016 10:30:18 +0200
Subject: mtd: nand: Move Hynix specific init/detection logic in nand_hynix.c

Move Hynix specific initialization and detection logic into
nand_hynix.c. This is part of the "separate vendor specific code from
core" cleanup process.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/nand/Makefile     |   1 +
 drivers/mtd/nand/nand_base.c  | 114 +++++++++++-------------------------------
 drivers/mtd/nand/nand_hynix.c |  84 +++++++++++++++++++++++++++++++
 drivers/mtd/nand/nand_ids.c   |   2 +-
 include/linux/mtd/nand.h      |   1 +
 5 files changed, 115 insertions(+), 87 deletions(-)
 create mode 100644 drivers/mtd/nand/nand_hynix.c

diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index d4b90b0f879e..ddb2670d9767 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -61,4 +61,5 @@ obj-$(CONFIG_MTD_NAND_QCOM)		+= qcom_nandc.o
 obj-$(CONFIG_MTD_NAND_MTK)		+= mtk_nand.o mtk_ecc.o
 
 nand-objs := nand_base.o nand_bbt.o nand_timings.o nand_ids.o
+nand-objs += nand_hynix.o
 nand-objs += nand_samsung.o
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index fd38d59d33a6..d65db5921a0f 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3829,85 +3829,32 @@ void nand_decode_ext_id(struct nand_chip *chip)
 	/* The 4th id byte is the important one */
 	extid = id_data[3];
 
+	/* Calc pagesize */
+	mtd->writesize = 1024 << (extid & 0x03);
+	extid >>= 2;
+	/* Calc oobsize */
+	mtd->oobsize = (8 << (extid & 0x01)) * (mtd->writesize >> 9);
+	extid >>= 2;
+	/* Calc blocksize. Blocksize is multiples of 64KiB */
+	mtd->erasesize = (64 * 1024) << (extid & 0x03);
+	extid >>= 2;
+	/* Get buswidth information */
+	if (extid & 0x1)
+		chip->options |= NAND_BUSWIDTH_16;
+
 	/*
-	 * Field definitions are in the following datasheets:
-	 * Old style (4,5 byte ID): Samsung K9GAG08U0M (p.32)
-	 * Hynix MLC   (6 byte ID): Hynix H27UBG8T2B (p.22)
-	 *
-	 * Check for ID length, non-zero 6th byte, cell type, and Hynix/Samsung
-	 * ID to decide what to do.
+	 * Toshiba 24nm raw SLC (i.e., not BENAND) have 32B OOB per
+	 * 512B page. For Toshiba SLC, we decode the 5th/6th byte as
+	 * follows:
+	 * - ID byte 6, bits[2:0]: 100b -> 43nm, 101b -> 32nm,
+	 *                         110b -> 24nm
+	 * - ID byte 5, bit[7]:    1 -> BENAND, 0 -> raw SLC
 	 */
-	if (id_len == 6 && id_data[0] == NAND_MFR_HYNIX &&
-	    !nand_is_slc(chip)) {
-		unsigned int tmp;
-
-		/* Calc pagesize */
-		mtd->writesize = 2048 << (extid & 0x03);
-		extid >>= 2;
-		/* Calc oobsize */
-		switch (((extid >> 2) & 0x04) | (extid & 0x03)) {
-		case 0:
-			mtd->oobsize = 128;
-			break;
-		case 1:
-			mtd->oobsize = 224;
-			break;
-		case 2:
-			mtd->oobsize = 448;
-			break;
-		case 3:
-			mtd->oobsize = 64;
-			break;
-		case 4:
-			mtd->oobsize = 32;
-			break;
-		case 5:
-			mtd->oobsize = 16;
-			break;
-		default:
-			mtd->oobsize = 640;
-			break;
-		}
-		extid >>= 2;
-		/* Calc blocksize */
-		tmp = ((extid >> 1) & 0x04) | (extid & 0x03);
-		if (tmp < 0x03)
-			mtd->erasesize = (128 * 1024) << tmp;
-		else if (tmp == 0x03)
-			mtd->erasesize = 768 * 1024;
-		else
-			mtd->erasesize = (64 * 1024) << tmp;
-	} else {
-		/* Calc pagesize */
-		mtd->writesize = 1024 << (extid & 0x03);
-		extid >>= 2;
-		/* Calc oobsize */
-		mtd->oobsize = (8 << (extid & 0x01)) *
-			(mtd->writesize >> 9);
-		extid >>= 2;
-		/* Calc blocksize. Blocksize is multiples of 64KiB */
-		mtd->erasesize = (64 * 1024) << (extid & 0x03);
-		extid >>= 2;
-		/* Get buswidth information */
-		if (extid & 0x1)
-			chip->options |= NAND_BUSWIDTH_16;
-
-		/*
-		 * Toshiba 24nm raw SLC (i.e., not BENAND) have 32B OOB per
-		 * 512B page. For Toshiba SLC, we decode the 5th/6th byte as
-		 * follows:
-		 * - ID byte 6, bits[2:0]: 100b -> 43nm, 101b -> 32nm,
-		 *                         110b -> 24nm
-		 * - ID byte 5, bit[7]:    1 -> BENAND, 0 -> raw SLC
-		 */
-		if (id_len >= 6 && id_data[0] == NAND_MFR_TOSHIBA &&
-				nand_is_slc(chip) &&
-				(id_data[5] & 0x7) == 0x6 /* 24nm */ &&
-				!(id_data[4] & 0x80) /* !BENAND */) {
-			mtd->oobsize = 32 * mtd->writesize >> 9;
-		}
-
-	}
+	if (id_len >= 6 && id_data[0] == NAND_MFR_TOSHIBA &&
+	    nand_is_slc(chip) &&
+	    (id_data[5] & 0x7) == 0x6 /* 24nm */ &&
+	     !(id_data[4] & 0x80) /* !BENAND */)
+		mtd->oobsize = 32 * mtd->writesize >> 9;
 }
 EXPORT_SYMBOL_GPL(nand_decode_ext_id);
 
@@ -3966,15 +3913,10 @@ static void nand_decode_bbm_options(struct nand_chip *chip)
 	 * Micron devices with 2KiB pages and on SLC Samsung, Hynix, Toshiba,
 	 * AMD/Spansion, and Macronix.  All others scan only the first page.
 	 */
-	if (!nand_is_slc(chip) && maf_id == NAND_MFR_HYNIX)
-		chip->bbt_options |= NAND_BBT_SCANLASTPAGE;
-	else if ((nand_is_slc(chip) &&
-				(maf_id == NAND_MFR_HYNIX ||
-				 maf_id == NAND_MFR_TOSHIBA ||
-				 maf_id == NAND_MFR_AMD ||
-				 maf_id == NAND_MFR_MACRONIX)) ||
-			(mtd->writesize == 2048 &&
-			 maf_id == NAND_MFR_MICRON))
+	if ((nand_is_slc(chip) &&
+	     (maf_id == NAND_MFR_TOSHIBA || maf_id == NAND_MFR_AMD ||
+	      maf_id == NAND_MFR_MACRONIX)) ||
+	    (mtd->writesize == 2048 && maf_id == NAND_MFR_MICRON))
 		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
 }
 
diff --git a/drivers/mtd/nand/nand_hynix.c b/drivers/mtd/nand/nand_hynix.c
new file mode 100644
index 000000000000..fbd661688158
--- /dev/null
+++ b/drivers/mtd/nand/nand_hynix.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Copyright (C) 2017 NextThing Co
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mtd/nand.h>
+
+static void hynix_nand_decode_id(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	/* Hynix MLC   (6 byte ID): Hynix H27UBG8T2B (p.22) */
+	if (chip->id.len == 6 && !nand_is_slc(chip)) {
+		u8 tmp, extid = chip->id.data[3];
+
+		/* Extract pagesize */
+		mtd->writesize = 2048 << (extid & 0x03);
+		extid >>= 2;
+
+		/* Extract oobsize */
+		switch (((extid >> 2) & 0x4) | (extid & 0x3)) {
+		case 0:
+			mtd->oobsize = 128;
+			break;
+		case 1:
+			mtd->oobsize = 224;
+			break;
+		case 2:
+			mtd->oobsize = 448;
+			break;
+		case 3:
+			mtd->oobsize = 64;
+			break;
+		case 4:
+			mtd->oobsize = 32;
+			break;
+		case 5:
+			mtd->oobsize = 16;
+			break;
+		default:
+			mtd->oobsize = 640;
+			break;
+		}
+
+		/* Extract blocksize */
+		extid >>= 2;
+		tmp = ((extid >> 1) & 0x04) | (extid & 0x03);
+		if (tmp < 0x03)
+			mtd->erasesize = (128 * 1024) << tmp;
+		else if (tmp == 0x03)
+			mtd->erasesize = 768 * 1024;
+		else
+			mtd->erasesize = (64 * 1024) << tmp;
+	} else {
+		nand_decode_ext_id(chip);
+	}
+}
+
+static int hynix_nand_init(struct nand_chip *chip)
+{
+	if (!nand_is_slc(chip))
+		chip->bbt_options |= NAND_BBT_SCANLASTPAGE;
+	else
+		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
+
+	return 0;
+}
+
+const struct nand_manufacturer_ops hynix_nand_manuf_ops = {
+	.detect = hynix_nand_decode_id,
+	.init = hynix_nand_init,
+};
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 8eba7dfe7c1f..7dc06a807d00 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -177,7 +177,7 @@ static const struct nand_manufacturer nand_manufacturers[] = {
 	{NAND_MFR_NATIONAL, "National"},
 	{NAND_MFR_RENESAS, "Renesas"},
 	{NAND_MFR_STMICRO, "ST Micro"},
-	{NAND_MFR_HYNIX, "Hynix"},
+	{NAND_MFR_HYNIX, "Hynix", &hynix_nand_manuf_ops},
 	{NAND_MFR_MICRON, "Micron"},
 	{NAND_MFR_AMD, "AMD/Spansion"},
 	{NAND_MFR_MACRONIX, "Macronix"},
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 2f83cb55392f..74e3a231cb56 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -1115,6 +1115,7 @@ nand_manufacturer_name(const struct nand_manufacturer *manufacturer)
 extern struct nand_flash_dev nand_flash_ids[];
 
 extern const struct nand_manufacturer_ops samsung_nand_manuf_ops;
+extern const struct nand_manufacturer_ops hynix_nand_manuf_ops;
 
 int nand_default_bbt(struct mtd_info *mtd);
 int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
-- 
cgit v1.2.3


From 9b2d61f80b060ce3ea5af2a99e148b0b214932b2 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 8 Jun 2016 10:34:57 +0200
Subject: mtd: nand: Move Toshiba specific init/detection logic in
 nand_toshiba.c

Move Toshiba specific initialization and detection logic into
nand_toshiba.c. This is part of the "separate vendor specific code from
core" cleanup process.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/nand/Makefile       |  1 +
 drivers/mtd/nand/nand_base.c    | 19 ++-------------
 drivers/mtd/nand/nand_ids.c     |  2 +-
 drivers/mtd/nand/nand_toshiba.c | 51 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/nand.h        |  1 +
 5 files changed, 56 insertions(+), 18 deletions(-)
 create mode 100644 drivers/mtd/nand/nand_toshiba.c

diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index ddb2670d9767..7c059822f479 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -63,3 +63,4 @@ obj-$(CONFIG_MTD_NAND_MTK)		+= mtk_nand.o mtk_ecc.o
 nand-objs := nand_base.o nand_bbt.o nand_timings.o nand_ids.o
 nand-objs += nand_hynix.o
 nand-objs += nand_samsung.o
+nand-objs += nand_toshiba.o
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index d65db5921a0f..36bca97900af 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3822,7 +3822,7 @@ static int nand_get_bits_per_cell(u8 cellinfo)
 void nand_decode_ext_id(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
-	int extid, id_len = chip->id.len;
+	int extid;
 	u8 *id_data = chip->id.data;
 	/* The 3rd id byte holds MLC / multichip data */
 	chip->bits_per_cell = nand_get_bits_per_cell(id_data[2]);
@@ -3841,20 +3841,6 @@ void nand_decode_ext_id(struct nand_chip *chip)
 	/* Get buswidth information */
 	if (extid & 0x1)
 		chip->options |= NAND_BUSWIDTH_16;
-
-	/*
-	 * Toshiba 24nm raw SLC (i.e., not BENAND) have 32B OOB per
-	 * 512B page. For Toshiba SLC, we decode the 5th/6th byte as
-	 * follows:
-	 * - ID byte 6, bits[2:0]: 100b -> 43nm, 101b -> 32nm,
-	 *                         110b -> 24nm
-	 * - ID byte 5, bit[7]:    1 -> BENAND, 0 -> raw SLC
-	 */
-	if (id_len >= 6 && id_data[0] == NAND_MFR_TOSHIBA &&
-	    nand_is_slc(chip) &&
-	    (id_data[5] & 0x7) == 0x6 /* 24nm */ &&
-	     !(id_data[4] & 0x80) /* !BENAND */)
-		mtd->oobsize = 32 * mtd->writesize >> 9;
 }
 EXPORT_SYMBOL_GPL(nand_decode_ext_id);
 
@@ -3914,8 +3900,7 @@ static void nand_decode_bbm_options(struct nand_chip *chip)
 	 * AMD/Spansion, and Macronix.  All others scan only the first page.
 	 */
 	if ((nand_is_slc(chip) &&
-	     (maf_id == NAND_MFR_TOSHIBA || maf_id == NAND_MFR_AMD ||
-	      maf_id == NAND_MFR_MACRONIX)) ||
+	     (maf_id == NAND_MFR_AMD || maf_id == NAND_MFR_MACRONIX)) ||
 	    (mtd->writesize == 2048 && maf_id == NAND_MFR_MICRON))
 		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
 }
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 7dc06a807d00..22ff89e9c478 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -170,7 +170,7 @@ struct nand_flash_dev nand_flash_ids[] = {
 
 /* Manufacturer IDs */
 static const struct nand_manufacturer nand_manufacturers[] = {
-	{NAND_MFR_TOSHIBA, "Toshiba"},
+	{NAND_MFR_TOSHIBA, "Toshiba", &toshiba_nand_manuf_ops},
 	{NAND_MFR_ESMT, "ESMT"},
 	{NAND_MFR_SAMSUNG, "Samsung", &samsung_nand_manuf_ops},
 	{NAND_MFR_FUJITSU, "Fujitsu"},
diff --git a/drivers/mtd/nand/nand_toshiba.c b/drivers/mtd/nand/nand_toshiba.c
new file mode 100644
index 000000000000..fa787ba38dcd
--- /dev/null
+++ b/drivers/mtd/nand/nand_toshiba.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Copyright (C) 2017 NextThing Co
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mtd/nand.h>
+
+static void toshiba_nand_decode_id(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	nand_decode_ext_id(chip);
+
+	/*
+	 * Toshiba 24nm raw SLC (i.e., not BENAND) have 32B OOB per
+	 * 512B page. For Toshiba SLC, we decode the 5th/6th byte as
+	 * follows:
+	 * - ID byte 6, bits[2:0]: 100b -> 43nm, 101b -> 32nm,
+	 *                         110b -> 24nm
+	 * - ID byte 5, bit[7]:    1 -> BENAND, 0 -> raw SLC
+	 */
+	if (chip->id.len >= 6 && nand_is_slc(chip) &&
+	    (chip->id.data[5] & 0x7) == 0x6 /* 24nm */ &&
+	    !(chip->id.data[4] & 0x80) /* !BENAND */)
+		mtd->oobsize = 32 * mtd->writesize >> 9;
+}
+
+static int toshiba_nand_init(struct nand_chip *chip)
+{
+	if (nand_is_slc(chip))
+		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
+
+	return 0;
+}
+
+const struct nand_manufacturer_ops toshiba_nand_manuf_ops = {
+	.detect = toshiba_nand_decode_id,
+	.init = toshiba_nand_init,
+};
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 74e3a231cb56..dd9e3b5ddd4f 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -1114,6 +1114,7 @@ nand_manufacturer_name(const struct nand_manufacturer *manufacturer)
 
 extern struct nand_flash_dev nand_flash_ids[];
 
+extern const struct nand_manufacturer_ops toshiba_nand_manuf_ops;
 extern const struct nand_manufacturer_ops samsung_nand_manuf_ops;
 extern const struct nand_manufacturer_ops hynix_nand_manuf_ops;
 
-- 
cgit v1.2.3


From 10d4e75c36f6c16311dde1461f318210da357219 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 8 Jun 2016 10:38:57 +0200
Subject: mtd: nand: Move Micron specific init logic in nand_micron.c

Move Micron specific initialization logic into nand_micron.c. This is
part of the "separate vendor specific code from core" cleanup process.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/nand/Makefile      |  1 +
 drivers/mtd/nand/nand_base.c   | 32 +---------------
 drivers/mtd/nand/nand_ids.c    |  2 +-
 drivers/mtd/nand/nand_micron.c | 86 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/nand.h       | 21 +----------
 5 files changed, 91 insertions(+), 51 deletions(-)
 create mode 100644 drivers/mtd/nand/nand_micron.c

diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 7c059822f479..11d743155810 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -62,5 +62,6 @@ obj-$(CONFIG_MTD_NAND_MTK)		+= mtk_nand.o mtk_ecc.o
 
 nand-objs := nand_base.o nand_bbt.o nand_timings.o nand_ids.o
 nand-objs += nand_hynix.o
+nand-objs += nand_micron.o
 nand-objs += nand_samsung.o
 nand-objs += nand_toshiba.o
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 36bca97900af..f45e3bbe5f85 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3537,30 +3537,6 @@ ext_out:
 	return ret;
 }
 
-static int nand_setup_read_retry_micron(struct mtd_info *mtd, int retry_mode)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	uint8_t feature[ONFI_SUBFEATURE_PARAM_LEN] = {retry_mode};
-
-	return chip->onfi_set_features(mtd, chip, ONFI_FEATURE_ADDR_READ_RETRY,
-			feature);
-}
-
-/*
- * Configure chip properties from Micron vendor-specific ONFI table
- */
-static void nand_onfi_detect_micron(struct nand_chip *chip,
-		struct nand_onfi_params *p)
-{
-	struct nand_onfi_vendor_micron *micron = (void *)p->vendor;
-
-	if (le16_to_cpu(p->vendor_revision) < 1)
-		return;
-
-	chip->read_retries = micron->read_retry_options;
-	chip->setup_read_retry = nand_setup_read_retry_micron;
-}
-
 /*
  * Check if the NAND chip is ONFI compliant, returns 1 if it is, 0 otherwise.
  */
@@ -3660,9 +3636,6 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 		pr_warn("Could not retrieve ONFI ECC requirements\n");
 	}
 
-	if (p->jedec_id == NAND_MFR_MICRON)
-		nand_onfi_detect_micron(chip, p);
-
 	return 1;
 }
 
@@ -3899,9 +3872,8 @@ static void nand_decode_bbm_options(struct nand_chip *chip)
 	 * Micron devices with 2KiB pages and on SLC Samsung, Hynix, Toshiba,
 	 * AMD/Spansion, and Macronix.  All others scan only the first page.
 	 */
-	if ((nand_is_slc(chip) &&
-	     (maf_id == NAND_MFR_AMD || maf_id == NAND_MFR_MACRONIX)) ||
-	    (mtd->writesize == 2048 && maf_id == NAND_MFR_MICRON))
+	if (nand_is_slc(chip) &&
+	    (maf_id == NAND_MFR_AMD || maf_id == NAND_MFR_MACRONIX))
 		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
 }
 
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 22ff89e9c478..42c6743401c5 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -178,7 +178,7 @@ static const struct nand_manufacturer nand_manufacturers[] = {
 	{NAND_MFR_RENESAS, "Renesas"},
 	{NAND_MFR_STMICRO, "ST Micro"},
 	{NAND_MFR_HYNIX, "Hynix", &hynix_nand_manuf_ops},
-	{NAND_MFR_MICRON, "Micron"},
+	{NAND_MFR_MICRON, "Micron", &micron_nand_manuf_ops},
 	{NAND_MFR_AMD, "AMD/Spansion"},
 	{NAND_MFR_MACRONIX, "Macronix"},
 	{NAND_MFR_EON, "Eon"},
diff --git a/drivers/mtd/nand/nand_micron.c b/drivers/mtd/nand/nand_micron.c
new file mode 100644
index 000000000000..877011069251
--- /dev/null
+++ b/drivers/mtd/nand/nand_micron.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Copyright (C) 2017 NextThing Co
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mtd/nand.h>
+
+struct nand_onfi_vendor_micron {
+	u8 two_plane_read;
+	u8 read_cache;
+	u8 read_unique_id;
+	u8 dq_imped;
+	u8 dq_imped_num_settings;
+	u8 dq_imped_feat_addr;
+	u8 rb_pulldown_strength;
+	u8 rb_pulldown_strength_feat_addr;
+	u8 rb_pulldown_strength_num_settings;
+	u8 otp_mode;
+	u8 otp_page_start;
+	u8 otp_data_prot_addr;
+	u8 otp_num_pages;
+	u8 otp_feat_addr;
+	u8 read_retry_options;
+	u8 reserved[72];
+	u8 param_revision;
+} __packed;
+
+static int micron_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	u8 feature[ONFI_SUBFEATURE_PARAM_LEN] = {retry_mode};
+
+	return chip->onfi_set_features(mtd, chip, ONFI_FEATURE_ADDR_READ_RETRY,
+				       feature);
+}
+
+/*
+ * Configure chip properties from Micron vendor-specific ONFI table
+ */
+static int micron_nand_onfi_init(struct nand_chip *chip)
+{
+	struct nand_onfi_params *p = &chip->onfi_params;
+	struct nand_onfi_vendor_micron *micron = (void *)p->vendor;
+
+	if (!chip->onfi_version)
+		return 0;
+
+	if (le16_to_cpu(p->vendor_revision) < 1)
+		return 0;
+
+	chip->read_retries = micron->read_retry_options;
+	chip->setup_read_retry = micron_nand_setup_read_retry;
+
+	return 0;
+}
+
+static int micron_nand_init(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	int ret;
+
+	ret = micron_nand_onfi_init(chip);
+	if (ret)
+		return ret;
+
+	if (mtd->writesize == 2048)
+		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
+
+	return 0;
+}
+
+const struct nand_manufacturer_ops micron_nand_manuf_ops = {
+	.init = micron_nand_init,
+};
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index dd9e3b5ddd4f..7d0f18ecbf57 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -366,26 +366,6 @@ struct onfi_ext_param_page {
 	 */
 } __packed;
 
-struct nand_onfi_vendor_micron {
-	u8 two_plane_read;
-	u8 read_cache;
-	u8 read_unique_id;
-	u8 dq_imped;
-	u8 dq_imped_num_settings;
-	u8 dq_imped_feat_addr;
-	u8 rb_pulldown_strength;
-	u8 rb_pulldown_strength_feat_addr;
-	u8 rb_pulldown_strength_num_settings;
-	u8 otp_mode;
-	u8 otp_page_start;
-	u8 otp_data_prot_addr;
-	u8 otp_num_pages;
-	u8 otp_feat_addr;
-	u8 read_retry_options;
-	u8 reserved[72];
-	u8 param_revision;
-} __packed;
-
 struct jedec_ecc_info {
 	u8 ecc_bits;
 	u8 codeword_size;
@@ -1117,6 +1097,7 @@ extern struct nand_flash_dev nand_flash_ids[];
 extern const struct nand_manufacturer_ops toshiba_nand_manuf_ops;
 extern const struct nand_manufacturer_ops samsung_nand_manuf_ops;
 extern const struct nand_manufacturer_ops hynix_nand_manuf_ops;
+extern const struct nand_manufacturer_ops micron_nand_manuf_ops;
 
 int nand_default_bbt(struct mtd_info *mtd);
 int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
-- 
cgit v1.2.3


From 229204da53b31d576fcc1c93a33626943ea8202c Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 8 Jun 2016 10:42:23 +0200
Subject: mtd: nand: Move AMD/Spansion specific init/detection logic in
 nand_amd.c

Move AMD/Spansion specific initialization/detection logic into
nand_amd.c. This is part of the "separate vendor specific code from
core" cleanup process.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/nand/Makefile    |  1 +
 drivers/mtd/nand/nand_amd.c  | 51 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/mtd/nand/nand_base.c | 18 +---------------
 drivers/mtd/nand/nand_ids.c  |  2 +-
 include/linux/mtd/nand.h     |  1 +
 5 files changed, 55 insertions(+), 18 deletions(-)
 create mode 100644 drivers/mtd/nand/nand_amd.c

diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 11d743155810..f48ddcc132bc 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_MTD_NAND_QCOM)		+= qcom_nandc.o
 obj-$(CONFIG_MTD_NAND_MTK)		+= mtk_nand.o mtk_ecc.o
 
 nand-objs := nand_base.o nand_bbt.o nand_timings.o nand_ids.o
+nand-objs += nand_amd.o
 nand-objs += nand_hynix.o
 nand-objs += nand_micron.o
 nand-objs += nand_samsung.o
diff --git a/drivers/mtd/nand/nand_amd.c b/drivers/mtd/nand/nand_amd.c
new file mode 100644
index 000000000000..170403a3bfa8
--- /dev/null
+++ b/drivers/mtd/nand/nand_amd.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Copyright (C) 2017 NextThing Co
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mtd/nand.h>
+
+static void amd_nand_decode_id(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	nand_decode_ext_id(chip);
+
+	/*
+	 * Check for Spansion/AMD ID + repeating 5th, 6th byte since
+	 * some Spansion chips have erasesize that conflicts with size
+	 * listed in nand_ids table.
+	 * Data sheet (5 byte ID): Spansion S30ML-P ORNAND (p.39)
+	 */
+	if (chip->id.data[4] != 0x00 && chip->id.data[5] == 0x00 &&
+	    chip->id.data[6] == 0x00 && chip->id.data[7] == 0x00 &&
+	    mtd->writesize == 512) {
+		mtd->erasesize = 128 * 1024;
+		mtd->erasesize <<= ((chip->id.data[3] & 0x03) << 1);
+	}
+}
+
+static int amd_nand_init(struct nand_chip *chip)
+{
+	if (nand_is_slc(chip))
+		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
+
+	return 0;
+}
+
+const struct nand_manufacturer_ops amd_nand_manuf_ops = {
+	.detect = amd_nand_decode_id,
+	.init = amd_nand_init,
+};
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index f45e3bbe5f85..a9060bcb9eb5 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3825,8 +3825,6 @@ EXPORT_SYMBOL_GPL(nand_decode_ext_id);
 static void nand_decode_id(struct nand_chip *chip, struct nand_flash_dev *type)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
-	u8 *id_data = chip->id.data;
-	int maf_id = id_data[0];
 
 	mtd->erasesize = type->erasesize;
 	mtd->writesize = type->pagesize;
@@ -3834,19 +3832,6 @@ static void nand_decode_id(struct nand_chip *chip, struct nand_flash_dev *type)
 
 	/* All legacy ID NAND are small-page, SLC */
 	chip->bits_per_cell = 1;
-
-	/*
-	 * Check for Spansion/AMD ID + repeating 5th, 6th byte since
-	 * some Spansion chips have erasesize that conflicts with size
-	 * listed in nand_ids table.
-	 * Data sheet (5 byte ID): Spansion S30ML-P ORNAND (p.39)
-	 */
-	if (maf_id == NAND_MFR_AMD && id_data[4] != 0x00 && id_data[5] == 0x00
-			&& id_data[6] == 0x00 && id_data[7] == 0x00
-			&& mtd->writesize == 512) {
-		mtd->erasesize = 128 * 1024;
-		mtd->erasesize <<= ((id_data[3] & 0x03) << 1);
-	}
 }
 
 /*
@@ -3872,8 +3857,7 @@ static void nand_decode_bbm_options(struct nand_chip *chip)
 	 * Micron devices with 2KiB pages and on SLC Samsung, Hynix, Toshiba,
 	 * AMD/Spansion, and Macronix.  All others scan only the first page.
 	 */
-	if (nand_is_slc(chip) &&
-	    (maf_id == NAND_MFR_AMD || maf_id == NAND_MFR_MACRONIX))
+	if (nand_is_slc(chip) && maf_id == NAND_MFR_MACRONIX)
 		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
 }
 
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 42c6743401c5..af7b92e08fbd 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -179,7 +179,7 @@ static const struct nand_manufacturer nand_manufacturers[] = {
 	{NAND_MFR_STMICRO, "ST Micro"},
 	{NAND_MFR_HYNIX, "Hynix", &hynix_nand_manuf_ops},
 	{NAND_MFR_MICRON, "Micron", &micron_nand_manuf_ops},
-	{NAND_MFR_AMD, "AMD/Spansion"},
+	{NAND_MFR_AMD, "AMD/Spansion", &amd_nand_manuf_ops},
 	{NAND_MFR_MACRONIX, "Macronix"},
 	{NAND_MFR_EON, "Eon"},
 	{NAND_MFR_SANDISK, "SanDisk"},
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 7d0f18ecbf57..97dce42778e9 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -1098,6 +1098,7 @@ extern const struct nand_manufacturer_ops toshiba_nand_manuf_ops;
 extern const struct nand_manufacturer_ops samsung_nand_manuf_ops;
 extern const struct nand_manufacturer_ops hynix_nand_manuf_ops;
 extern const struct nand_manufacturer_ops micron_nand_manuf_ops;
+extern const struct nand_manufacturer_ops amd_nand_manuf_ops;
 
 int nand_default_bbt(struct mtd_info *mtd);
 int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
-- 
cgit v1.2.3


From 3b5206f4be9b65d2f0f85b3239cf117a1d0de7ce Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 8 Jun 2016 10:43:26 +0200
Subject: mtd: nand: Move Macronix specific initialization in nand_macronix.c

Move Macronix specific initialization logic into nand_macronix.c. This
is part of the "separate vendor specific code from core" cleanup
process.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/Makefile        |  1 +
 drivers/mtd/nand/nand_base.c     | 11 -----------
 drivers/mtd/nand/nand_ids.c      |  2 +-
 drivers/mtd/nand/nand_macronix.c | 30 ++++++++++++++++++++++++++++++
 include/linux/mtd/nand.h         |  1 +
 5 files changed, 33 insertions(+), 12 deletions(-)
 create mode 100644 drivers/mtd/nand/nand_macronix.c

diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index f48ddcc132bc..098b8791f10a 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_MTD_NAND_MTK)		+= mtk_nand.o mtk_ecc.o
 nand-objs := nand_base.o nand_bbt.o nand_timings.o nand_ids.o
 nand-objs += nand_amd.o
 nand-objs += nand_hynix.o
+nand-objs += nand_macronix.o
 nand-objs += nand_micron.o
 nand-objs += nand_samsung.o
 nand-objs += nand_toshiba.o
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index a9060bcb9eb5..685376d8ceab 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3842,23 +3842,12 @@ static void nand_decode_id(struct nand_chip *chip, struct nand_flash_dev *type)
 static void nand_decode_bbm_options(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
-	u8 *id_data = chip->id.data;
-	int maf_id = id_data[0];
 
 	/* Set the bad block position */
 	if (mtd->writesize > 512 || (chip->options & NAND_BUSWIDTH_16))
 		chip->badblockpos = NAND_LARGE_BADBLOCK_POS;
 	else
 		chip->badblockpos = NAND_SMALL_BADBLOCK_POS;
-
-	/*
-	 * Bad block marker is stored in the last page of each block on Samsung
-	 * and Hynix MLC devices; stored in first two pages of each block on
-	 * Micron devices with 2KiB pages and on SLC Samsung, Hynix, Toshiba,
-	 * AMD/Spansion, and Macronix.  All others scan only the first page.
-	 */
-	if (nand_is_slc(chip) && maf_id == NAND_MFR_MACRONIX)
-		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
 }
 
 static inline bool is_full_id_nand(struct nand_flash_dev *type)
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index af7b92e08fbd..9d5ca0e540b5 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -180,7 +180,7 @@ static const struct nand_manufacturer nand_manufacturers[] = {
 	{NAND_MFR_HYNIX, "Hynix", &hynix_nand_manuf_ops},
 	{NAND_MFR_MICRON, "Micron", &micron_nand_manuf_ops},
 	{NAND_MFR_AMD, "AMD/Spansion", &amd_nand_manuf_ops},
-	{NAND_MFR_MACRONIX, "Macronix"},
+	{NAND_MFR_MACRONIX, "Macronix", &macronix_nand_manuf_ops},
 	{NAND_MFR_EON, "Eon"},
 	{NAND_MFR_SANDISK, "SanDisk"},
 	{NAND_MFR_INTEL, "Intel"},
diff --git a/drivers/mtd/nand/nand_macronix.c b/drivers/mtd/nand/nand_macronix.c
new file mode 100644
index 000000000000..84855c3e1a02
--- /dev/null
+++ b/drivers/mtd/nand/nand_macronix.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Copyright (C) 2017 NextThing Co
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mtd/nand.h>
+
+static int macronix_nand_init(struct nand_chip *chip)
+{
+	if (nand_is_slc(chip))
+		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
+
+	return 0;
+}
+
+const struct nand_manufacturer_ops macronix_nand_manuf_ops = {
+	.init = macronix_nand_init,
+};
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 97dce42778e9..c7de017c7f4c 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -1099,6 +1099,7 @@ extern const struct nand_manufacturer_ops samsung_nand_manuf_ops;
 extern const struct nand_manufacturer_ops hynix_nand_manuf_ops;
 extern const struct nand_manufacturer_ops micron_nand_manuf_ops;
 extern const struct nand_manufacturer_ops amd_nand_manuf_ops;
+extern const struct nand_manufacturer_ops macronix_nand_manuf_ops;
 
 int nand_default_bbt(struct mtd_info *mtd);
 int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
-- 
cgit v1.2.3


From 8fc82d456e40a073d7e85d7825a345698f4b5a40 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 8 Jun 2016 10:45:28 +0200
Subject: mtd: nand: samsung: Retrieve ECC requirements from extended ID

On some nand controllers with hw-ecc the controller code wants to know
the ecc strength and size and having these as 0, 0 is not accepted.

Specifying these in devicetree is possible but undesirable as the nand
may be different in different production runs of the same board, so it
is better to get this info from the nand id where possible.

This commit adds code to read the ecc strength and size from the nand
for Samsung extended-id nands. This code is based on the info for the 5th
id byte in the datasheets for the following Samsung nands: K9GAG08U0E,
K9GAG08U0F, K9GAG08X0D, K9GBG08U0A, K9GBG08U0B. These all use these bits
in the exact same way.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_samsung.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/mtd/nand/nand_samsung.c b/drivers/mtd/nand/nand_samsung.c
index 5c259dd5b652..9cfc4035a420 100644
--- a/drivers/mtd/nand/nand_samsung.c
+++ b/drivers/mtd/nand/nand_samsung.c
@@ -66,6 +66,26 @@ static void samsung_nand_decode_id(struct nand_chip *chip)
 		extid >>= 2;
 		mtd->erasesize = (128 * 1024) <<
 				 (((extid >> 1) & 0x04) | (extid & 0x03));
+
+		/* Extract ECC requirements from 5th id byte*/
+		extid = (chip->id.data[4] >> 4) & 0x07;
+		if (extid < 5) {
+			chip->ecc_step_ds = 512;
+			chip->ecc_strength_ds = 1 << extid;
+		} else {
+			chip->ecc_step_ds = 1024;
+			switch (extid) {
+			case 5:
+				chip->ecc_strength_ds = 24;
+				break;
+			case 6:
+				chip->ecc_strength_ds = 40;
+				break;
+			case 7:
+				chip->ecc_strength_ds = 60;
+				break;
+			}
+		}
 	} else {
 		nand_decode_ext_id(chip);
 	}
-- 
cgit v1.2.3


From 78f3482d74809f278614470fb68536db16152ec8 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Fri, 27 May 2016 14:36:36 +0200
Subject: mtd: nand: hynix: Rework NAND ID decoding to extract more information

The current NAND ID detection in nand_hynix.c is not handling the
different scheme used by Hynix, thus forcing developers to add new
entry to the nand_ids table each time they want to support a new MLC
NAND.

Enhance the detection logic to handle all known formats. This does not
necessarily mean we are handling all the cases, but if new formats are
discovered, the code should evolve to take them into account instead of
adding more full-id entries to the nand_ids table.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/nand/nand_hynix.c | 228 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 209 insertions(+), 19 deletions(-)

diff --git a/drivers/mtd/nand/nand_hynix.c b/drivers/mtd/nand/nand_hynix.c
index fbd661688158..57c4ed894dfb 100644
--- a/drivers/mtd/nand/nand_hynix.c
+++ b/drivers/mtd/nand/nand_hynix.c
@@ -16,21 +16,56 @@
  */
 
 #include <linux/mtd/nand.h>
+#include <linux/sizes.h>
 
-static void hynix_nand_decode_id(struct nand_chip *chip)
+static bool hynix_nand_has_valid_jedecid(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
+	u8 jedecid[6] = { };
+	int i = 0;
+
+	chip->cmdfunc(mtd, NAND_CMD_READID, 0x40, -1);
+	for (i = 0; i < 5; i++)
+		jedecid[i] = chip->read_byte(mtd);
 
-	/* Hynix MLC   (6 byte ID): Hynix H27UBG8T2B (p.22) */
-	if (chip->id.len == 6 && !nand_is_slc(chip)) {
-		u8 tmp, extid = chip->id.data[3];
+	return !strcmp("JEDEC", jedecid);
+}
 
-		/* Extract pagesize */
-		mtd->writesize = 2048 << (extid & 0x03);
-		extid >>= 2;
+static void hynix_nand_extract_oobsize(struct nand_chip *chip,
+				       bool valid_jedecid)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	u8 oobsize;
 
-		/* Extract oobsize */
-		switch (((extid >> 2) & 0x4) | (extid & 0x3)) {
+	oobsize = ((chip->id.data[3] >> 2) & 0x3) |
+		  ((chip->id.data[3] >> 4) & 0x4);
+
+	if (valid_jedecid) {
+		switch (oobsize) {
+		case 0:
+			mtd->oobsize = 2048;
+			break;
+		case 1:
+			mtd->oobsize = 1664;
+			break;
+		case 2:
+			mtd->oobsize = 1024;
+			break;
+		case 3:
+			mtd->oobsize = 640;
+			break;
+		default:
+			/*
+			 * We should never reach this case, but if that
+			 * happens, this probably means Hynix decided to use
+			 * a different extended ID format, and we should find
+			 * a way to support it.
+			 */
+			WARN(1, "Invalid OOB size");
+			break;
+		}
+	} else {
+		switch (oobsize) {
 		case 0:
 			mtd->oobsize = 128;
 			break;
@@ -49,23 +84,178 @@ static void hynix_nand_decode_id(struct nand_chip *chip)
 		case 5:
 			mtd->oobsize = 16;
 			break;
-		default:
+		case 6:
 			mtd->oobsize = 640;
 			break;
+		default:
+			/*
+			 * We should never reach this case, but if that
+			 * happens, this probably means Hynix decided to use
+			 * a different extended ID format, and we should find
+			 * a way to support it.
+			 */
+			WARN(1, "Invalid OOB size");
+			break;
 		}
+	}
+}
+
+static void hynix_nand_extract_ecc_requirements(struct nand_chip *chip,
+						bool valid_jedecid)
+{
+	u8 ecc_level = (chip->id.data[4] >> 4) & 0x7;
+
+	if (valid_jedecid) {
+		/* Reference: H27UCG8T2E datasheet */
+		chip->ecc_step_ds = 1024;
 
-		/* Extract blocksize */
-		extid >>= 2;
-		tmp = ((extid >> 1) & 0x04) | (extid & 0x03);
-		if (tmp < 0x03)
-			mtd->erasesize = (128 * 1024) << tmp;
-		else if (tmp == 0x03)
-			mtd->erasesize = 768 * 1024;
-		else
-			mtd->erasesize = (64 * 1024) << tmp;
+		switch (ecc_level) {
+		case 0:
+			chip->ecc_step_ds = 0;
+			chip->ecc_strength_ds = 0;
+			break;
+		case 1:
+			chip->ecc_strength_ds = 4;
+			break;
+		case 2:
+			chip->ecc_strength_ds = 24;
+			break;
+		case 3:
+			chip->ecc_strength_ds = 32;
+			break;
+		case 4:
+			chip->ecc_strength_ds = 40;
+			break;
+		case 5:
+			chip->ecc_strength_ds = 50;
+			break;
+		case 6:
+			chip->ecc_strength_ds = 60;
+			break;
+		default:
+			/*
+			 * We should never reach this case, but if that
+			 * happens, this probably means Hynix decided to use
+			 * a different extended ID format, and we should find
+			 * a way to support it.
+			 */
+			WARN(1, "Invalid ECC requirements");
+		}
+	} else {
+		/*
+		 * The ECC requirements field meaning depends on the
+		 * NAND technology.
+		 */
+		u8 nand_tech = chip->id.data[5] & 0x3;
+
+		if (nand_tech < 3) {
+			/* > 26nm, reference: H27UBG8T2A datasheet */
+			if (ecc_level < 5) {
+				chip->ecc_step_ds = 512;
+				chip->ecc_strength_ds = 1 << ecc_level;
+			} else if (ecc_level < 7) {
+				if (ecc_level == 5)
+					chip->ecc_step_ds = 2048;
+				else
+					chip->ecc_step_ds = 1024;
+				chip->ecc_strength_ds = 24;
+			} else {
+				/*
+				 * We should never reach this case, but if that
+				 * happens, this probably means Hynix decided
+				 * to use a different extended ID format, and
+				 * we should find a way to support it.
+				 */
+				WARN(1, "Invalid ECC requirements");
+			}
+		} else {
+			/* <= 26nm, reference: H27UBG8T2B datasheet */
+			if (!ecc_level) {
+				chip->ecc_step_ds = 0;
+				chip->ecc_strength_ds = 0;
+			} else if (ecc_level < 5) {
+				chip->ecc_step_ds = 512;
+				chip->ecc_strength_ds = 1 << (ecc_level - 1);
+			} else {
+				chip->ecc_step_ds = 1024;
+				chip->ecc_strength_ds = 24 +
+							(8 * (ecc_level - 5));
+			}
+		}
+	}
+}
+
+static void hynix_nand_extract_scrambling_requirements(struct nand_chip *chip,
+						       bool valid_jedecid)
+{
+	u8 nand_tech;
+
+	/* We need scrambling on all TLC NANDs*/
+	if (chip->bits_per_cell > 2)
+		chip->options |= NAND_NEED_SCRAMBLING;
+
+	/* And on MLC NANDs with sub-3xnm process */
+	if (valid_jedecid) {
+		nand_tech = chip->id.data[5] >> 4;
+
+		/* < 3xnm */
+		if (nand_tech > 0)
+			chip->options |= NAND_NEED_SCRAMBLING;
 	} else {
+		nand_tech = chip->id.data[5] & 0x3;
+
+		/* < 32nm */
+		if (nand_tech > 2)
+			chip->options |= NAND_NEED_SCRAMBLING;
+	}
+}
+
+static void hynix_nand_decode_id(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	bool valid_jedecid;
+	u8 tmp;
+
+	/*
+	 * Exclude all SLC NANDs from this advanced detection scheme.
+	 * According to the ranges defined in several datasheets, it might
+	 * appear that even SLC NANDs could fall in this extended ID scheme.
+	 * If that the case rework the test to let SLC NANDs go through the
+	 * detection process.
+	 */
+	if (chip->id.len < 6 || nand_is_slc(chip)) {
 		nand_decode_ext_id(chip);
+		return;
 	}
+
+	/* Extract pagesize */
+	mtd->writesize = 2048 << (chip->id.data[3] & 0x03);
+
+	tmp = (chip->id.data[3] >> 4) & 0x3;
+	/*
+	 * When bit7 is set that means we start counting at 1MiB, otherwise
+	 * we start counting at 128KiB and shift this value the content of
+	 * ID[3][4:5].
+	 * The only exception is when ID[3][4:5] == 3 and ID[3][7] == 0, in
+	 * this case the erasesize is set to 768KiB.
+	 */
+	if (chip->id.data[3] & 0x80)
+		mtd->erasesize = SZ_1M << tmp;
+	else if (tmp == 3)
+		mtd->erasesize = SZ_512K + SZ_256K;
+	else
+		mtd->erasesize = SZ_128K << tmp;
+
+	/*
+	 * Modern Toggle DDR NANDs have a valid JEDECID even though they are
+	 * not exposing a valid JEDEC parameter table.
+	 * These NANDs use a different NAND ID scheme.
+	 */
+	valid_jedecid = hynix_nand_has_valid_jedecid(chip);
+
+	hynix_nand_extract_oobsize(chip, valid_jedecid);
+	hynix_nand_extract_ecc_requirements(chip, valid_jedecid);
+	hynix_nand_extract_scrambling_requirements(chip, valid_jedecid);
 }
 
 static int hynix_nand_init(struct nand_chip *chip)
-- 
cgit v1.2.3


From 626994e0748019f9987ac520f1dcfd0adb7e34c6 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Fri, 27 May 2016 10:15:03 +0200
Subject: mtd: nand: hynix: Add read-retry support for 1x nm MLC NANDs

All Hynix MLC NANDs produced with the 1x nm process support read-retry.
This read retry implementation should also be re-usable for other Hynix
NANDs, but the method to retrieve the read-retry parameters from the
read-retry OTP area might change a bit (some NANDs are even using a fixed
set of values instead of retrieving those information from the OTP area).

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/nand/nand_hynix.c | 357 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 356 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/nand_hynix.c b/drivers/mtd/nand/nand_hynix.c
index 57c4ed894dfb..2a5d0efea498 100644
--- a/drivers/mtd/nand/nand_hynix.c
+++ b/drivers/mtd/nand/nand_hynix.c
@@ -17,6 +17,53 @@
 
 #include <linux/mtd/nand.h>
 #include <linux/sizes.h>
+#include <linux/slab.h>
+
+#define NAND_HYNIX_CMD_SET_PARAMS	0x36
+#define NAND_HYNIX_CMD_APPLY_PARAMS	0x16
+
+#define NAND_HYNIX_1XNM_RR_REPEAT	8
+
+/**
+ * struct hynix_read_retry - read-retry data
+ * @nregs: number of register to set when applying a new read-retry mode
+ * @regs: register offsets (NAND chip dependent)
+ * @values: array of values to set in registers. The array size is equal to
+ *	    (nregs * nmodes)
+ */
+struct hynix_read_retry {
+	int nregs;
+	const u8 *regs;
+	u8 values[0];
+};
+
+/**
+ * struct hynix_nand - private Hynix NAND struct
+ * @nand_technology: manufacturing process expressed in picometer
+ * @read_retry: read-retry information
+ */
+struct hynix_nand {
+	const struct hynix_read_retry *read_retry;
+};
+
+/**
+ * struct hynix_read_retry_otp - structure describing how the read-retry OTP
+ *				 area
+ * @nregs: number of hynix private registers to set before reading the reading
+ *	   the OTP area
+ * @regs: registers that should be configured
+ * @values: values that should be set in regs
+ * @page: the address to pass to the READ_PAGE command. Depends on the NAND
+ *	  chip
+ * @size: size of the read-retry OTP section
+ */
+struct hynix_read_retry_otp {
+	int nregs;
+	const u8 *regs;
+	const u8 *values;
+	int page;
+	int size;
+};
 
 static bool hynix_nand_has_valid_jedecid(struct nand_chip *chip)
 {
@@ -31,6 +78,288 @@ static bool hynix_nand_has_valid_jedecid(struct nand_chip *chip)
 	return !strcmp("JEDEC", jedecid);
 }
 
+static int hynix_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct hynix_nand *hynix = nand_get_manufacturer_data(chip);
+	const u8 *values;
+	int status;
+	int i;
+
+	values = hynix->read_retry->values +
+		 (retry_mode * hynix->read_retry->nregs);
+
+	/* Enter 'Set Hynix Parameters' mode */
+	chip->cmdfunc(mtd, NAND_HYNIX_CMD_SET_PARAMS, -1, -1);
+
+	/*
+	 * Configure the NAND in the requested read-retry mode.
+	 * This is done by setting pre-defined values in internal NAND
+	 * registers.
+	 *
+	 * The set of registers is NAND specific, and the values are either
+	 * predefined or extracted from an OTP area on the NAND (values are
+	 * probably tweaked at production in this case).
+	 */
+	for (i = 0; i < hynix->read_retry->nregs; i++) {
+		int column = hynix->read_retry->regs[i];
+
+		column |= column << 8;
+		chip->cmdfunc(mtd, NAND_CMD_NONE, column, -1);
+		chip->write_byte(mtd, values[i]);
+	}
+
+	/* Apply the new settings. */
+	chip->cmdfunc(mtd, NAND_HYNIX_CMD_APPLY_PARAMS, -1, -1);
+
+	status = chip->waitfunc(mtd, chip);
+	if (status & NAND_STATUS_FAIL)
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * hynix_get_majority - get the value that is occurring the most in a given
+ *			set of values
+ * @in: the array of values to test
+ * @repeat: the size of the in array
+ * @out: pointer used to store the output value
+ *
+ * This function implements the 'majority check' logic that is supposed to
+ * overcome the unreliability of MLC NANDs when reading the OTP area storing
+ * the read-retry parameters.
+ *
+ * It's based on a pretty simple assumption: if we repeat the same value
+ * several times and then take the one that is occurring the most, we should
+ * find the correct value.
+ * Let's hope this dummy algorithm prevents us from losing the read-retry
+ * parameters.
+ */
+static int hynix_get_majority(const u8 *in, int repeat, u8 *out)
+{
+	int i, j, half = repeat / 2;
+
+	/*
+	 * We only test the first half of the in array because we must ensure
+	 * that the value is at least occurring repeat / 2 times.
+	 *
+	 * This loop is suboptimal since we may count the occurrences of the
+	 * same value several time, but we are doing that on small sets, which
+	 * makes it acceptable.
+	 */
+	for (i = 0; i < half; i++) {
+		int cnt = 0;
+		u8 val = in[i];
+
+		/* Count all values that are matching the one at index i. */
+		for (j = i + 1; j < repeat; j++) {
+			if (in[j] == val)
+				cnt++;
+		}
+
+		/* We found a value occurring more than repeat / 2. */
+		if (cnt > half) {
+			*out = val;
+			return 0;
+		}
+	}
+
+	return -EIO;
+}
+
+static int hynix_read_rr_otp(struct nand_chip *chip,
+			     const struct hynix_read_retry_otp *info,
+			     void *buf)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	int i;
+
+	chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+
+	chip->cmdfunc(mtd, NAND_HYNIX_CMD_SET_PARAMS, -1, -1);
+
+	for (i = 0; i < info->nregs; i++) {
+		int column = info->regs[i];
+
+		column |= column << 8;
+		chip->cmdfunc(mtd, NAND_CMD_NONE, column, -1);
+		chip->write_byte(mtd, info->values[i]);
+	}
+
+	chip->cmdfunc(mtd, NAND_HYNIX_CMD_APPLY_PARAMS, -1, -1);
+
+	/* Sequence to enter OTP mode? */
+	chip->cmdfunc(mtd, 0x17, -1, -1);
+	chip->cmdfunc(mtd, 0x04, -1, -1);
+	chip->cmdfunc(mtd, 0x19, -1, -1);
+
+	/* Now read the page */
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x0, info->page);
+	chip->read_buf(mtd, buf, info->size);
+
+	/* Put everything back to normal */
+	chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+	chip->cmdfunc(mtd, NAND_HYNIX_CMD_SET_PARAMS, 0x38, -1);
+	chip->write_byte(mtd, 0x0);
+	chip->cmdfunc(mtd, NAND_HYNIX_CMD_APPLY_PARAMS, -1, -1);
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x0, -1);
+
+	return 0;
+}
+
+#define NAND_HYNIX_1XNM_RR_COUNT_OFFS				0
+#define NAND_HYNIX_1XNM_RR_REG_COUNT_OFFS			8
+#define NAND_HYNIX_1XNM_RR_SET_OFFS(x, setsize, inv)		\
+	(16 + ((((x) * 2) + ((inv) ? 1 : 0)) * (setsize)))
+
+static int hynix_mlc_1xnm_rr_value(const u8 *buf, int nmodes, int nregs,
+				   int mode, int reg, bool inv, u8 *val)
+{
+	u8 tmp[NAND_HYNIX_1XNM_RR_REPEAT];
+	int val_offs = (mode * nregs) + reg;
+	int set_size = nmodes * nregs;
+	int i, ret;
+
+	for (i = 0; i < NAND_HYNIX_1XNM_RR_REPEAT; i++) {
+		int set_offs = NAND_HYNIX_1XNM_RR_SET_OFFS(i, set_size, inv);
+
+		tmp[i] = buf[val_offs + set_offs];
+	}
+
+	ret = hynix_get_majority(tmp, NAND_HYNIX_1XNM_RR_REPEAT, val);
+	if (ret)
+		return ret;
+
+	if (inv)
+		*val = ~*val;
+
+	return 0;
+}
+
+static u8 hynix_1xnm_mlc_read_retry_regs[] = {
+	0xcc, 0xbf, 0xaa, 0xab, 0xcd, 0xad, 0xae, 0xaf
+};
+
+static int hynix_mlc_1xnm_rr_init(struct nand_chip *chip,
+				  const struct hynix_read_retry_otp *info)
+{
+	struct hynix_nand *hynix = nand_get_manufacturer_data(chip);
+	struct hynix_read_retry *rr = NULL;
+	int ret, i, j;
+	u8 nregs, nmodes;
+	u8 *buf;
+
+	buf = kmalloc(info->size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = hynix_read_rr_otp(chip, info, buf);
+	if (ret)
+		goto out;
+
+	ret = hynix_get_majority(buf, NAND_HYNIX_1XNM_RR_REPEAT,
+				 &nmodes);
+	if (ret)
+		goto out;
+
+	ret = hynix_get_majority(buf + NAND_HYNIX_1XNM_RR_REPEAT,
+				 NAND_HYNIX_1XNM_RR_REPEAT,
+				 &nregs);
+	if (ret)
+		goto out;
+
+	rr = kzalloc(sizeof(*rr) + (nregs * nmodes), GFP_KERNEL);
+	if (!rr)
+		goto out;
+
+	for (i = 0; i < nmodes; i++) {
+		for (j = 0; j < nregs; j++) {
+			u8 *val = rr->values + (i * nregs);
+
+			ret = hynix_mlc_1xnm_rr_value(buf, nmodes, nregs, i, j,
+						      false, val);
+			if (!ret)
+				continue;
+
+			ret = hynix_mlc_1xnm_rr_value(buf, nmodes, nregs, i, j,
+						      true, val);
+			if (ret)
+				goto out;
+		}
+	}
+
+	rr->nregs = nregs;
+	rr->regs = hynix_1xnm_mlc_read_retry_regs;
+	hynix->read_retry = rr;
+	chip->setup_read_retry = hynix_nand_setup_read_retry;
+	chip->read_retries = nmodes;
+
+out:
+	kfree(buf);
+
+	if (ret)
+		kfree(rr);
+
+	return ret;
+}
+
+static const u8 hynix_mlc_1xnm_rr_otp_regs[] = { 0x38 };
+static const u8 hynix_mlc_1xnm_rr_otp_values[] = { 0x52 };
+
+static const struct hynix_read_retry_otp hynix_mlc_1xnm_rr_otps[] = {
+	{
+		.nregs = ARRAY_SIZE(hynix_mlc_1xnm_rr_otp_regs),
+		.regs = hynix_mlc_1xnm_rr_otp_regs,
+		.values = hynix_mlc_1xnm_rr_otp_values,
+		.page = 0x21f,
+		.size = 784
+	},
+	{
+		.nregs = ARRAY_SIZE(hynix_mlc_1xnm_rr_otp_regs),
+		.regs = hynix_mlc_1xnm_rr_otp_regs,
+		.values = hynix_mlc_1xnm_rr_otp_values,
+		.page = 0x200,
+		.size = 528,
+	},
+};
+
+static int hynix_nand_rr_init(struct nand_chip *chip)
+{
+	int i, ret = 0;
+	bool valid_jedecid;
+
+	valid_jedecid = hynix_nand_has_valid_jedecid(chip);
+
+	/*
+	 * We only support read-retry for 1xnm NANDs, and those NANDs all
+	 * expose a valid JEDEC ID.
+	 */
+	if (valid_jedecid) {
+		u8 nand_tech = chip->id.data[5] >> 4;
+
+		/* 1xnm technology */
+		if (nand_tech == 4) {
+			for (i = 0; i < ARRAY_SIZE(hynix_mlc_1xnm_rr_otps);
+			     i++) {
+				/*
+				 * FIXME: Hynix recommend to copy the
+				 * read-retry OTP area into a normal page.
+				 */
+				ret = hynix_mlc_1xnm_rr_init(chip,
+						hynix_mlc_1xnm_rr_otps);
+				if (!ret)
+					break;
+			}
+		}
+	}
+
+	if (ret)
+		pr_warn("failed to initialize read-retry infrastructure");
+
+	return 0;
+}
+
 static void hynix_nand_extract_oobsize(struct nand_chip *chip,
 				       bool valid_jedecid)
 {
@@ -258,17 +587,43 @@ static void hynix_nand_decode_id(struct nand_chip *chip)
 	hynix_nand_extract_scrambling_requirements(chip, valid_jedecid);
 }
 
+static void hynix_nand_cleanup(struct nand_chip *chip)
+{
+	struct hynix_nand *hynix = nand_get_manufacturer_data(chip);
+
+	if (!hynix)
+		return;
+
+	kfree(hynix->read_retry);
+	kfree(hynix);
+	nand_set_manufacturer_data(chip, NULL);
+}
+
 static int hynix_nand_init(struct nand_chip *chip)
 {
+	struct hynix_nand *hynix;
+	int ret;
+
 	if (!nand_is_slc(chip))
 		chip->bbt_options |= NAND_BBT_SCANLASTPAGE;
 	else
 		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
 
-	return 0;
+	hynix = kzalloc(sizeof(*hynix), GFP_KERNEL);
+	if (!hynix)
+		return -ENOMEM;
+
+	nand_set_manufacturer_data(chip, hynix);
+
+	ret = hynix_nand_rr_init(chip);
+	if (ret)
+		hynix_nand_cleanup(chip);
+
+	return ret;
 }
 
 const struct nand_manufacturer_ops hynix_nand_manuf_ops = {
 	.detect = hynix_nand_decode_id,
 	.init = hynix_nand_init,
+	.cleanup = hynix_nand_cleanup,
 };
-- 
cgit v1.2.3


From 740ad8f43c78a8dc3ec73515b7b847ca0edac781 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:12 -0300
Subject: rtc: rv8803: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-rv8803.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index f9277e536f7e..9ad97ab29866 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c
@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/rtc.h>
 
 #define RV8803_I2C_TRY_COUNT		4
@@ -556,7 +557,11 @@ static int rv8803_probe(struct i2c_client *client,
 
 	mutex_init(&rv8803->flags_lock);
 	rv8803->client = client;
-	rv8803->type = id->driver_data;
+	if (client->dev.of_node)
+		rv8803->type = (enum rv8803_type)
+			of_device_get_match_data(&client->dev);
+	else
+		rv8803->type = id->driver_data;
 	i2c_set_clientdata(client, rv8803);
 
 	flags = rv8803_read_reg(client, RV8803_FLAG);
@@ -627,9 +632,23 @@ static const struct i2c_device_id rv8803_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, rv8803_id);
 
+static const struct of_device_id rv8803_of_match[] = {
+	{
+		.compatible = "microcrystal,rv8803",
+		.data = (void *)rx_8900
+	},
+	{
+		.compatible = "epson,rx8900",
+		.data = (void *)rx_8900
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, rv8803_of_match);
+
 static struct i2c_driver rv8803_driver = {
 	.driver = {
 		.name = "rtc-rv8803",
+		.of_match_table = of_match_ptr(rv8803_of_match),
 	},
 	.probe		= rv8803_probe,
 	.remove		= rv8803_remove,
-- 
cgit v1.2.3


From e696a1dd7e80ee64fd930982d9e1c6b152635534 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:13 -0300
Subject: rtc: rv3029: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-rv3029c2.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/rtc/rtc-rv3029c2.c b/drivers/rtc/rtc-rv3029c2.c
index 1f9f7b4bf3fb..85fa1da03762 100644
--- a/drivers/rtc/rtc-rv3029c2.c
+++ b/drivers/rtc/rtc-rv3029c2.c
@@ -875,9 +875,18 @@ static struct i2c_device_id rv3029_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, rv3029_id);
 
+static const struct of_device_id rv3029_of_match[] = {
+	{ .compatible = "rv3029" },
+	{ .compatible = "rv3029c2" },
+	{ .compatible = "mc,rv3029c2" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, rv3029_of_match);
+
 static struct i2c_driver rv3029_driver = {
 	.driver = {
 		.name = "rtc-rv3029c2",
+		.of_match_table = of_match_ptr(rv3029_of_match),
 	},
 	.probe		= rv3029_i2c_probe,
 	.id_table	= rv3029_id,
-- 
cgit v1.2.3


From 36138c70e69c9272bd7e0be517f1ac7ad0c4f0df Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:14 -0300
Subject: rtc: bq32k: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-bq32k.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/rtc/rtc-bq32k.c b/drivers/rtc/rtc-bq32k.c
index 2b223935001f..98ac8d5c7901 100644
--- a/drivers/rtc/rtc-bq32k.c
+++ b/drivers/rtc/rtc-bq32k.c
@@ -310,9 +310,16 @@ static const struct i2c_device_id bq32k_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, bq32k_id);
 
+static const struct of_device_id bq32k_of_match[] = {
+	{ .compatible = "ti,bq32000" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, bq32k_of_match);
+
 static struct i2c_driver bq32k_driver = {
 	.driver = {
 		.name	= "bq32k",
+		.of_match_table = of_match_ptr(bq32k_of_match),
 	},
 	.probe		= bq32k_probe,
 	.remove		= bq32k_remove,
-- 
cgit v1.2.3


From 7ef6d2c266e5b0eb09ba0d5f5aa2dfc11df19745 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:15 -0300
Subject: rtc: ds1307: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1307.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 67 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 4ad97be48043..f085b487bbe7 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -16,6 +16,7 @@
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/rtc/ds1307.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
@@ -192,6 +193,65 @@ static const struct i2c_device_id ds1307_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ds1307_id);
 
+#ifdef CONFIG_OF
+static const struct of_device_id ds1307_of_match[] = {
+	{
+		.compatible = "dallas,ds1307",
+		.data = (void *)ds_1307
+	},
+	{
+		.compatible = "dallas,ds1337",
+		.data = (void *)ds_1337
+	},
+	{
+		.compatible = "dallas,ds1338",
+		.data = (void *)ds_1338
+	},
+	{
+		.compatible = "dallas,ds1339",
+		.data = (void *)ds_1339
+	},
+	{
+		.compatible = "dallas,ds1388",
+		.data = (void *)ds_1388
+	},
+	{
+		.compatible = "dallas,ds1340",
+		.data = (void *)ds_1340
+	},
+	{
+		.compatible = "maxim,ds3231",
+		.data = (void *)ds_3231
+	},
+	{
+		.compatible = "st,m41t00",
+		.data = (void *)m41t00
+	},
+	{
+		.compatible = "microchip,mcp7940x",
+		.data = (void *)mcp794xx
+	},
+	{
+		.compatible = "microchip,mcp7941x",
+		.data = (void *)mcp794xx
+	},
+	{
+		.compatible = "pericom,pt7c4338",
+		.data = (void *)ds_1307
+	},
+	{
+		.compatible = "epson,rx8025",
+		.data = (void *)rx_8025
+	},
+	{
+		.compatible = "isil,isl12057",
+		.data = (void *)ds_1337
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ds1307_of_match);
+#endif
+
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id ds1307_acpi_ids[] = {
 	{ .id = "DS1307", .driver_data = ds_1307 },
@@ -1318,7 +1378,12 @@ static int ds1307_probe(struct i2c_client *client,
 	i2c_set_clientdata(client, ds1307);
 
 	ds1307->client	= client;
-	if (id) {
+
+	if (client->dev.of_node) {
+		ds1307->type = (enum ds_type)
+			of_device_get_match_data(&client->dev);
+		chip = &chips[ds1307->type];
+	} else if (id) {
 		chip = &chips[id->driver_data];
 		ds1307->type = id->driver_data;
 	} else {
@@ -1711,6 +1776,7 @@ static int ds1307_remove(struct i2c_client *client)
 static struct i2c_driver ds1307_driver = {
 	.driver = {
 		.name	= "rtc-ds1307",
+		.of_match_table = of_match_ptr(ds1307_of_match),
 		.acpi_match_table = ACPI_PTR(ds1307_acpi_ids),
 	},
 	.probe		= ds1307_probe,
-- 
cgit v1.2.3


From 81b779ce5f3bb1df54d0accd83834a1c90a32121 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:16 -0300
Subject: rtc: rx8010: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-rx8010.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c
index d08da371912c..1ed3403ff8ac 100644
--- a/drivers/rtc/rtc-rx8010.c
+++ b/drivers/rtc/rtc-rx8010.c
@@ -59,6 +59,12 @@ static const struct i2c_device_id rx8010_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, rx8010_id);
 
+static const struct of_device_id rx8010_of_match[] = {
+	{ .compatible = "epson,rx8010" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, rx8010_of_match);
+
 struct rx8010_data {
 	struct i2c_client *client;
 	struct rtc_device *rtc;
@@ -487,6 +493,7 @@ static int rx8010_probe(struct i2c_client *client,
 static struct i2c_driver rx8010_driver = {
 	.driver = {
 		.name = "rtc-rx8010",
+		.of_match_table = of_match_ptr(rx8010_of_match),
 	},
 	.probe		= rx8010_probe,
 	.id_table	= rx8010_id,
-- 
cgit v1.2.3


From 4dfbd1378dceaa418ec1634428ffe038e0870b66 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:17 -0300
Subject: rtc: ds3232: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds3232.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c
index 9bb39a06b994..deff431a37c4 100644
--- a/drivers/rtc/rtc-ds3232.c
+++ b/drivers/rtc/rtc-ds3232.c
@@ -442,9 +442,16 @@ static const struct i2c_device_id ds3232_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ds3232_id);
 
+static const struct of_device_id ds3232_of_match[] = {
+	{ .compatible = "dallas,ds3232" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ds3232_of_match);
+
 static struct i2c_driver ds3232_driver = {
 	.driver = {
 		.name = "rtc-ds3232",
+		.of_match_table = of_match_ptr(ds3232_of_match),
 		.pm	= &ds3232_pm_ops,
 	},
 	.probe = ds3232_i2c_probe,
-- 
cgit v1.2.3


From 23194ac0992be4f4f6ebd0926dac473ed03568d0 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:18 -0300
Subject: rtc: rtc-ds1672: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1672.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c
index 5c18ac7394c4..7bf46bfe11a4 100644
--- a/drivers/rtc/rtc-ds1672.c
+++ b/drivers/rtc/rtc-ds1672.c
@@ -196,10 +196,17 @@ static struct i2c_device_id ds1672_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ds1672_id);
 
+static const struct of_device_id ds1672_of_match[] = {
+	{ .compatible = "dallas,ds1672" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ds1672_of_match);
+
 static struct i2c_driver ds1672_driver = {
 	.driver = {
 		   .name = "rtc-ds1672",
-		   },
+		   .of_match_table = of_match_ptr(ds1672_of_match),
+	},
 	.probe = &ds1672_probe,
 	.id_table = ds1672_id,
 };
-- 
cgit v1.2.3


From abac12e1105e7f3572f641db7583547069c1deaf Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:19 -0300
Subject: rtc: ds1374: Set .of_match_table to OF device ID table

The driver has a OF device ID table but the struct i2c_driver
.of_match_table field is not set.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1374.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c
index 52429f0a57cc..4cd115e93223 100644
--- a/drivers/rtc/rtc-ds1374.c
+++ b/drivers/rtc/rtc-ds1374.c
@@ -704,6 +704,7 @@ static SIMPLE_DEV_PM_OPS(ds1374_pm, ds1374_suspend, ds1374_resume);
 static struct i2c_driver ds1374_driver = {
 	.driver = {
 		.name = "rtc-ds1374",
+		.of_match_table = of_match_ptr(ds1374_of_match),
 		.pm = &ds1374_pm,
 	},
 	.probe = ds1374_probe,
-- 
cgit v1.2.3


From 0383550402352c0036129524f751d8182e837960 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:20 -0300
Subject: rtc: isl1208: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-isl1208.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index 2893785f0eba..8dd299c6a1f3 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -687,10 +687,18 @@ static const struct i2c_device_id isl1208_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, isl1208_id);
 
+static const struct of_device_id isl1208_of_match[] = {
+	{ .compatible = "isil,isl1208" },
+	{ .compatible = "isil,isl1218" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, isl1208_of_match);
+
 static struct i2c_driver isl1208_driver = {
 	.driver = {
-		   .name = "rtc-isl1208",
-		   },
+		.name = "rtc-isl1208",
+		.of_match_table = of_match_ptr(isl1208_of_match),
+	},
 	.probe = isl1208_probe,
 	.remove = isl1208_remove,
 	.id_table = isl1208_id,
-- 
cgit v1.2.3


From 21f274327080271dc5adb7ae9852ffa5e028ce55 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:21 -0300
Subject: rtc: s35390a: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-s35390a.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index 5dab4665ca3b..449820eeefe8 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -58,6 +58,13 @@ static const struct i2c_device_id s35390a_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, s35390a_id);
 
+static const struct of_device_id s35390a_of_match[] = {
+	{ .compatible = "s35390a" },
+	{ .compatible = "sii,s35390a" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, s35390a_of_match);
+
 struct s35390a {
 	struct i2c_client *client[8];
 	struct rtc_device *rtc;
@@ -502,6 +509,7 @@ static int s35390a_remove(struct i2c_client *client)
 static struct i2c_driver s35390a_driver = {
 	.driver		= {
 		.name	= "rtc-s35390a",
+		.of_match_table = of_match_ptr(s35390a_of_match),
 	},
 	.probe		= s35390a_probe,
 	.remove		= s35390a_remove,
-- 
cgit v1.2.3


From ffbecfbdbeeebe9749398fe9eb9eab2dd47ddff4 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:22 -0300
Subject: rtc: rx8581: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-rx8581.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/rtc/rtc-rx8581.c b/drivers/rtc/rtc-rx8581.c
index 0c362a3d1f17..9998d7937688 100644
--- a/drivers/rtc/rtc-rx8581.c
+++ b/drivers/rtc/rtc-rx8581.c
@@ -308,9 +308,16 @@ static const struct i2c_device_id rx8581_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, rx8581_id);
 
+static const struct of_device_id rx8581_of_match[] = {
+	{ .compatible = "epson,rx8581" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, rx8581_of_match);
+
 static struct i2c_driver rx8581_driver = {
 	.driver		= {
 		.name	= "rtc-rx8581",
+		.of_match_table = of_match_ptr(rx8581_of_match),
 	},
 	.probe		= rx8581_probe,
 	.id_table	= rx8581_id,
-- 
cgit v1.2.3


From eb235c561d04ea64d19a0d3e1413f9c3bc25596c Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:23 -0300
Subject: rtc: m41t80: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-m41t80.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 61 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 58698d21c2c3..5b070aa711f9 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
@@ -86,8 +87,61 @@ static const struct i2c_device_id m41t80_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, m41t80_id);
 
+static const struct of_device_id m41t80_of_match[] = {
+	{
+		.compatible = "st,m41t62",
+		.data = (void *)(M41T80_FEATURE_SQ | M41T80_FEATURE_SQ_ALT)
+	},
+	{
+		.compatible = "st,m41t65",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_WD)
+	},
+	{
+		.compatible = "st,m41t80",
+		.data = (void *)(M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t81",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t81s",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t82",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t83",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t84",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t85",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,m41t87",
+		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
+	},
+	{
+		.compatible = "st,rv4162",
+		.data = (void *)(M41T80_FEATURE_SQ | M41T80_FEATURE_WD | M41T80_FEATURE_SQ_ALT)
+	},
+	{
+		.compatible = "rv4162",
+		.data = (void *)(M41T80_FEATURE_SQ | M41T80_FEATURE_WD | M41T80_FEATURE_SQ_ALT)
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, m41t80_of_match);
+
 struct m41t80_data {
-	u8 features;
+	unsigned long features;
 	struct rtc_device *rtc;
 };
 
@@ -786,7 +840,11 @@ static int m41t80_probe(struct i2c_client *client,
 	if (!m41t80_data)
 		return -ENOMEM;
 
-	m41t80_data->features = id->driver_data;
+	if (client->dev.of_node)
+		m41t80_data->features = (unsigned long)
+			of_device_get_match_data(&client->dev);
+	else
+		m41t80_data->features = id->driver_data;
 	i2c_set_clientdata(client, m41t80_data);
 
 	if (client->irq > 0) {
@@ -894,6 +952,7 @@ static int m41t80_remove(struct i2c_client *client)
 static struct i2c_driver m41t80_driver = {
 	.driver = {
 		.name = "rtc-m41t80",
+		.of_match_table = of_match_ptr(m41t80_of_match),
 		.pm = &m41t80_pm,
 	},
 	.probe = m41t80_probe,
-- 
cgit v1.2.3


From ff764b88e63a097a6a8383f0f1f76e7fa5a5e096 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Fri, 3 Mar 2017 11:29:24 -0300
Subject: rtc: rs5c372: Add OF device ID table

The driver doesn't have a struct of_device_id table but supported devices
are registered via Device Trees. This is working on the assumption that a
I2C device registered via OF will always match a legacy I2C device ID and
that the MODALIAS reported will always be of the form i2c:<device>.

But this could change in the future so the correct approach is to have an
OF device ID table if the devices are registered via OF.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-rs5c372.c | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index c8c757466783..d4eff8d7131f 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -15,6 +15,7 @@
 #include <linux/bcd.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 
 /*
  * Ricoh has a family of I2C based RTCs, which differ only slightly from
@@ -83,6 +84,35 @@ static const struct i2c_device_id rs5c372_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, rs5c372_id);
 
+static const struct of_device_id rs5c372_of_match[] = {
+	{
+		.compatible = "ricoh,r2025sd",
+		.data = (void *)rtc_r2025sd
+	},
+	{
+		.compatible = "ricoh,r2221tl",
+		.data = (void *)rtc_r2221tl
+	},
+	{
+		.compatible = "ricoh,rs5c372a",
+		.data = (void *)rtc_rs5c372a
+	},
+	{
+		.compatible = "ricoh,rs5c372b",
+		.data = (void *)rtc_rs5c372b
+	},
+	{
+		.compatible = "ricoh,rv5c386",
+		.data = (void *)rtc_rv5c386
+	},
+	{
+		.compatible = "ricoh,rv5c387a",
+		.data = (void *)rtc_rv5c387a
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, rs5c372_of_match);
+
 /* REVISIT:  this assumes that:
  *  - we're in the 21st century, so it's safe to ignore the century
  *    bit for rv5c38[67] (REG_MONTH bit 7);
@@ -581,7 +611,11 @@ static int rs5c372_probe(struct i2c_client *client,
 
 	rs5c372->client = client;
 	i2c_set_clientdata(client, rs5c372);
-	rs5c372->type = id->driver_data;
+	if (client->dev.of_node)
+		rs5c372->type = (enum rtc_type)
+			of_device_get_match_data(&client->dev);
+	else
+		rs5c372->type = id->driver_data;
 
 	/* we read registers 0x0f then 0x00-0x0f; skip the first one */
 	rs5c372->regs = &rs5c372->buf[1];
@@ -673,6 +707,7 @@ static int rs5c372_remove(struct i2c_client *client)
 static struct i2c_driver rs5c372_driver = {
 	.driver		= {
 		.name	= "rtc-rs5c372",
+		.of_match_table = of_match_ptr(rs5c372_of_match),
 	},
 	.probe		= rs5c372_probe,
 	.remove		= rs5c372_remove,
-- 
cgit v1.2.3


From b9de1a1dae8c78de05a59e40632aa8241ef78e1a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 1 Mar 2017 15:33:23 -0800
Subject: rtc: omap: remove incorrect __exit markups

Even if bus is not hot-pluggable, devices can be unbound from the
driver via sysfs, so we should not be using __exit annotations on
remove() methods. The only exception is drivers registered with
platform_driver_probe(), which specifically disables sysfs bind/unbind
attributes.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-By: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-omap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index 73594f38c453..66c43a3ded24 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -844,7 +844,7 @@ err:
 	return ret;
 }
 
-static int __exit omap_rtc_remove(struct platform_device *pdev)
+static int omap_rtc_remove(struct platform_device *pdev)
 {
 	struct omap_rtc *rtc = platform_get_drvdata(pdev);
 	u8 reg;
@@ -964,7 +964,7 @@ static void omap_rtc_shutdown(struct platform_device *pdev)
 
 static struct platform_driver omap_rtc_driver = {
 	.probe		= omap_rtc_probe,
-	.remove		= __exit_p(omap_rtc_remove),
+	.remove		= omap_rtc_remove,
 	.shutdown	= omap_rtc_shutdown,
 	.driver		= {
 		.name	= "omap_rtc",
-- 
cgit v1.2.3


From 0c749eacaa80a36b9d2b74c6c99c61baa0196eb9 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 1 Mar 2017 15:34:38 -0800
Subject: rtc: omap: mark PM methods as __maybe_unused

Instead of using #ifdef guards around PM methods, let's annotate
them as __maybe_unused, as it provides better compile coverage.

Also drop empty stub for omap_rtc_runtime_resume().

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-omap.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index 66c43a3ded24..13f7cd11c07e 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -882,8 +882,7 @@ static int omap_rtc_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int omap_rtc_suspend(struct device *dev)
+static int __maybe_unused omap_rtc_suspend(struct device *dev)
 {
 	struct omap_rtc *rtc = dev_get_drvdata(dev);
 
@@ -906,7 +905,7 @@ static int omap_rtc_suspend(struct device *dev)
 	return 0;
 }
 
-static int omap_rtc_resume(struct device *dev)
+static int __maybe_unused omap_rtc_resume(struct device *dev)
 {
 	struct omap_rtc *rtc = dev_get_drvdata(dev);
 
@@ -921,10 +920,8 @@ static int omap_rtc_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
-#ifdef CONFIG_PM
-static int omap_rtc_runtime_suspend(struct device *dev)
+static int __maybe_unused omap_rtc_runtime_suspend(struct device *dev)
 {
 	struct omap_rtc *rtc = dev_get_drvdata(dev);
 
@@ -934,16 +931,9 @@ static int omap_rtc_runtime_suspend(struct device *dev)
 	return 0;
 }
 
-static int omap_rtc_runtime_resume(struct device *dev)
-{
-	return 0;
-}
-#endif
-
 static const struct dev_pm_ops omap_rtc_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(omap_rtc_suspend, omap_rtc_resume)
-	SET_RUNTIME_PM_OPS(omap_rtc_runtime_suspend,
-			   omap_rtc_runtime_resume, NULL)
+	SET_RUNTIME_PM_OPS(omap_rtc_runtime_suspend, NULL, NULL)
 };
 
 static void omap_rtc_shutdown(struct platform_device *pdev)
-- 
cgit v1.2.3


From 0522de00929e9e9ee51235fc40035179e4d45381 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@kernel.org>
Date: Thu, 2 Mar 2017 01:27:08 +0100
Subject: dt-bindings: Add vendor prefix for Motorola

Motorola was involved in semiconductor and mobile phone business.
The "motorola," prefix is already used by a couple of bindings:

* rtc/rtc-cmos.txt
* mfd/motorola-cpcap.txt
* regulator/cpcap-regulator.txt

Apart from that it is used in the DT file for the Droid 4 mobile
phone.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 Documentation/devicetree/bindings/vendor-prefixes.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index ec0bfb9bbebd..a1d72fa56aac 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -190,6 +190,7 @@ minix	MINIX Technology Ltd.
 miramems	MiraMEMS Sensing Technology Co., Ltd.
 mitsubishi	Mitsubishi Electric Corporation
 mosaixtech	Mosaix Technologies, Inc.
+motorola	Motorola, Inc.
 moxa	Moxa
 mpl	MPL AG
 mqmaker	mqmaker Inc.
-- 
cgit v1.2.3


From dd3bf50b35e3e111d6325207177b12af88aec824 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@kernel.org>
Date: Thu, 2 Mar 2017 01:27:09 +0100
Subject: rtc: cpcap: new rtc driver

This driver supports the Motorola CPCAP PMIC found on
some of Motorola's mobile phones, such as the Droid 4.

Tested-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 .../devicetree/bindings/rtc/cpcap-rtc.txt          |  18 ++
 drivers/rtc/Kconfig                                |   7 +
 drivers/rtc/Makefile                               |   1 +
 drivers/rtc/rtc-cpcap.c                            | 332 +++++++++++++++++++++
 4 files changed, 358 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/rtc/cpcap-rtc.txt
 create mode 100644 drivers/rtc/rtc-cpcap.c

diff --git a/Documentation/devicetree/bindings/rtc/cpcap-rtc.txt b/Documentation/devicetree/bindings/rtc/cpcap-rtc.txt
new file mode 100644
index 000000000000..45750ff3112d
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/cpcap-rtc.txt
@@ -0,0 +1,18 @@
+Motorola CPCAP PMIC RTC
+-----------------------
+
+This module is part of the CPCAP. For more details about the whole
+chip see Documentation/devicetree/bindings/mfd/motorola-cpcap.txt.
+
+Requires node properties:
+- compatible: should contain "motorola,cpcap-rtc"
+- interrupts: An interrupt specifier for alarm and 1 Hz irq
+
+Example:
+
+&cpcap {
+	cpcap_rtc: rtc {
+		compatible = "motorola,cpcap-rtc";
+		interrupts = <39 IRQ_TYPE_NONE>, <26 IRQ_TYPE_NONE>;
+	};
+};
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index ee1b0e9dde79..050bec749fae 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1731,6 +1731,13 @@ config RTC_DRV_STM32
 	   This driver can also be built as a module, if so, the module
 	   will be called "rtc-stm32".
 
+config RTC_DRV_CPCAP
+	depends on MFD_CPCAP
+	tristate "Motorola CPCAP RTC"
+	help
+	   Say y here for CPCAP rtc found on some Motorola phones
+	   and tablets such as Droid 4.
+
 comment "HID Sensor RTC drivers"
 
 config RTC_DRV_HID_SENSOR_TIME
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index f07297b1460a..13857d2fce09 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_RTC_DRV_BQ32K)	+= rtc-bq32k.o
 obj-$(CONFIG_RTC_DRV_BQ4802)	+= rtc-bq4802.o
 obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc-cmos.o
 obj-$(CONFIG_RTC_DRV_COH901331)	+= rtc-coh901331.o
+obj-$(CONFIG_RTC_DRV_CPCAP)	+= rtc-cpcap.o
 obj-$(CONFIG_RTC_DRV_DA9052)	+= rtc-da9052.o
 obj-$(CONFIG_RTC_DRV_DA9055)	+= rtc-da9055.o
 obj-$(CONFIG_RTC_DRV_DA9063)	+= rtc-da9063.o
diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c
new file mode 100644
index 000000000000..7c6a3c3167bd
--- /dev/null
+++ b/drivers/rtc/rtc-cpcap.c
@@ -0,0 +1,332 @@
+/*
+ * Motorola CPCAP PMIC RTC driver
+ *
+ * Based on cpcap-regulator.c from Motorola Linux kernel tree
+ * Copyright (C) 2009 Motorola, Inc.
+ *
+ * Rewritten for mainline kernel
+ *  - use DT
+ *  - use regmap
+ *  - use standard interrupt framework
+ *  - use managed device resources
+ *  - remove custom "secure clock daemon" helpers
+ *
+ * Copyright (C) 2017 Sebastian Reichel <sre@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+#include <linux/err.h>
+#include <linux/regmap.h>
+#include <linux/mfd/motorola-cpcap.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#define SECS_PER_DAY 86400
+#define DAY_MASK  0x7FFF
+#define TOD1_MASK 0x00FF
+#define TOD2_MASK 0x01FF
+
+struct cpcap_time {
+	int day;
+	int tod1;
+	int tod2;
+};
+
+struct cpcap_rtc {
+	struct regmap *regmap;
+	struct rtc_device *rtc_dev;
+	u16 vendor;
+	int alarm_irq;
+	bool alarm_enabled;
+	int update_irq;
+	bool update_enabled;
+};
+
+static void cpcap2rtc_time(struct rtc_time *rtc, struct cpcap_time *cpcap)
+{
+	unsigned long int tod;
+	unsigned long int time;
+
+	tod = (cpcap->tod1 & TOD1_MASK) | ((cpcap->tod2 & TOD2_MASK) << 8);
+	time = tod + ((cpcap->day & DAY_MASK) * SECS_PER_DAY);
+
+	rtc_time_to_tm(time, rtc);
+}
+
+static void rtc2cpcap_time(struct cpcap_time *cpcap, struct rtc_time *rtc)
+{
+	unsigned long time;
+
+	rtc_tm_to_time(rtc, &time);
+
+	cpcap->day = time / SECS_PER_DAY;
+	time %= SECS_PER_DAY;
+	cpcap->tod2 = (time >> 8) & TOD2_MASK;
+	cpcap->tod1 = time & TOD1_MASK;
+}
+
+static int cpcap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct cpcap_rtc *rtc = dev_get_drvdata(dev);
+
+	if (rtc->alarm_enabled == enabled)
+		return 0;
+
+	if (enabled)
+		enable_irq(rtc->alarm_irq);
+	else
+		disable_irq(rtc->alarm_irq);
+
+	rtc->alarm_enabled = !!enabled;
+
+	return 0;
+}
+
+static int cpcap_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct cpcap_rtc *rtc;
+	struct cpcap_time cpcap_tm;
+	int temp_tod2;
+	int ret;
+
+	rtc = dev_get_drvdata(dev);
+
+	ret = regmap_read(rtc->regmap, CPCAP_REG_TOD2, &temp_tod2);
+	ret |= regmap_read(rtc->regmap, CPCAP_REG_DAY, &cpcap_tm.day);
+	ret |= regmap_read(rtc->regmap, CPCAP_REG_TOD1, &cpcap_tm.tod1);
+	ret |= regmap_read(rtc->regmap, CPCAP_REG_TOD2, &cpcap_tm.tod2);
+
+	if (temp_tod2 > cpcap_tm.tod2)
+		ret |= regmap_read(rtc->regmap, CPCAP_REG_DAY, &cpcap_tm.day);
+
+	if (ret) {
+		dev_err(dev, "Failed to read time\n");
+		return -EIO;
+	}
+
+	cpcap2rtc_time(tm, &cpcap_tm);
+
+	return rtc_valid_tm(tm);
+}
+
+static int cpcap_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct cpcap_rtc *rtc;
+	struct cpcap_time cpcap_tm;
+	int ret = 0;
+
+	rtc = dev_get_drvdata(dev);
+
+	rtc2cpcap_time(&cpcap_tm, tm);
+
+	if (rtc->alarm_enabled)
+		disable_irq(rtc->alarm_irq);
+	if (rtc->update_enabled)
+		disable_irq(rtc->update_irq);
+
+	if (rtc->vendor == CPCAP_VENDOR_ST) {
+		/* The TOD1 and TOD2 registers MUST be written in this order
+		 * for the change to properly set.
+		 */
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TOD1,
+					  TOD1_MASK, cpcap_tm.tod1);
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TOD2,
+					  TOD2_MASK, cpcap_tm.tod2);
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_DAY,
+					  DAY_MASK, cpcap_tm.day);
+	} else {
+		/* Clearing the upper lower 8 bits of the TOD guarantees that
+		 * the upper half of TOD (TOD2) will not increment for 0xFF RTC
+		 * ticks (255 seconds).  During this time we can safely write
+		 * to DAY, TOD2, then TOD1 (in that order) and expect RTC to be
+		 * synchronized to the exact time requested upon the final write
+		 * to TOD1.
+		 */
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TOD1,
+					  TOD1_MASK, 0);
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_DAY,
+					  DAY_MASK, cpcap_tm.day);
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TOD2,
+					  TOD2_MASK, cpcap_tm.tod2);
+		ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TOD1,
+					  TOD1_MASK, cpcap_tm.tod1);
+	}
+
+	if (rtc->update_enabled)
+		enable_irq(rtc->update_irq);
+	if (rtc->alarm_enabled)
+		enable_irq(rtc->alarm_irq);
+
+	return ret;
+}
+
+static int cpcap_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct cpcap_rtc *rtc;
+	struct cpcap_time cpcap_tm;
+	int ret;
+
+	rtc = dev_get_drvdata(dev);
+
+	alrm->enabled = rtc->alarm_enabled;
+
+	ret = regmap_read(rtc->regmap, CPCAP_REG_DAYA, &cpcap_tm.day);
+	ret |= regmap_read(rtc->regmap, CPCAP_REG_TODA2, &cpcap_tm.tod2);
+	ret |= regmap_read(rtc->regmap, CPCAP_REG_TODA1, &cpcap_tm.tod1);
+
+	if (ret) {
+		dev_err(dev, "Failed to read time\n");
+		return -EIO;
+	}
+
+	cpcap2rtc_time(&alrm->time, &cpcap_tm);
+	return rtc_valid_tm(&alrm->time);
+}
+
+static int cpcap_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct cpcap_rtc *rtc;
+	struct cpcap_time cpcap_tm;
+	int ret;
+
+	rtc = dev_get_drvdata(dev);
+
+	rtc2cpcap_time(&cpcap_tm, &alrm->time);
+
+	if (rtc->alarm_enabled)
+		disable_irq(rtc->alarm_irq);
+
+	ret = regmap_update_bits(rtc->regmap, CPCAP_REG_DAYA, DAY_MASK,
+				 cpcap_tm.day);
+	ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TODA2, TOD2_MASK,
+				  cpcap_tm.tod2);
+	ret |= regmap_update_bits(rtc->regmap, CPCAP_REG_TODA1, TOD1_MASK,
+				  cpcap_tm.tod1);
+
+	if (!ret) {
+		enable_irq(rtc->alarm_irq);
+		rtc->alarm_enabled = true;
+	}
+
+	return ret;
+}
+
+static const struct rtc_class_ops cpcap_rtc_ops = {
+	.read_time		= cpcap_rtc_read_time,
+	.set_time		= cpcap_rtc_set_time,
+	.read_alarm		= cpcap_rtc_read_alarm,
+	.set_alarm		= cpcap_rtc_set_alarm,
+	.alarm_irq_enable	= cpcap_rtc_alarm_irq_enable,
+};
+
+static irqreturn_t cpcap_rtc_alarm_irq(int irq, void *data)
+{
+	struct cpcap_rtc *rtc = data;
+
+	rtc_update_irq(rtc->rtc_dev, 1, RTC_AF | RTC_IRQF);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t cpcap_rtc_update_irq(int irq, void *data)
+{
+	struct cpcap_rtc *rtc = data;
+
+	rtc_update_irq(rtc->rtc_dev, 1, RTC_UF | RTC_IRQF);
+	return IRQ_HANDLED;
+}
+
+static int cpcap_rtc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct cpcap_rtc *rtc;
+	int err;
+
+	rtc = devm_kzalloc(dev, sizeof(*rtc), GFP_KERNEL);
+	if (!rtc)
+		return -ENOMEM;
+
+	rtc->regmap = dev_get_regmap(dev->parent, NULL);
+	if (!rtc->regmap)
+		return -ENODEV;
+
+	platform_set_drvdata(pdev, rtc);
+	rtc->rtc_dev = devm_rtc_device_register(dev, "cpcap_rtc",
+						&cpcap_rtc_ops, THIS_MODULE);
+
+	if (IS_ERR(rtc->rtc_dev)) {
+		kfree(rtc);
+		return PTR_ERR(rtc->rtc_dev);
+	}
+
+	err = cpcap_get_vendor(dev, rtc->regmap, &rtc->vendor);
+	if (err)
+		return err;
+
+	rtc->alarm_irq = platform_get_irq(pdev, 0);
+	err = devm_request_threaded_irq(dev, rtc->alarm_irq, NULL,
+					cpcap_rtc_alarm_irq, IRQ_NONE,
+					"rtc_alarm", rtc);
+	if (err) {
+		dev_err(dev, "Could not request alarm irq: %d\n", err);
+		return err;
+	}
+	disable_irq(rtc->alarm_irq);
+
+	/* Stock Android uses the 1 Hz interrupt for "secure clock daemon",
+	 * which is not supported by the mainline kernel. The mainline kernel
+	 * does not use the irq at the moment, but we explicitly request and
+	 * disable it, so that its masked and does not wake up the processor
+	 * every second.
+	 */
+	rtc->update_irq = platform_get_irq(pdev, 1);
+	err = devm_request_threaded_irq(dev, rtc->update_irq, NULL,
+					cpcap_rtc_update_irq, IRQ_NONE,
+					"rtc_1hz", rtc);
+	if (err) {
+		dev_err(dev, "Could not request update irq: %d\n", err);
+		return err;
+	}
+	disable_irq(rtc->update_irq);
+
+	err = device_init_wakeup(dev, 1);
+	if (err) {
+		dev_err(dev, "wakeup initialization failed (%d)\n", err);
+		/* ignore error and continue without wakeup support */
+	}
+
+	return 0;
+}
+
+static const struct of_device_id cpcap_rtc_of_match[] = {
+	{ .compatible = "motorola,cpcap-rtc", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, cpcap_rtc_of_match);
+
+static struct platform_driver cpcap_rtc_driver = {
+	.probe		= cpcap_rtc_probe,
+	.driver		= {
+		.name	= "cpcap-rtc",
+		.of_match_table = cpcap_rtc_of_match,
+	},
+};
+
+module_platform_driver(cpcap_rtc_driver);
+
+MODULE_ALIAS("platform:cpcap-rtc");
+MODULE_DESCRIPTION("CPCAP RTC driver");
+MODULE_AUTHOR("Sebastian Reichel <sre@kernel.org>");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From bdc7dd67e7d9a50af992fd6c5e21b080475321e3 Mon Sep 17 00:00:00 2001
From: Elaine Zhang <zhangqing@rock-chips.com>
Date: Mon, 6 Feb 2017 10:50:34 +0800
Subject: clk: rockchip: add rk3328 clk_mac2io_ext ID

Signed-off-by: Elaine Zhang <zhangqing@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 include/dt-bindings/clock/rk3328-cru.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/dt-bindings/clock/rk3328-cru.h b/include/dt-bindings/clock/rk3328-cru.h
index ee702c8e4c09..d2b26a4b43eb 100644
--- a/include/dt-bindings/clock/rk3328-cru.h
+++ b/include/dt-bindings/clock/rk3328-cru.h
@@ -97,6 +97,7 @@
 #define SCLK_MAC2IO_SRC		99
 #define SCLK_MAC2IO		100
 #define SCLK_MAC2PHY		101
+#define SCLK_MAC2IO_EXT		102
 
 /* dclk gates */
 #define DCLK_LCDC		120
-- 
cgit v1.2.3


From a4fa90d24b4bb7d171fbcb2ceffd0315a6e29050 Mon Sep 17 00:00:00 2001
From: Elaine Zhang <zhangqing@rock-chips.com>
Date: Tue, 7 Mar 2017 19:05:15 +0800
Subject: clk: rockchip: fix up rk3368 timer-ids

The timer-ids are wrong compared to the manual, probably due a simple
copy-paste mistake from the otherwise very similar rk3288. And there
are even more timers in the system than the ones wrongly listed here.

Timer-Ids were unused both in clock-driver as well as devicetree
till now, so fixing them won't break anything.

Signed-off-by: Elaine Zhang <zhangqing@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 include/dt-bindings/clock/rk3368-cru.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/include/dt-bindings/clock/rk3368-cru.h b/include/dt-bindings/clock/rk3368-cru.h
index 9c5dd9ba2f6c..2bf23eda8938 100644
--- a/include/dt-bindings/clock/rk3368-cru.h
+++ b/include/dt-bindings/clock/rk3368-cru.h
@@ -44,13 +44,12 @@
 #define SCLK_I2S_8CH		82
 #define SCLK_SPDIF_8CH		83
 #define SCLK_I2S_2CH		84
-#define SCLK_TIMER0		85
-#define SCLK_TIMER1		86
-#define SCLK_TIMER2		87
-#define SCLK_TIMER3		88
-#define SCLK_TIMER4		89
-#define SCLK_TIMER5		90
-#define SCLK_TIMER6		91
+#define SCLK_TIMER00		85
+#define SCLK_TIMER01		86
+#define SCLK_TIMER02		87
+#define SCLK_TIMER03		88
+#define SCLK_TIMER04		89
+#define SCLK_TIMER05		90
 #define SCLK_OTGPHY0		93
 #define SCLK_OTG_ADP		96
 #define SCLK_HSICPHY480M	97
-- 
cgit v1.2.3


From 710fbd769c9c9ba80f5b2727dd6bf4e06bb69b9e Mon Sep 17 00:00:00 2001
From: Elaine Zhang <zhangqing@rock-chips.com>
Date: Tue, 7 Mar 2017 19:05:17 +0800
Subject: clk: rockchip: add clock ids for timer10-15 of RK3368 SoCs

Signed-off-by: Elaine Zhang <zhangqing@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 include/dt-bindings/clock/rk3368-cru.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/dt-bindings/clock/rk3368-cru.h b/include/dt-bindings/clock/rk3368-cru.h
index 2bf23eda8938..aeb83e581a11 100644
--- a/include/dt-bindings/clock/rk3368-cru.h
+++ b/include/dt-bindings/clock/rk3368-cru.h
@@ -81,6 +81,12 @@
 #define SCLK_SFC		126
 #define SCLK_MAC		127
 #define SCLK_MACREF_OUT		128
+#define SCLK_TIMER10		133
+#define SCLK_TIMER11		134
+#define SCLK_TIMER12		135
+#define SCLK_TIMER13		136
+#define SCLK_TIMER14		137
+#define SCLK_TIMER15		138
 
 #define DCLK_VOP		190
 #define MCLK_CRYPTO		191
-- 
cgit v1.2.3


From 9bb4db62327ebfd125fa6a8c892ebe3152a9f531 Mon Sep 17 00:00:00 2001
From: Elaine Zhang <zhangqing@rock-chips.com>
Date: Mon, 6 Feb 2017 10:50:35 +0800
Subject: clk: rockchip: describe clk_gmac using the new muxgrf type on rk3328

With the newly introduced clk type for muxes in the grf we now can
describe some missing clocks, like the clk_gmac2io and clk_gmac2phy
that selects between clk_mac2io_src and gmac_clkin based on a bit
set in the general register files.

Signed-off-by: Elaine Zhang <zhangqing@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 drivers/clk/rockchip/clk-rk3328.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/clk/rockchip/clk-rk3328.c b/drivers/clk/rockchip/clk-rk3328.c
index 1e384e143504..b04f29774ee7 100644
--- a/drivers/clk/rockchip/clk-rk3328.c
+++ b/drivers/clk/rockchip/clk-rk3328.c
@@ -20,6 +20,7 @@
 #include <dt-bindings/clock/rk3328-cru.h>
 #include "clk.h"
 
+#define RK3328_GRF_SOC_CON4		0x410
 #define RK3328_GRF_SOC_STATUS0		0x480
 #define RK3328_GRF_MAC_CON1		0x904
 #define RK3328_GRF_MAC_CON2		0x908
@@ -214,6 +215,8 @@ PNAME(mux_mac2io_src_p)		= { "clk_mac2io_src",
 				    "gmac_clkin" };
 PNAME(mux_mac2phy_src_p)	= { "clk_mac2phy_src",
 				    "phy_50m_out" };
+PNAME(mux_mac2io_ext_p)		= { "clk_mac2io",
+				    "gmac_clkin" };
 
 static struct rockchip_pll_clock rk3328_pll_clks[] __initdata = {
 	[apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p,
@@ -680,6 +683,10 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 	COMPOSITE(SCLK_MAC2IO_OUT, "clk_mac2io_out", mux_2plls_p, 0,
 			RK3328_CLKSEL_CON(27), 15, 1, MFLAGS, 8, 5, DFLAGS,
 			RK3328_CLKGATE_CON(3), 5, GFLAGS),
+	MUXGRF(SCLK_MAC2IO, "clk_mac2io", mux_mac2io_src_p, CLK_SET_RATE_NO_REPARENT,
+			RK3328_GRF_MAC_CON1, 10, 1, MFLAGS),
+	MUXGRF(SCLK_MAC2IO_EXT, "clk_mac2io_ext", mux_mac2io_ext_p, CLK_SET_RATE_NO_REPARENT,
+			RK3328_GRF_SOC_CON4, 14, 1, MFLAGS),
 
 	COMPOSITE(SCLK_MAC2PHY_SRC, "clk_mac2phy_src", mux_2plls_p, 0,
 			RK3328_CLKSEL_CON(26), 7, 1, MFLAGS, 0, 5, DFLAGS,
@@ -691,6 +698,8 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 	COMPOSITE_NOMUX(SCLK_MAC2PHY_OUT, "clk_mac2phy_out", "clk_mac2phy", 0,
 			RK3328_CLKSEL_CON(26), 8, 2, DFLAGS,
 			RK3328_CLKGATE_CON(9), 2, GFLAGS),
+	MUXGRF(SCLK_MAC2PHY, "clk_mac2phy", mux_mac2phy_src_p, CLK_SET_RATE_NO_REPARENT,
+			RK3328_GRF_MAC_CON2, 10, 1, MFLAGS),
 
 	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
 
-- 
cgit v1.2.3


From 4f060850ecd66368316249557a2c0e4344bf36fa Mon Sep 17 00:00:00 2001
From: Elaine Zhang <zhangqing@rock-chips.com>
Date: Tue, 7 Mar 2017 19:05:18 +0800
Subject: clk: rockchip: export SCLK_TIMERXX id for timers on rk3368

Signed-off-by: Elaine Zhang <zhangqing@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 drivers/clk/rockchip/clk-rk3368.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c
index 6cb474c593e7..139d418f448e 100644
--- a/drivers/clk/rockchip/clk-rk3368.c
+++ b/drivers/clk/rockchip/clk-rk3368.c
@@ -835,18 +835,18 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
 	GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 0, GFLAGS),
 
 	/* timer gates */
-	GATE(0, "sclk_timer15", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 11, GFLAGS),
-	GATE(0, "sclk_timer14", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 10, GFLAGS),
-	GATE(0, "sclk_timer13", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 9, GFLAGS),
-	GATE(0, "sclk_timer12", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 8, GFLAGS),
-	GATE(0, "sclk_timer11", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 7, GFLAGS),
-	GATE(0, "sclk_timer10", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 6, GFLAGS),
-	GATE(0, "sclk_timer05", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 5, GFLAGS),
-	GATE(0, "sclk_timer04", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 4, GFLAGS),
-	GATE(0, "sclk_timer03", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 3, GFLAGS),
-	GATE(0, "sclk_timer02", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 2, GFLAGS),
-	GATE(0, "sclk_timer01", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 1, GFLAGS),
-	GATE(0, "sclk_timer00", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 0, GFLAGS),
+	GATE(SCLK_TIMER15, "sclk_timer15", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 11, GFLAGS),
+	GATE(SCLK_TIMER14, "sclk_timer14", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 10, GFLAGS),
+	GATE(SCLK_TIMER13, "sclk_timer13", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 9, GFLAGS),
+	GATE(SCLK_TIMER12, "sclk_timer12", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 8, GFLAGS),
+	GATE(SCLK_TIMER11, "sclk_timer11", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 7, GFLAGS),
+	GATE(SCLK_TIMER10, "sclk_timer10", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 6, GFLAGS),
+	GATE(SCLK_TIMER05, "sclk_timer05", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 5, GFLAGS),
+	GATE(SCLK_TIMER04, "sclk_timer04", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 4, GFLAGS),
+	GATE(SCLK_TIMER03, "sclk_timer03", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 3, GFLAGS),
+	GATE(SCLK_TIMER02, "sclk_timer02", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 2, GFLAGS),
+	GATE(SCLK_TIMER01, "sclk_timer01", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 1, GFLAGS),
+	GATE(SCLK_TIMER00, "sclk_timer00", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 0, GFLAGS),
 };
 
 static const char *const rk3368_critical_clocks[] __initconst = {
-- 
cgit v1.2.3


From 920c480a73a341de93cf0e8816ea0becab729e1e Mon Sep 17 00:00:00 2001
From: Elaine Zhang <zhangqing@rock-chips.com>
Date: Tue, 7 Mar 2017 19:16:48 +0800
Subject: clk: rockchip: mark some rk3368 core-clks as critical

Mark pclk_pd_alive, pclk_peri, hclk_peri as critical

Signed-off-by: Elaine Zhang <zhangqing@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 drivers/clk/rockchip/clk-rk3368.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c
index 139d418f448e..024762d3214d 100644
--- a/drivers/clk/rockchip/clk-rk3368.c
+++ b/drivers/clk/rockchip/clk-rk3368.c
@@ -858,6 +858,9 @@ static const char *const rk3368_critical_clocks[] __initconst = {
 	 */
 	"pclk_pwm1",
 	"pclk_pd_pmu",
+	"pclk_pd_alive",
+	"pclk_peri",
+	"hclk_peri",
 };
 
 static void __init rk3368_clk_init(struct device_node *np)
-- 
cgit v1.2.3


From 65ba6fa439e7c3cbf97de9dce9e7a3390ae2638c Mon Sep 17 00:00:00 2001
From: Stephen Boyd <stephen.boyd@linaro.org>
Date: Thu, 16 Feb 2017 13:18:20 -0800
Subject: scripts: objdiff: Ignore debug info when comparing

If the kernel is configured to be built with debug symbols, or
has bug tables, comparing files may not work if line numbers
change. This makes comparing object files with these options
harder to do. Let's strip out the debug info and drop the
__bug_table here so that we don't see false positives. There may
be other things to drop later, and it may be architecture
specific, but this works for me with my ARM64 build.

Signed-off-by: Stephen Boyd <stephen.boyd@linaro.org>
Reviewed-by: Jason Cooper <jason@lakedaemon.net>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/objdiff | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/objdiff b/scripts/objdiff
index 62e51dae2138..4fb5d6796893 100755
--- a/scripts/objdiff
+++ b/scripts/objdiff
@@ -57,13 +57,15 @@ get_output_dir() {
 do_objdump() {
 	dir=$(get_output_dir $1)
 	base=${1##*/}
+	stripped=$dir/${base%.o}.stripped
 	dis=$dir/${base%.o}.dis
 
 	[ ! -d "$dir" ] && mkdir -p $dir
 
 	# remove addresses for a cleaner diff
 	# http://dummdida.tumblr.com/post/60924060451/binary-diff-between-libc-from-scientificlinux-and
-	$OBJDUMP -D $1 | sed "s/^[[:space:]]\+[0-9a-f]\+//" > $dis
+	$STRIP -g $1 -R __bug_table -R .note -R .comment -o $stripped
+	$OBJDUMP -D $stripped | sed -e "s/^[[:space:]]\+[0-9a-f]\+//" -e "s:^$stripped:$1:" > $dis
 }
 
 dorecord() {
@@ -73,6 +75,7 @@ dorecord() {
 
 	CMT="`git rev-parse --short HEAD`"
 
+	STRIP="${CROSS_COMPILE}strip"
 	OBJDUMP="${CROSS_COMPILE}objdump"
 
 	for d in $FILES; do
-- 
cgit v1.2.3


From 35288e30ebca59effbd44c5976afc71a342e9a85 Mon Sep 17 00:00:00 2001
From: Seung-Woo Kim <sw0312.kim@samsung.com>
Date: Fri, 10 Mar 2017 11:01:00 +0900
Subject: Kbuild: fix file name in comment about extra gcc checks

Extra gcc checks like W=1 were moved to scripts/Makefile.exrawarn,
so the file name in comment needs to be fixed.

Signed-off-by: Seung-Woo Kim <sw0312.kim@samsung.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 165cf9783a5d..faa9d263cfe3 100644
--- a/Makefile
+++ b/Makefile
@@ -707,7 +707,7 @@ KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
 else
 
 # These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.build)
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
 endif
-- 
cgit v1.2.3


From 7c3f4309a0e5d70d164c60cd160262db21cb050d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 1 Mar 2017 14:07:21 +0000
Subject: dmaengine: mv_xor: remove redundant null check on cd

The check to see if cd is null is redundant, pdata->channels is
never null at this point, and hence &pdata->channels[i] cannot
be null, so remove the null check.

Detected by CoverityScan, CID#1357194 ("Logically Dead Code")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/mv_xor.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 0cb951b743a6..ea53b879859c 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1580,11 +1580,6 @@ static int mv_xor_probe(struct platform_device *pdev)
 			int irq;
 
 			cd = &pdata->channels[i];
-			if (!cd) {
-				ret = -ENODEV;
-				goto err_channel_add;
-			}
-
 			irq = platform_get_irq(pdev, i);
 			if (irq < 0) {
 				ret = irq;
-- 
cgit v1.2.3


From 6f3125ceb66120796f62e7797a1a313418a5911a Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Wed, 8 Mar 2017 10:13:09 +0100
Subject: dmaengine: imx-sdma: advertise correct residue granularity

The SDMA hardware/driver does not actually report the transfer residue at
burst size granularity, but in fact is only able to report residue after
each finished segment.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Tested-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/imx-sdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index d1651a50c349..46359c4be22d 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1832,7 +1832,7 @@ static int sdma_probe(struct platform_device *pdev)
 	sdma->dma_device.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
 	sdma->dma_device.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
 	sdma->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
-	sdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	sdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
 	sdma->dma_device.device_issue_pending = sdma_issue_pending;
 	sdma->dma_device.dev->dma_parms = &sdma->dma_parms;
 	dma_set_max_seg_size(sdma->dma_device.dev, 65535);
-- 
cgit v1.2.3


From 676f9c26c330d087125769200bcf080c3c88488e Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 14 Mar 2017 00:59:11 +0900
Subject: dmaengine: xilinx: fix device_terminate_all() callback for AXI CDMA

The device_terminate_all() callback for this driver stops current DMA
operations by clearing RUNSTOP bit in the control register and waiting
HALTED bit set in the status register.

But AXI CDMA which is one of the supported DMA engine by this driver
does not provide the run / stop controls and those bits in the control
and status registers are reserved.  So when device_terminate_all() is
called, the error message is printed and the channel is marked as having
errors in xilinx_dma_halt().

This change adds stop_transfer() callback which differentiates CDMA and
other DMA engine.  The CDMA's one avoids the unsupported operations and
instead polls the status register to check if the DMA operations are in
progress for AXI CDMA.

Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Kedareswara rao Appana <appana.durga.rao@xilinx.com>
Cc: Michal Simek <monstr@monstr.eu>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/xilinx/xilinx_dma.c | 49 +++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 8288fe4d17c3..df1676efdd73 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -331,6 +331,7 @@ struct xilinx_dma_tx_descriptor {
  * @seg_v: Statically allocated segments base
  * @cyclic_seg_v: Statically allocated segment base for cyclic transfers
  * @start_transfer: Differentiate b/w DMA IP's transfer
+ * @stop_transfer: Differentiate b/w DMA IP's quiesce
  */
 struct xilinx_dma_chan {
 	struct xilinx_dma_device *xdev;
@@ -361,6 +362,7 @@ struct xilinx_dma_chan {
 	struct xilinx_axidma_tx_segment *seg_v;
 	struct xilinx_axidma_tx_segment *cyclic_seg_v;
 	void (*start_transfer)(struct xilinx_dma_chan *chan);
+	int (*stop_transfer)(struct xilinx_dma_chan *chan);
 	u16 tdest;
 };
 
@@ -946,26 +948,32 @@ static bool xilinx_dma_is_idle(struct xilinx_dma_chan *chan)
 }
 
 /**
- * xilinx_dma_halt - Halt DMA channel
+ * xilinx_dma_stop_transfer - Halt DMA channel
  * @chan: Driver specific DMA channel
  */
-static void xilinx_dma_halt(struct xilinx_dma_chan *chan)
+static int xilinx_dma_stop_transfer(struct xilinx_dma_chan *chan)
 {
-	int err;
 	u32 val;
 
 	dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP);
 
 	/* Wait for the hardware to halt */
-	err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
-				      (val & XILINX_DMA_DMASR_HALTED), 0,
-				      XILINX_DMA_LOOP_COUNT);
+	return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
+				       val & XILINX_DMA_DMASR_HALTED, 0,
+				       XILINX_DMA_LOOP_COUNT);
+}
 
-	if (err) {
-		dev_err(chan->dev, "Cannot stop channel %p: %x\n",
-			chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
-		chan->err = true;
-	}
+/**
+ * xilinx_cdma_stop_transfer - Wait for the current transfer to complete
+ * @chan: Driver specific DMA channel
+ */
+static int xilinx_cdma_stop_transfer(struct xilinx_dma_chan *chan)
+{
+	u32 val;
+
+	return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
+				       val & XILINX_DMA_DMASR_IDLE, 0,
+				       XILINX_DMA_LOOP_COUNT);
 }
 
 /**
@@ -2003,12 +2011,17 @@ static int xilinx_dma_terminate_all(struct dma_chan *dchan)
 {
 	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 	u32 reg;
+	int err;
 
 	if (chan->cyclic)
 		xilinx_dma_chan_reset(chan);
 
-	/* Halt the DMA engine */
-	xilinx_dma_halt(chan);
+	err = chan->stop_transfer(chan);
+	if (err) {
+		dev_err(chan->dev, "Cannot stop channel %p: %x\n",
+			chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
+		chan->err = true;
+	}
 
 	/* Remove and free all of the descriptors in the lists */
 	xilinx_dma_free_descriptors(chan);
@@ -2397,12 +2410,16 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 		return err;
 	}
 
-	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA)
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
 		chan->start_transfer = xilinx_dma_start_transfer;
-	else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA)
+		chan->stop_transfer = xilinx_dma_stop_transfer;
+	} else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
 		chan->start_transfer = xilinx_cdma_start_transfer;
-	else
+		chan->stop_transfer = xilinx_cdma_stop_transfer;
+	} else {
 		chan->start_transfer = xilinx_vdma_start_transfer;
+		chan->stop_transfer = xilinx_dma_stop_transfer;
+	}
 
 	/* Initialize the tasklet */
 	tasklet_init(&chan->tasklet, xilinx_dma_do_tasklet,
-- 
cgit v1.2.3


From db6a3d03e22f3daf09d8d1532bd1d3747062a561 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 14 Mar 2017 00:59:12 +0900
Subject: dmaengine: xilinx: avoid memory corruption for
 device_prep_dma_memcpy()

The device_prep_dma_memcpy() callback for this driver allocates a new
xilinx_dma_tx_descriptor whose TX segments list is initialized as empty,
but then gets invalid TX segment pointer by list_last_entry() from the
empty TX segments list and memory corruption happens by the attempt to
update the next descriptor in invalid TX segment pointer.

This removes unnecessary memory access for nonexistent tail TX segment
which causes memory corruption.

Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Kedareswara rao Appana <appana.durga.rao@xilinx.com>
Cc: Michal Simek <monstr@monstr.eu>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/xilinx/xilinx_dma.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index df1676efdd73..8cf87b1a284b 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -1661,7 +1661,7 @@ xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
 {
 	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 	struct xilinx_dma_tx_descriptor *desc;
-	struct xilinx_cdma_tx_segment *segment, *prev;
+	struct xilinx_cdma_tx_segment *segment;
 	struct xilinx_cdma_desc_hw *hw;
 
 	if (!len || len > XILINX_DMA_MAX_TRANS_LEN)
@@ -1688,21 +1688,11 @@ xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
 		hw->dest_addr_msb = upper_32_bits(dma_dst);
 	}
 
-	/* Fill the previous next descriptor with current */
-	prev = list_last_entry(&desc->segments,
-			       struct xilinx_cdma_tx_segment, node);
-	prev->hw.next_desc = segment->phys;
-
 	/* Insert the segment into the descriptor segments list. */
 	list_add_tail(&segment->node, &desc->segments);
 
-	prev = segment;
-
-	/* Link the last hardware descriptor with the first. */
-	segment = list_first_entry(&desc->segments,
-				struct xilinx_cdma_tx_segment, node);
 	desc->async_tx.phys = segment->phys;
-	prev->hw.next_desc = segment->phys;
+	hw->next_desc = segment->phys;
 
 	return &desc->async_tx;
 
-- 
cgit v1.2.3


From f78271dfb77353c4d045f9735deebe21839fb2ed Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 22 Jan 2017 23:02:32 +0900
Subject: kbuild: drop unneeded patterns '.*.orig' and '.*.rej' from distclean

The patterns '.*.orig' and '.*.rej' are cleaned away by '*.orig' and
'*.rej' seen two lines above.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index faa9d263cfe3..515aeea7e702 100644
--- a/Makefile
+++ b/Makefile
@@ -1315,8 +1315,8 @@ PHONY += distclean
 distclean: mrproper
 	@find $(srctree) $(RCS_FIND_IGNORE) \
 		\( -name '*.orig' -o -name '*.rej' -o -name '*~' \
-		-o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \
-		-o -name '.*.rej' -o -name '*%'  -o -name 'core' \) \
+		-o -name '*.bak' -o -name '#*#' -o -name '*%' \
+		-o -name 'core' \) \
 		-type f -print | xargs rm -f
 
 
-- 
cgit v1.2.3


From 6b244bbf0ddfcb5539bcf4e60d8799c926b0108d Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 15 Mar 2017 12:31:47 +0100
Subject: mtd: nand: sunxi: simplify optional reset handling

As of commit bb475230b8e5 ("reset: make optional functions really
optional"), the reset framework API calls use NULL pointers to describe
optional, non-present reset controls.

This allows to return errors from devm_reset_control_get_optional and to
call reset_control_(de)assert unconditionally.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/sunxi_nand.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c
index 0eeeb8b889ea..118a26fff368 100644
--- a/drivers/mtd/nand/sunxi_nand.c
+++ b/drivers/mtd/nand/sunxi_nand.c
@@ -2212,17 +2212,17 @@ static int sunxi_nfc_probe(struct platform_device *pdev)
 		goto out_ahb_clk_unprepare;
 
 	nfc->reset = devm_reset_control_get_optional(dev, "ahb");
-	if (!IS_ERR(nfc->reset)) {
-		ret = reset_control_deassert(nfc->reset);
-		if (ret) {
-			dev_err(dev, "reset err %d\n", ret);
-			goto out_mod_clk_unprepare;
-		}
-	} else if (PTR_ERR(nfc->reset) != -ENOENT) {
+	if (IS_ERR(nfc->reset)) {
 		ret = PTR_ERR(nfc->reset);
 		goto out_mod_clk_unprepare;
 	}
 
+	ret = reset_control_deassert(nfc->reset);
+	if (ret) {
+		dev_err(dev, "reset err %d\n", ret);
+		goto out_mod_clk_unprepare;
+	}
+
 	ret = sunxi_nfc_rst(nfc);
 	if (ret)
 		goto out_ahb_reset_reassert;
@@ -2262,8 +2262,7 @@ out_release_dmac:
 	if (nfc->dmac)
 		dma_release_channel(nfc->dmac);
 out_ahb_reset_reassert:
-	if (!IS_ERR(nfc->reset))
-		reset_control_assert(nfc->reset);
+	reset_control_assert(nfc->reset);
 out_mod_clk_unprepare:
 	clk_disable_unprepare(nfc->mod_clk);
 out_ahb_clk_unprepare:
@@ -2278,8 +2277,7 @@ static int sunxi_nfc_remove(struct platform_device *pdev)
 
 	sunxi_nand_chips_cleanup(nfc);
 
-	if (!IS_ERR(nfc->reset))
-		reset_control_assert(nfc->reset);
+	reset_control_assert(nfc->reset);
 
 	if (nfc->dmac)
 		dma_release_channel(nfc->dmac);
-- 
cgit v1.2.3


From 215157fb5338129b4b9f2dca23826084e71bcf4d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 23 Feb 2017 11:30:48 +0000
Subject: mtd: nand: nandsim: fix spelling mistake: "weakpagess" -> "weakpages"

trivial fix to spelling mistake in NS_ERR error message

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nandsim.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index c84742671a5f..cef818f535ed 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -901,7 +901,7 @@ static int parse_weakpages(void)
 		zero_ok = (*w == '0' ? 1 : 0);
 		page_no = simple_strtoul(w, &w, 0);
 		if (!zero_ok && !page_no) {
-			NS_ERR("invalid weakpagess.\n");
+			NS_ERR("invalid weakpages.\n");
 			return -EINVAL;
 		}
 		max_writes = 3;
-- 
cgit v1.2.3


From 1932a9646b777c448aec9d992995b0bb0648b11d Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Mon, 20 Feb 2017 14:10:07 +0100
Subject: mtd: nand: tango: Enforce DMA direction type

do_dma() uses an int to pass the DMA data direction information and
pass the same value to dmaengine_prep_slave_sg().

Currently, DMA_{FROM,TO}_DEVICE match DMA_{DEV_TO_MEM,MEM_TO_DEV}
definitions so it works fine, but assuming this will always be the case
is not safe.

Enforce enum dma_data_direction type in the function prototype and make
the enum dma_data_direction -> enum dma_transfer_direction conversion
explicit.

Reported-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
---
 drivers/mtd/nand/tango_nand.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c
index 4a5e948c62df..05b6e1065203 100644
--- a/drivers/mtd/nand/tango_nand.c
+++ b/drivers/mtd/nand/tango_nand.c
@@ -223,12 +223,13 @@ static void tango_dma_callback(void *arg)
 	complete(arg);
 }
 
-static int do_dma(struct tango_nfc *nfc, int dir, int cmd, const void *buf,
-		  int len, int page)
+static int do_dma(struct tango_nfc *nfc, enum dma_data_direction dir, int cmd,
+		  const void *buf, int len, int page)
 {
 	void __iomem *addr = nfc->reg_base + NFC_STATUS;
 	struct dma_chan *chan = nfc->chan;
 	struct dma_async_tx_descriptor *desc;
+	enum dma_transfer_direction tdir;
 	struct scatterlist sg;
 	struct completion tx_done;
 	int err = -EIO;
@@ -238,7 +239,8 @@ static int do_dma(struct tango_nfc *nfc, int dir, int cmd, const void *buf,
 	if (dma_map_sg(chan->device->dev, &sg, 1, dir) != 1)
 		return -EIO;
 
-	desc = dmaengine_prep_slave_sg(chan, &sg, 1, dir, DMA_PREP_INTERRUPT);
+	tdir = dir == DMA_TO_DEVICE ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
+	desc = dmaengine_prep_slave_sg(chan, &sg, 1, tdir, DMA_PREP_INTERRUPT);
 	if (!desc)
 		goto dma_unmap;
 
-- 
cgit v1.2.3


From 4edafe391e8914c19b92e500b72e1ea5aa3a26f2 Mon Sep 17 00:00:00 2001
From: Alison Wang <b18965@freescale.com>
Date: Mon, 13 Feb 2017 14:46:55 +0800
Subject: memory: ifc: Update dependency of IFC for LS1021A

As Freescale/NXP IFC controller is available on LS1021A, the dependency
for LS1021A is added.

LS1021A is an earlier product and is not compatible with later
LayerScape architecture. So ARCH_LAYERSCAPE can't cover LS1021A.

Signed-off-by: Alison Wang <alison.wang@nxp.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/memory/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig
index ec80e35c8dfe..fff8345f5bb0 100644
--- a/drivers/memory/Kconfig
+++ b/drivers/memory/Kconfig
@@ -115,7 +115,7 @@ config FSL_CORENET_CF
 
 config FSL_IFC
 	bool
-	depends on FSL_SOC || ARCH_LAYERSCAPE
+	depends on FSL_SOC || ARCH_LAYERSCAPE || SOC_LS1021A
 
 config JZ4780_NEMC
 	bool "Ingenic JZ4780 SoC NEMC driver"
-- 
cgit v1.2.3


From 01eb9ae1aeaf776653a861bb38aaa816a911da58 Mon Sep 17 00:00:00 2001
From: Alison Wang <b18965@freescale.com>
Date: Mon, 13 Feb 2017 14:46:56 +0800
Subject: mtd: nand: Update dependency of IFC for LS1021A

As NAND support for Freescale/NXP IFC controller is available on
LS1021A, the dependency for LS1021A is added.

LS1021A is an earlier product and is not compatible with later
LayerScape architecture. So ARCH_LAYERSCAPE can't cover LS1021A.

Signed-off-by: Alison Wang <alison.wang@nxp.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 1ac7f321203e..ae7ae77f295e 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -439,7 +439,7 @@ config MTD_NAND_FSL_ELBC
 
 config MTD_NAND_FSL_IFC
 	tristate "NAND support for Freescale IFC controller"
-	depends on FSL_SOC || ARCH_LAYERSCAPE
+	depends on FSL_SOC || ARCH_LAYERSCAPE || SOC_LS1021A
 	select FSL_IFC
 	select MEMORY
 	help
-- 
cgit v1.2.3


From 44dd182861f99f04171256f29b9dc51940757e5d Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Fri, 10 Feb 2017 15:01:10 +0100
Subject: mtd: nand: gpio: make nCE GPIO optional

On some hardware, the nCE signal is wired to the ChipSelect associated
to bus address of the NAND, so it is automatically driven during the
memory access and it is not managed by a GPIO.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/gpio.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
index 0d24857469ab..85294f150f4f 100644
--- a/drivers/mtd/nand/gpio.c
+++ b/drivers/mtd/nand/gpio.c
@@ -78,7 +78,9 @@ static void gpio_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
 	gpio_nand_dosync(gpiomtd);
 
 	if (ctrl & NAND_CTRL_CHANGE) {
-		gpio_set_value(gpiomtd->plat.gpio_nce, !(ctrl & NAND_NCE));
+		if (gpio_is_valid(gpiomtd->plat.gpio_nce))
+			gpio_set_value(gpiomtd->plat.gpio_nce,
+				       !(ctrl & NAND_NCE));
 		gpio_set_value(gpiomtd->plat.gpio_cle, !!(ctrl & NAND_CLE));
 		gpio_set_value(gpiomtd->plat.gpio_ale, !!(ctrl & NAND_ALE));
 		gpio_nand_dosync(gpiomtd);
@@ -201,7 +203,8 @@ static int gpio_nand_remove(struct platform_device *pdev)
 
 	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
 		gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
-	gpio_set_value(gpiomtd->plat.gpio_nce, 1);
+	if (gpio_is_valid(gpiomtd->plat.gpio_nce))
+		gpio_set_value(gpiomtd->plat.gpio_nce, 1);
 
 	return 0;
 }
@@ -239,10 +242,13 @@ static int gpio_nand_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	ret = devm_gpio_request(&pdev->dev, gpiomtd->plat.gpio_nce, "NAND NCE");
-	if (ret)
-		return ret;
-	gpio_direction_output(gpiomtd->plat.gpio_nce, 1);
+	if (gpio_is_valid(gpiomtd->plat.gpio_nce)) {
+		ret = devm_gpio_request(&pdev->dev, gpiomtd->plat.gpio_nce,
+					"NAND NCE");
+		if (ret)
+			return ret;
+		gpio_direction_output(gpiomtd->plat.gpio_nce, 1);
+	}
 
 	if (gpio_is_valid(gpiomtd->plat.gpio_nwp)) {
 		ret = devm_gpio_request(&pdev->dev, gpiomtd->plat.gpio_nwp,
-- 
cgit v1.2.3


From 7a8128e2e2aca47ded4347d97585129e19c82451 Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
Date: Sat, 11 Mar 2017 15:42:13 +0900
Subject: rtc: wm8350: Remove unused to_wm8350_from_rtc_dev

The to_wm8350_from_rtc_dev macro is not used by anything in the
rtc-wm8350 driver.

Signed-off-by: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
Acked-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-wm8350.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
index fa247deb9cf4..483c7993516b 100644
--- a/drivers/rtc/rtc-wm8350.c
+++ b/drivers/rtc/rtc-wm8350.c
@@ -30,8 +30,6 @@
 #define WM8350_SET_TIME_RETRIES	5
 #define WM8350_GET_TIME_RETRIES	5
 
-#define to_wm8350_from_rtc_dev(d) container_of(d, struct wm8350, rtc.pdev.dev)
-
 /*
  * Read current time and date in RTC
  */
-- 
cgit v1.2.3


From 65e9e65cebbea755db638875a44b227a1027570e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 14 Mar 2017 10:56:30 +0300
Subject: rtc: cpcap: kfreeing devm allocated memory

We shouldn't kfree(rtc) because is devm_ managed memory.  It leads to a
double free.

Fixes: dd3bf50b35e3 ("rtc: cpcap: new rtc driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-By: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-cpcap.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c
index 7c6a3c3167bd..5d163be98e9f 100644
--- a/drivers/rtc/rtc-cpcap.c
+++ b/drivers/rtc/rtc-cpcap.c
@@ -266,10 +266,8 @@ static int cpcap_rtc_probe(struct platform_device *pdev)
 	rtc->rtc_dev = devm_rtc_device_register(dev, "cpcap_rtc",
 						&cpcap_rtc_ops, THIS_MODULE);
 
-	if (IS_ERR(rtc->rtc_dev)) {
-		kfree(rtc);
+	if (IS_ERR(rtc->rtc_dev))
 		return PTR_ERR(rtc->rtc_dev);
-	}
 
 	err = cpcap_get_vendor(dev, rtc->regmap, &rtc->vendor);
 	if (err)
-- 
cgit v1.2.3


From cf8178f78645148dc38b28263b9d3f604148e3a8 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 15 Mar 2017 17:10:10 +0000
Subject: x86/microcode/AMD: Remove redundant NULL check on mc

mc is a pointer to the static u8 array amd_ucode_patch and
therefore can never be null, so the check is redundant. Remove it.

Detected by CoverityScan, CID#1372871 ("Logically Dead Code")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: kernel-janitors@vger.kernel.org
Cc: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/20170315171010.17536-1-colin.king@canonical.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/microcode/amd.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 7889ae492af0..1d38e53c2d5c 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -352,8 +352,6 @@ void reload_ucode_amd(void)
 	u32 rev, dummy;
 
 	mc = (struct microcode_amd *)amd_ucode_patch;
-	if (!mc)
-		return;
 
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
-- 
cgit v1.2.3


From 9326947f2215e1816a9133b0b47e4c9200552777 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Thu, 23 Feb 2017 12:44:38 +0200
Subject: clk: tegra: Fix pll_a1 iddq register, add pll_a1

pll_a1 was using CLK_RST_CONTROLLER_PLLA1_MISC_0 for IDDQ control rather
than the correct register CLK_RST_CONTROLLER_PLLA1_MISC_1. Also add
pll_a1 to the set of clocks defined for Tegra210.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra210.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index 2896d2e783ce..2ef8d49537c2 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -1772,7 +1772,7 @@ static struct tegra_clk_pll_params pll_a1_params = {
 	.misc_reg = PLLA1_MISC0,
 	.lock_mask = PLLCX_BASE_LOCK,
 	.lock_delay = 300,
-	.iddq_reg = PLLA1_MISC0,
+	.iddq_reg = PLLA1_MISC1,
 	.iddq_bit_idx = PLLCX_IDDQ_BIT,
 	.reset_reg = PLLA1_MISC0,
 	.reset_bit_idx = PLLCX_RESET_BIT,
@@ -2209,6 +2209,7 @@ static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_pll_c4_out2] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT2, .present = true },
 	[tegra_clk_pll_c4_out3] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT3, .present = true },
 	[tegra_clk_apb2ape] = { .dt_id = TEGRA210_CLK_APB2APE, .present = true },
+	[tegra_clk_pll_a1] = { .dt_id = TEGRA210_CLK_PLL_A1, .present = true },
 };
 
 static struct tegra_devclk devclks[] __initdata = {
-- 
cgit v1.2.3


From 34ac2c278b306cc3006dd5cbfaff4ec52065bf6f Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Thu, 23 Feb 2017 12:44:39 +0200
Subject: clk: tegra: Fix ISP clock modelling

The 2 ISP clocks (ispa and ispb) share a mux/divider control. So model
this as 1 mux/divider clock and child gate clocks.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-id.h               |  1 +
 drivers/clk/tegra/clk-tegra-periph.c     | 11 +++++++++--
 drivers/clk/tegra/clk-tegra210.c         |  1 +
 include/dt-bindings/clock/tegra210-car.h |  4 ++--
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/clk/tegra/clk-id.h b/drivers/clk/tegra/clk-id.h
index 5738635c5274..1019eb8eff4d 100644
--- a/drivers/clk/tegra/clk-id.h
+++ b/drivers/clk/tegra/clk-id.h
@@ -307,6 +307,7 @@ enum clk_id {
 	tegra_clk_xusb_ssp_src,
 	tegra_clk_sclk_mux,
 	tegra_clk_sor_safe,
+	tegra_clk_ispa,
 	tegra_clk_max,
 };
 
diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c
index 4ce4e7fb1124..19b00b77ecbe 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -168,6 +168,12 @@
 			      0, TEGRA_PERIPH_NO_GATE, _clk_id,\
 			      _parents##_idx, 0, _lock)
 
+#define MUX8_NOGATE(_name, _parents, _offset, _clk_id)	\
+	TEGRA_INIT_DATA_TABLE(_name, NULL, NULL, _parents, _offset,	\
+			      29, MASK(3), 0, 0, 8, 1, TEGRA_DIVIDER_ROUND_UP,\
+			      0, TEGRA_PERIPH_NO_GATE, _clk_id,\
+			      _parents##_idx, 0, NULL)
+
 #define INT(_name, _parents, _offset,	\
 			    _clk_num, _gate_flags, _clk_id)	\
 	TEGRA_INIT_DATA_TABLE(_name, NULL, NULL, _parents, _offset,\
@@ -739,7 +745,7 @@ static struct tegra_periph_init_data periph_clks[] = {
 	MUX8("soc_therm", mux_clkm_pllc_pllp_plla, CLK_SOURCE_SOC_THERM, 78, TEGRA_PERIPH_ON_APB, tegra_clk_soc_therm_8),
 	MUX8("vi_sensor", mux_pllm_pllc2_c_c3_pllp_plla, CLK_SOURCE_VI_SENSOR, 164, TEGRA_PERIPH_NO_RESET, tegra_clk_vi_sensor_8),
 	MUX8("isp", mux_pllm_pllc_pllp_plla_clkm_pllc4, CLK_SOURCE_ISP, 23, TEGRA_PERIPH_ON_APB, tegra_clk_isp_8),
-	MUX8("isp", mux_pllc_pllp_plla1_pllc2_c3_clkm_pllc4, CLK_SOURCE_ISP, 23, TEGRA_PERIPH_ON_APB, tegra_clk_isp_9),
+	MUX8_NOGATE("isp", mux_pllc_pllp_plla1_pllc2_c3_clkm_pllc4, CLK_SOURCE_ISP, tegra_clk_isp_9),
 	MUX8("entropy", mux_pllp_clkm1, CLK_SOURCE_ENTROPY, 149,  0, tegra_clk_entropy),
 	MUX8("entropy", mux_pllp_clkm_clk32_plle, CLK_SOURCE_ENTROPY, 149,  0, tegra_clk_entropy_8),
 	MUX8("hdmi_audio", mux_pllp3_pllc_clkm, CLK_SOURCE_HDMI_AUDIO, 176, TEGRA_PERIPH_NO_RESET, tegra_clk_hdmi_audio),
@@ -819,7 +825,8 @@ static struct tegra_periph_init_data gate_clks[] = {
 	GATE("xusb_dev", "xusb_dev_src", 95, 0, tegra_clk_xusb_dev, 0),
 	GATE("emc", "emc_mux", 57, 0, tegra_clk_emc, CLK_IGNORE_UNUSED),
 	GATE("sata_cold", "clk_m", 129, TEGRA_PERIPH_ON_APB, tegra_clk_sata_cold, 0),
-	GATE("ispb", "clk_m", 3, 0, tegra_clk_ispb, 0),
+	GATE("ispa", "isp", 23, 0, tegra_clk_ispa, 0),
+	GATE("ispb", "isp", 3, 0, tegra_clk_ispb, 0),
 	GATE("vim2_clk", "clk_m", 11, 0, tegra_clk_vim2_clk, 0),
 	GATE("pcie", "clk_m", 70, 0, tegra_clk_pcie, 0),
 	GATE("gpu", "pll_ref", 184, 0, tegra_clk_gpu, 0),
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index 2ef8d49537c2..7bda8ba449a8 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -2210,6 +2210,7 @@ static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_pll_c4_out3] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT3, .present = true },
 	[tegra_clk_apb2ape] = { .dt_id = TEGRA210_CLK_APB2APE, .present = true },
 	[tegra_clk_pll_a1] = { .dt_id = TEGRA210_CLK_PLL_A1, .present = true },
+	[tegra_clk_ispa] = { .dt_id = TEGRA210_CLK_ISPA, .present = true },
 };
 
 static struct tegra_devclk devclks[] __initdata = {
diff --git a/include/dt-bindings/clock/tegra210-car.h b/include/dt-bindings/clock/tegra210-car.h
index 35288b20f2c9..f5c6563ab2d6 100644
--- a/include/dt-bindings/clock/tegra210-car.h
+++ b/include/dt-bindings/clock/tegra210-car.h
@@ -39,7 +39,7 @@
 /* 20 (register bit affects vi and vi_sensor) */
 /* 21 */
 #define TEGRA210_CLK_USBD 22
-#define TEGRA210_CLK_ISP 23
+#define TEGRA210_CLK_ISPA 23
 /* 24 */
 /* 25 */
 #define TEGRA210_CLK_DISP2 26
@@ -349,7 +349,7 @@
 #define TEGRA210_CLK_PLL_RE_OUT1 319
 /* 320 */
 /* 321 */
-/* 322 */
+#define TEGRA210_CLK_ISP 322
 /* 323 */
 /* 324 */
 /* 325 */
-- 
cgit v1.2.3


From e7a49675e2c273e5490ac4a5187fa1eeddbdd4b4 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Thu, 23 Feb 2017 12:44:40 +0200
Subject: clk: tegra: Correct afi clock parent

The parent for afi is actually mselect, not clk_m.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra-periph.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c
index 19b00b77ecbe..c9e795b190f2 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -815,7 +815,7 @@ static struct tegra_periph_init_data gate_clks[] = {
 	GATE("usb2", "clk_m", 58, 0, tegra_clk_usb2, 0),
 	GATE("usb3", "clk_m", 59, 0, tegra_clk_usb3, 0),
 	GATE("csi", "pll_p_out3", 52, 0, tegra_clk_csi, 0),
-	GATE("afi", "clk_m", 72, 0, tegra_clk_afi, 0),
+	GATE("afi", "mselect", 72, 0, tegra_clk_afi, 0),
 	GATE("csus", "clk_m", 92, TEGRA_PERIPH_NO_RESET, tegra_clk_csus, 0),
 	GATE("dds", "clk_m", 150, TEGRA_PERIPH_ON_APB, tegra_clk_dds, 0),
 	GATE("dp2", "clk_m", 152, TEGRA_PERIPH_ON_APB, tegra_clk_dp2, 0),
-- 
cgit v1.2.3


From 8809eeac21c4fc8c964a4bbcda53c7228ad23370 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Thu, 23 Feb 2017 12:44:41 +0200
Subject: clk: tegra: Remove non-existing pll_m_out1 clock

This clock doesn't actually exist, so remove it.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra210.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index 7bda8ba449a8..b7ef8a78eeb0 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -2115,7 +2115,6 @@ static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_pll_c2] = { .dt_id = TEGRA210_CLK_PLL_C2, .present = true },
 	[tegra_clk_pll_c3] = { .dt_id = TEGRA210_CLK_PLL_C3, .present = true },
 	[tegra_clk_pll_m] = { .dt_id = TEGRA210_CLK_PLL_M, .present = true },
-	[tegra_clk_pll_m_out1] = { .dt_id = TEGRA210_CLK_PLL_M_OUT1, .present = true },
 	[tegra_clk_pll_p] = { .dt_id = TEGRA210_CLK_PLL_P, .present = true },
 	[tegra_clk_pll_p_out1] = { .dt_id = TEGRA210_CLK_PLL_P_OUT1, .present = true },
 	[tegra_clk_pll_p_out3] = { .dt_id = TEGRA210_CLK_PLL_P_OUT3, .present = true },
@@ -2229,7 +2228,6 @@ static struct tegra_devclk devclks[] __initdata = {
 	{ .con_id = "pll_p_out3", .dt_id = TEGRA210_CLK_PLL_P_OUT3 },
 	{ .con_id = "pll_p_out4", .dt_id = TEGRA210_CLK_PLL_P_OUT4 },
 	{ .con_id = "pll_m", .dt_id = TEGRA210_CLK_PLL_M },
-	{ .con_id = "pll_m_out1", .dt_id = TEGRA210_CLK_PLL_M_OUT1 },
 	{ .con_id = "pll_x", .dt_id = TEGRA210_CLK_PLL_X },
 	{ .con_id = "pll_x_out0", .dt_id = TEGRA210_CLK_PLL_X_OUT0 },
 	{ .con_id = "pll_u", .dt_id = TEGRA210_CLK_PLL_U },
@@ -2404,9 +2402,6 @@ static void __init tegra210_pll_init(void __iomem *clk_base,
 	clk_register_clkdev(clk, "pll_mb", NULL);
 	clks[TEGRA210_CLK_PLL_MB] = clk;
 
-	clk_register_clkdev(clk, "pll_m_out1", NULL);
-	clks[TEGRA210_CLK_PLL_M_OUT1] = clk;
-
 	/* PLLM_UD */
 	clk = clk_register_fixed_factor(NULL, "pll_m_ud", "pll_m",
 					CLK_SET_RATE_PARENT, 1, 1);
-- 
cgit v1.2.3


From 8dce89a1c2cf458875e0a703f3671bdb72b54c53 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Thu, 23 Feb 2017 12:44:42 +0200
Subject: clk: tegra: Don't warn for PLL defaults unnecessarily

If the PLL is on, only warn if the defaults are not yet set. Otherwise
be silent.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra210.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index b7ef8a78eeb0..fe698d2f9004 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -502,7 +502,7 @@ static void tegra210_pllcx_set_defaults(const char *name,
 	pllcx->params->defaults_set = true;
 
 	if (readl_relaxed(clk_base + pllcx->params->base_reg) &
-			PLL_ENABLE) {
+			PLL_ENABLE && !pllcx->params->defaults_set) {
 		/* PLL is ON: only check if defaults already set */
 		pllcx_check_defaults(pllcx->params);
 		pr_warn("%s already enabled. Postponing set full defaults\n",
@@ -608,7 +608,6 @@ static void tegra210_plld_set_defaults(struct tegra_clk_pll *plld)
 
 	if (readl_relaxed(clk_base + plld->params->base_reg) &
 			PLL_ENABLE) {
-		pr_warn("PLL_D already enabled. Postponing set full defaults\n");
 
 		/*
 		 * PLL is ON: check if defaults already set, then set those
@@ -625,6 +624,9 @@ static void tegra210_plld_set_defaults(struct tegra_clk_pll *plld)
 		_pll_misc_chk_default(clk_base, plld->params, 0, val,
 				~mask & PLLD_MISC0_WRITE_MASK);
 
+		if (!plld->params->defaults_set)
+			pr_warn("PLL_D already enabled. Postponing set full defaults\n");
+
 		/* Enable lock detect */
 		mask = PLLD_MISC0_LOCK_ENABLE | PLLD_MISC0_LOCK_OVERRIDE;
 		val = readl_relaxed(clk_base + plld->params->ext_misc_reg[0]);
@@ -896,7 +898,6 @@ static void tegra210_pllx_set_defaults(struct tegra_clk_pll *pllx)
 	val |= step_b << PLLX_MISC2_DYNRAMP_STEPB_SHIFT;
 
 	if (readl_relaxed(clk_base + pllx->params->base_reg) & PLL_ENABLE) {
-		pr_warn("PLL_X already enabled. Postponing set full defaults\n");
 
 		/*
 		 * PLL is ON: check if defaults already set, then set those
@@ -904,6 +905,8 @@ static void tegra210_pllx_set_defaults(struct tegra_clk_pll *pllx)
 		 */
 		pllx_check_defaults(pllx);
 
+		if (!pllx->params->defaults_set)
+			pr_warn("PLL_X already enabled. Postponing set full defaults\n");
 		/* Configure dyn ramp, disable lock override */
 		writel_relaxed(val, clk_base + pllx->params->ext_misc_reg[2]);
 
@@ -948,7 +951,6 @@ static void tegra210_pllmb_set_defaults(struct tegra_clk_pll *pllmb)
 	pllmb->params->defaults_set = true;
 
 	if (val & PLL_ENABLE) {
-		pr_warn("PLL_MB already enabled. Postponing set full defaults\n");
 
 		/*
 		 * PLL is ON: check if defaults already set, then set those
@@ -959,6 +961,8 @@ static void tegra210_pllmb_set_defaults(struct tegra_clk_pll *pllmb)
 		_pll_misc_chk_default(clk_base, pllmb->params, 0, val,
 				~mask & PLLMB_MISC1_WRITE_MASK);
 
+		if (!pllmb->params->defaults_set)
+			pr_warn("PLL_MB already enabled. Postponing set full defaults\n");
 		/* Enable lock detect */
 		val = readl_relaxed(clk_base + pllmb->params->ext_misc_reg[0]);
 		val &= ~mask;
@@ -1008,13 +1012,14 @@ static void tegra210_pllp_set_defaults(struct tegra_clk_pll *pllp)
 	pllp->params->defaults_set = true;
 
 	if (val & PLL_ENABLE) {
-		pr_warn("PLL_P already enabled. Postponing set full defaults\n");
 
 		/*
 		 * PLL is ON: check if defaults already set, then set those
 		 * that can be updated in flight.
 		 */
 		pllp_check_defaults(pllp, true);
+		if (!pllp->params->defaults_set)
+			pr_warn("PLL_P already enabled. Postponing set full defaults\n");
 
 		/* Enable lock detect */
 		val = readl_relaxed(clk_base + pllp->params->ext_misc_reg[0]);
@@ -1069,13 +1074,14 @@ static void tegra210_pllu_set_defaults(struct tegra_clk_pll *pllu)
 	pllu->params->defaults_set = true;
 
 	if (val & PLL_ENABLE) {
-		pr_warn("PLL_U already enabled. Postponing set full defaults\n");
 
 		/*
 		 * PLL is ON: check if defaults already set, then set those
 		 * that can be updated in flight.
 		 */
 		pllu_check_defaults(pllu, false);
+		if (!pllu->params->defaults_set)
+			pr_warn("PLL_U already enabled. Postponing set full defaults\n");
 
 		/* Enable lock detect */
 		val = readl_relaxed(clk_base + pllu->params->ext_misc_reg[0]);
-- 
cgit v1.2.3


From ef6ed2b9562c1b2354dbbbcfeacd37ce91111502 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Thu, 23 Feb 2017 12:44:43 +0200
Subject: clk: tegra: Correct tegra210_pll_fixed_mdiv_cfg rate calculation

Return the actually achieved rate in cfg->output_rate rather than just
the requested rate. This is important to make clk_round_rate() return
the correct result.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra210.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index fe698d2f9004..58d7f9ce9197 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -1222,6 +1222,7 @@ static int tegra210_pll_fixed_mdiv_cfg(struct clk_hw *hw,
 	cfg->n = p_rate / cf;
 
 	cfg->sdm_data = 0;
+	cfg->output_rate = input_rate;
 	if (params->sdm_ctrl_reg) {
 		unsigned long rem = p_rate - cf * cfg->n;
 		/* If ssc is enabled SDM enabled as well, even for integer n */
@@ -1232,10 +1233,15 @@ static int tegra210_pll_fixed_mdiv_cfg(struct clk_hw *hw,
 			s -= PLL_SDM_COEFF / 2;
 			cfg->sdm_data = sdin_din_to_data(s);
 		}
+		cfg->output_rate *= cfg->n * PLL_SDM_COEFF + PLL_SDM_COEFF/2 +
+					sdin_data_to_din(cfg->sdm_data);
+		cfg->output_rate /= p * cfg->m * PLL_SDM_COEFF;
+	} else {
+		cfg->output_rate *= cfg->n;
+		cfg->output_rate /= p * cfg->m;
 	}
 
 	cfg->input_rate = input_rate;
-	cfg->output_rate = rate;
 
 	return 0;
 }
-- 
cgit v1.2.3


From e589376dab0753bc24c20867cd4f65290ff76381 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Thu, 23 Feb 2017 12:44:44 +0200
Subject: clk: tegra: Fix type for m field

When used as part of fractional ndiv calculations, the current range is
not enough because the denominator of the fraction is multiplied with m.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/tegra/clk.h b/drivers/clk/tegra/clk.h
index 6ba82ecffd4d..a62ea736398f 100644
--- a/drivers/clk/tegra/clk.h
+++ b/drivers/clk/tegra/clk.h
@@ -116,7 +116,7 @@ struct tegra_clk_pll_freq_table {
 	unsigned long	input_rate;
 	unsigned long	output_rate;
 	u32		n;
-	u16		m;
+	u32		m;
 	u8		p;
 	u8		cpcon;
 	u16		sdm_data;
-- 
cgit v1.2.3


From bfa34832df1fffb79c1719d4016e9cacf0f83b22 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Tue, 28 Feb 2017 16:37:17 +0200
Subject: clk: tegra: Add CEC clock

This clock is used to clock the HDMI CEC interface.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-id.h                      | 1 +
 drivers/clk/tegra/clk-tegra-periph.c            | 1 +
 drivers/clk/tegra/clk-tegra114.c                | 1 +
 drivers/clk/tegra/clk-tegra124.c                | 1 +
 drivers/clk/tegra/clk-tegra210.c                | 1 +
 drivers/clk/tegra/clk-tegra30.c                 | 1 +
 include/dt-bindings/clock/tegra114-car.h        | 2 +-
 include/dt-bindings/clock/tegra124-car-common.h | 2 +-
 include/dt-bindings/clock/tegra210-car.h        | 2 +-
 include/dt-bindings/clock/tegra30-car.h         | 2 +-
 10 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/clk/tegra/clk-id.h b/drivers/clk/tegra/clk-id.h
index 1019eb8eff4d..fc978b2bd1ce 100644
--- a/drivers/clk/tegra/clk-id.h
+++ b/drivers/clk/tegra/clk-id.h
@@ -308,6 +308,7 @@ enum clk_id {
 	tegra_clk_sclk_mux,
 	tegra_clk_sor_safe,
 	tegra_clk_ispa,
+	tegra_clk_cec,
 	tegra_clk_max,
 };
 
diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c
index c9e795b190f2..a2aed27b3265 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -837,6 +837,7 @@ static struct tegra_periph_init_data gate_clks[] = {
 	GATE("pll_p_out_cpu", "pll_p", 223, 0, tegra_clk_pll_p_out_cpu, 0),
 	GATE("pll_p_out_adsp", "pll_p", 187, 0, tegra_clk_pll_p_out_adsp, 0),
 	GATE("apb2ape", "clk_m", 107, 0, tegra_clk_apb2ape, 0),
+	GATE("cec", "pclk", 136, 0, tegra_clk_cec, 0),
 };
 
 static struct tegra_periph_init_data div_clks[] = {
diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c
index 933b5dd698b8..fd1a99c05c2d 100644
--- a/drivers/clk/tegra/clk-tegra114.c
+++ b/drivers/clk/tegra/clk-tegra114.c
@@ -819,6 +819,7 @@ static struct tegra_clk tegra114_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_clk_out_3_mux] = { .dt_id = TEGRA114_CLK_CLK_OUT_3_MUX, .present = true },
 	[tegra_clk_dsia_mux] = { .dt_id = TEGRA114_CLK_DSIA_MUX, .present = true },
 	[tegra_clk_dsib_mux] = { .dt_id = TEGRA114_CLK_DSIB_MUX, .present = true },
+	[tegra_clk_cec] = { .dt_id = TEGRA114_CLK_CEC, .present = true },
 };
 
 static struct tegra_devclk devclks[] __initdata = {
diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c
index a112d3d2bff1..e81ea5b11577 100644
--- a/drivers/clk/tegra/clk-tegra124.c
+++ b/drivers/clk/tegra/clk-tegra124.c
@@ -928,6 +928,7 @@ static struct tegra_clk tegra124_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_clk_out_1_mux] = { .dt_id = TEGRA124_CLK_CLK_OUT_1_MUX, .present = true },
 	[tegra_clk_clk_out_2_mux] = { .dt_id = TEGRA124_CLK_CLK_OUT_2_MUX, .present = true },
 	[tegra_clk_clk_out_3_mux] = { .dt_id = TEGRA124_CLK_CLK_OUT_3_MUX, .present = true },
+	[tegra_clk_cec] = { .dt_id = TEGRA124_CLK_CEC, .present = true },
 };
 
 static struct tegra_devclk devclks[] __initdata = {
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index 58d7f9ce9197..bdb296ad1151 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -2222,6 +2222,7 @@ static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_apb2ape] = { .dt_id = TEGRA210_CLK_APB2APE, .present = true },
 	[tegra_clk_pll_a1] = { .dt_id = TEGRA210_CLK_PLL_A1, .present = true },
 	[tegra_clk_ispa] = { .dt_id = TEGRA210_CLK_ISPA, .present = true },
+	[tegra_clk_cec] = { .dt_id = TEGRA210_CLK_CEC, .present = true },
 };
 
 static struct tegra_devclk devclks[] __initdata = {
diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c
index 8e2db5ead8da..a2d163f759b4 100644
--- a/drivers/clk/tegra/clk-tegra30.c
+++ b/drivers/clk/tegra/clk-tegra30.c
@@ -817,6 +817,7 @@ static struct tegra_clk tegra30_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_pll_p_out4] = { .dt_id = TEGRA30_CLK_PLL_P_OUT4, .present = true },
 	[tegra_clk_pll_a] = { .dt_id = TEGRA30_CLK_PLL_A, .present = true },
 	[tegra_clk_pll_a_out0] = { .dt_id = TEGRA30_CLK_PLL_A_OUT0, .present = true },
+	[tegra_clk_cec] = { .dt_id = TEGRA30_CLK_CEC, .present = true },
 };
 
 static const char *pll_e_parents[] = { "pll_ref", "pll_p" };
diff --git a/include/dt-bindings/clock/tegra114-car.h b/include/dt-bindings/clock/tegra114-car.h
index 534c03f8ad72..ed5ca218c857 100644
--- a/include/dt-bindings/clock/tegra114-car.h
+++ b/include/dt-bindings/clock/tegra114-car.h
@@ -156,7 +156,7 @@
 /* 133 */
 /* 134 */
 /* 135 */
-/* 136 */
+#define TEGRA114_CLK_CEC 136
 /* 137 */
 /* 138 */
 /* 139 */
diff --git a/include/dt-bindings/clock/tegra124-car-common.h b/include/dt-bindings/clock/tegra124-car-common.h
index a2156090563f..9352c7e2ce0b 100644
--- a/include/dt-bindings/clock/tegra124-car-common.h
+++ b/include/dt-bindings/clock/tegra124-car-common.h
@@ -156,7 +156,7 @@
 /* 133 */
 /* 134 */
 /* 135 */
-/* 136 */
+#define TEGRA124_CLK_CEC 136
 /* 137 */
 /* 138 */
 /* 139 */
diff --git a/include/dt-bindings/clock/tegra210-car.h b/include/dt-bindings/clock/tegra210-car.h
index f5c6563ab2d6..e7a2578831f7 100644
--- a/include/dt-bindings/clock/tegra210-car.h
+++ b/include/dt-bindings/clock/tegra210-car.h
@@ -156,7 +156,7 @@
 /* 133 */
 /* 134 */
 /* 135 */
-/* 136 */
+#define TEGRA210_CLK_CEC 136
 /* 137 */
 /* 138 */
 /* 139 */
diff --git a/include/dt-bindings/clock/tegra30-car.h b/include/dt-bindings/clock/tegra30-car.h
index 889e49ba0aa3..7213354b9652 100644
--- a/include/dt-bindings/clock/tegra30-car.h
+++ b/include/dt-bindings/clock/tegra30-car.h
@@ -156,7 +156,7 @@
 /* 133 */
 /* 134 */
 /* 135 */
-/* 136 */
+#define TEGRA30_CLK_CEC 136
 /* 137 */
 /* 138 */
 /* 139 */
-- 
cgit v1.2.3


From 319af7975c9ff500a30b2e6c4433c1f327283884 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Tue, 28 Feb 2017 16:37:18 +0200
Subject: clk: tegra: Define Tegra210 DMIC sync clocks

Tegra210 has 3 DMIC inputs which can be clocked from the recovered clock
of several other audio inputs (eg. i2s0, i2s1, ...). To model this, we
add a 3 new clocks similar to the audio* clocks which handle the same
function for the I2S and SPDIF clocks.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-id.h               |  6 +++
 drivers/clk/tegra/clk-tegra-audio.c      | 85 +++++++++++++++++++++++---------
 drivers/clk/tegra/clk-tegra210.c         |  6 +++
 include/dt-bindings/clock/tegra210-car.h |  9 +++-
 4 files changed, 81 insertions(+), 25 deletions(-)

diff --git a/drivers/clk/tegra/clk-id.h b/drivers/clk/tegra/clk-id.h
index fc978b2bd1ce..b40293c19c19 100644
--- a/drivers/clk/tegra/clk-id.h
+++ b/drivers/clk/tegra/clk-id.h
@@ -309,6 +309,12 @@ enum clk_id {
 	tegra_clk_sor_safe,
 	tegra_clk_ispa,
 	tegra_clk_cec,
+	tegra_clk_dmic1_sync_clk,
+	tegra_clk_dmic2_sync_clk,
+	tegra_clk_dmic3_sync_clk,
+	tegra_clk_dmic1_sync_clk_mux,
+	tegra_clk_dmic2_sync_clk_mux,
+	tegra_clk_dmic3_sync_clk_mux,
 	tegra_clk_max,
 };
 
diff --git a/drivers/clk/tegra/clk-tegra-audio.c b/drivers/clk/tegra/clk-tegra-audio.c
index e2bfa9b368f6..b37cae7af26d 100644
--- a/drivers/clk/tegra/clk-tegra-audio.c
+++ b/drivers/clk/tegra/clk-tegra-audio.c
@@ -31,6 +31,9 @@
 #define AUDIO_SYNC_CLK_I2S3 0x4ac
 #define AUDIO_SYNC_CLK_I2S4 0x4b0
 #define AUDIO_SYNC_CLK_SPDIF 0x4b4
+#define AUDIO_SYNC_CLK_DMIC1 0x560
+#define AUDIO_SYNC_CLK_DMIC2 0x564
+#define AUDIO_SYNC_CLK_DMIC3 0x6b8
 
 #define AUDIO_SYNC_DOUBLER 0x49c
 
@@ -91,8 +94,14 @@ struct tegra_audio2x_clk_initdata {
 
 static DEFINE_SPINLOCK(clk_doubler_lock);
 
-static const char *mux_audio_sync_clk[] = { "spdif_in_sync", "i2s0_sync",
-	"i2s1_sync", "i2s2_sync", "i2s3_sync", "i2s4_sync", "vimclk_sync",
+static const char * const mux_audio_sync_clk[] = { "spdif_in_sync",
+	"i2s0_sync", "i2s1_sync", "i2s2_sync", "i2s3_sync", "i2s4_sync",
+	"pll_a_out0", "vimclk_sync",
+};
+
+static const char * const mux_dmic_sync_clk[] = { "unused", "i2s0_sync",
+	"i2s1_sync", "i2s2_sync", "i2s3_sync", "i2s4_sync", "pll_a_out0",
+	"vimclk_sync",
 };
 
 static struct tegra_sync_source_initdata sync_source_clks[] __initdata = {
@@ -114,6 +123,12 @@ static struct tegra_audio_clk_initdata audio_clks[] = {
 	AUDIO(spdif, AUDIO_SYNC_CLK_SPDIF),
 };
 
+static struct tegra_audio_clk_initdata dmic_clks[] = {
+	AUDIO(dmic1_sync_clk, AUDIO_SYNC_CLK_DMIC1),
+	AUDIO(dmic2_sync_clk, AUDIO_SYNC_CLK_DMIC2),
+	AUDIO(dmic3_sync_clk, AUDIO_SYNC_CLK_DMIC3),
+};
+
 static struct tegra_audio2x_clk_initdata audio2x_clks[] = {
 	AUDIO2X(audio0, 113, 24),
 	AUDIO2X(audio1, 114, 25),
@@ -123,6 +138,41 @@ static struct tegra_audio2x_clk_initdata audio2x_clks[] = {
 	AUDIO2X(spdif, 118, 29),
 };
 
+static void __init tegra_audio_sync_clk_init(void __iomem *clk_base,
+				      struct tegra_clk *tegra_clks,
+				      struct tegra_audio_clk_initdata *sync,
+				      int num_sync_clks,
+				      const char * const *mux_names,
+				      int num_mux_inputs)
+{
+	struct clk *clk;
+	struct clk **dt_clk;
+	struct tegra_audio_clk_initdata *data;
+	int i;
+
+	for (i = 0, data = sync; i < num_sync_clks; i++, data++) {
+		dt_clk = tegra_lookup_dt_id(data->mux_clk_id, tegra_clks);
+		if (!dt_clk)
+			continue;
+
+		clk = clk_register_mux(NULL, data->mux_name, mux_names,
+					num_mux_inputs,
+					CLK_SET_RATE_NO_REPARENT,
+					clk_base + data->offset, 0, 3, 0,
+					NULL);
+		*dt_clk = clk;
+
+		dt_clk = tegra_lookup_dt_id(data->gate_clk_id, tegra_clks);
+		if (!dt_clk)
+			continue;
+
+		clk = clk_register_gate(NULL, data->gate_name, data->mux_name,
+					0, clk_base + data->offset, 4,
+					CLK_GATE_SET_TO_DISABLE, NULL);
+		*dt_clk = clk;
+	}
+}
+
 void __init tegra_audio_clk_init(void __iomem *clk_base,
 			void __iomem *pmc_base, struct tegra_clk *tegra_clks,
 			struct tegra_audio_clk_info *audio_info,
@@ -176,30 +226,17 @@ void __init tegra_audio_clk_init(void __iomem *clk_base,
 		*dt_clk = clk;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(audio_clks); i++) {
-		struct tegra_audio_clk_initdata *data;
+	tegra_audio_sync_clk_init(clk_base, tegra_clks, audio_clks,
+				  ARRAY_SIZE(audio_clks), mux_audio_sync_clk,
+				  ARRAY_SIZE(mux_audio_sync_clk));
 
-		data = &audio_clks[i];
-		dt_clk = tegra_lookup_dt_id(data->mux_clk_id, tegra_clks);
+	/* make sure the DMIC sync clocks have a valid parent */
+	for (i = 0; i < ARRAY_SIZE(dmic_clks); i++)
+		writel_relaxed(1, clk_base + dmic_clks[i].offset);
 
-		if (!dt_clk)
-			continue;
-		clk = clk_register_mux(NULL, data->mux_name, mux_audio_sync_clk,
-					ARRAY_SIZE(mux_audio_sync_clk),
-					CLK_SET_RATE_NO_REPARENT,
-					clk_base + data->offset, 0, 3, 0,
-					NULL);
-		*dt_clk = clk;
-
-		dt_clk = tegra_lookup_dt_id(data->gate_clk_id, tegra_clks);
-		if (!dt_clk)
-			continue;
-
-		clk = clk_register_gate(NULL, data->gate_name, data->mux_name,
-					0, clk_base + data->offset, 4,
-					CLK_GATE_SET_TO_DISABLE, NULL);
-		*dt_clk = clk;
-	}
+	tegra_audio_sync_clk_init(clk_base, tegra_clks, dmic_clks,
+				  ARRAY_SIZE(dmic_clks), mux_dmic_sync_clk,
+				  ARRAY_SIZE(mux_dmic_sync_clk));
 
 	for (i = 0; i < ARRAY_SIZE(audio2x_clks); i++) {
 		struct tegra_audio2x_clk_initdata *data;
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index bdb296ad1151..ca63901a5416 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -2223,6 +2223,12 @@ static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_pll_a1] = { .dt_id = TEGRA210_CLK_PLL_A1, .present = true },
 	[tegra_clk_ispa] = { .dt_id = TEGRA210_CLK_ISPA, .present = true },
 	[tegra_clk_cec] = { .dt_id = TEGRA210_CLK_CEC, .present = true },
+	[tegra_clk_dmic1_sync_clk] = { .dt_id = TEGRA210_CLK_DMIC1_SYNC_CLK, .present = true },
+	[tegra_clk_dmic2_sync_clk] = { .dt_id = TEGRA210_CLK_DMIC2_SYNC_CLK, .present = true },
+	[tegra_clk_dmic3_sync_clk] = { .dt_id = TEGRA210_CLK_DMIC3_SYNC_CLK, .present = true },
+	[tegra_clk_dmic1_sync_clk_mux] = { .dt_id = TEGRA210_CLK_DMIC1_SYNC_CLK_MUX, .present = true },
+	[tegra_clk_dmic2_sync_clk_mux] = { .dt_id = TEGRA210_CLK_DMIC2_SYNC_CLK_MUX, .present = true },
+	[tegra_clk_dmic3_sync_clk_mux] = { .dt_id = TEGRA210_CLK_DMIC3_SYNC_CLK_MUX, .present = true },
 };
 
 static struct tegra_devclk devclks[] __initdata = {
diff --git a/include/dt-bindings/clock/tegra210-car.h b/include/dt-bindings/clock/tegra210-car.h
index e7a2578831f7..5aa10278ea1b 100644
--- a/include/dt-bindings/clock/tegra210-car.h
+++ b/include/dt-bindings/clock/tegra210-car.h
@@ -396,6 +396,13 @@
 #define TEGRA210_CLK_PLL_C_UD 364
 #define TEGRA210_CLK_SCLK_MUX 365
 
-#define TEGRA210_CLK_CLK_MAX 366
+#define TEGRA210_CLK_DMIC1_SYNC_CLK 388
+#define TEGRA210_CLK_DMIC1_SYNC_CLK_MUX 389
+#define TEGRA210_CLK_DMIC2_SYNC_CLK 390
+#define TEGRA210_CLK_DMIC2_SYNC_CLK_MUX 391
+#define TEGRA210_CLK_DMIC3_SYNC_CLK 392
+#define TEGRA210_CLK_DMIC3_SYNC_CLK_MUX 393
+
+#define TEGRA210_CLK_CLK_MAX 394
 
 #endif	/* _DT_BINDINGS_CLOCK_TEGRA210_CAR_H */
-- 
cgit v1.2.3


From 9e8c93edd22cc466bfb71b7a73c8122df0f39597 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Tue, 28 Feb 2017 16:37:19 +0200
Subject: clk: tegra: Fix constness for peripheral clocks

checkpatch now warns for const ** and expects const * const * to be used
instead. This means we have to update the prototypes and function
declarations to handle this change.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-periph.c | 4 ++--
 drivers/clk/tegra/clk.h        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/clk/tegra/clk-periph.c b/drivers/clk/tegra/clk-periph.c
index a17ca6d7f649..a5a5809dce6d 100644
--- a/drivers/clk/tegra/clk-periph.c
+++ b/drivers/clk/tegra/clk-periph.c
@@ -138,7 +138,7 @@ static const struct clk_ops tegra_clk_periph_no_gate_ops = {
 };
 
 static struct clk *_tegra_clk_register_periph(const char *name,
-			const char **parent_names, int num_parents,
+			const char * const *parent_names, int num_parents,
 			struct tegra_clk_periph *periph,
 			void __iomem *clk_base, u32 offset,
 			unsigned long flags)
@@ -186,7 +186,7 @@ static struct clk *_tegra_clk_register_periph(const char *name,
 }
 
 struct clk *tegra_clk_register_periph(const char *name,
-		const char **parent_names, int num_parents,
+		const char * const *parent_names, int num_parents,
 		struct tegra_clk_periph *periph, void __iomem *clk_base,
 		u32 offset, unsigned long flags)
 {
diff --git a/drivers/clk/tegra/clk.h b/drivers/clk/tegra/clk.h
index a62ea736398f..8b09021eee00 100644
--- a/drivers/clk/tegra/clk.h
+++ b/drivers/clk/tegra/clk.h
@@ -586,7 +586,7 @@ struct tegra_clk_periph {
 
 extern const struct clk_ops tegra_clk_periph_ops;
 struct clk *tegra_clk_register_periph(const char *name,
-		const char **parent_names, int num_parents,
+		const char * const *parent_names, int num_parents,
 		struct tegra_clk_periph *periph, void __iomem *clk_base,
 		u32 offset, unsigned long flags);
 struct clk *tegra_clk_register_periph_nodiv(const char *name,
@@ -626,7 +626,7 @@ struct tegra_periph_init_data {
 	const char *name;
 	int clk_id;
 	union {
-		const char **parent_names;
+		const char *const *parent_names;
 		const char *parent_name;
 	} p;
 	int num_parents;
-- 
cgit v1.2.3


From 6cfc8bc9ee66677fbd1b3331167d6f520e30b6fd Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Tue, 28 Feb 2017 16:37:20 +0200
Subject: clk: tegra: Define Tegra210 DMIC clocks

Tegra210 has 3 inputs for Digital Microphones (DMICs). Provide the
required clocks for them.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-id.h           |  5 ++++-
 drivers/clk/tegra/clk-tegra-periph.c | 21 +++++++++++++++++++++
 drivers/clk/tegra/clk-tegra210.c     |  3 +++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/tegra/clk-id.h b/drivers/clk/tegra/clk-id.h
index b40293c19c19..d3a79a8011ca 100644
--- a/drivers/clk/tegra/clk-id.h
+++ b/drivers/clk/tegra/clk-id.h
@@ -307,8 +307,11 @@ enum clk_id {
 	tegra_clk_xusb_ssp_src,
 	tegra_clk_sclk_mux,
 	tegra_clk_sor_safe,
-	tegra_clk_ispa,
 	tegra_clk_cec,
+	tegra_clk_ispa,
+	tegra_clk_dmic1,
+	tegra_clk_dmic2,
+	tegra_clk_dmic3,
 	tegra_clk_dmic1_sync_clk,
 	tegra_clk_dmic2_sync_clk,
 	tegra_clk_dmic3_sync_clk,
diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c
index a2aed27b3265..9e6ac11ccf75 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -138,6 +138,9 @@
 #define CLK_SOURCE_TSECB 0x6d8
 #define CLK_SOURCE_MAUD 0x6d4
 #define CLK_SOURCE_USB2_HSIC_TRK 0x6cc
+#define CLK_SOURCE_DMIC1 0x64c
+#define CLK_SOURCE_DMIC2 0x650
+#define CLK_SOURCE_DMIC3 0x6bc
 
 #define MASK(x) (BIT(x) - 1)
 
@@ -625,6 +628,21 @@ static const char *mux_clkm_plldp_sor0lvds[] = {
 };
 #define mux_clkm_plldp_sor0lvds_idx NULL
 
+static const char * const mux_dmic1[] = {
+	"pll_a_out0", "dmic1_sync_clk", "pll_p", "clk_m"
+};
+#define mux_dmic1_idx NULL
+
+static const char * const mux_dmic2[] = {
+	"pll_a_out0", "dmic2_sync_clk", "pll_p", "clk_m"
+};
+#define mux_dmic2_idx NULL
+
+static const char * const mux_dmic3[] = {
+	"pll_a_out0", "dmic3_sync_clk", "pll_p", "clk_m"
+};
+#define mux_dmic3_idx NULL
+
 static struct tegra_periph_init_data periph_clks[] = {
 	AUDIO("d_audio", CLK_SOURCE_D_AUDIO, 106, TEGRA_PERIPH_ON_APB, tegra_clk_d_audio),
 	AUDIO("dam0", CLK_SOURCE_DAM0, 108, TEGRA_PERIPH_ON_APB, tegra_clk_dam0),
@@ -794,6 +812,9 @@ static struct tegra_periph_init_data periph_clks[] = {
 	MUX("uartape", mux_pllp_pllc_clkm, CLK_SOURCE_UARTAPE, 212, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_uartape),
 	MUX8("tsecb", mux_pllp_pllc2_c_c3_clkm, CLK_SOURCE_TSECB, 206, 0, tegra_clk_tsecb),
 	MUX8("maud", mux_pllp_pllp_out3_clkm_clk32k_plla, CLK_SOURCE_MAUD, 202, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_maud),
+	MUX8("dmic1", mux_dmic1, CLK_SOURCE_DMIC1, 161, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_dmic1),
+	MUX8("dmic2", mux_dmic2, CLK_SOURCE_DMIC2, 162, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_dmic2),
+	MUX8("dmic3", mux_dmic3, CLK_SOURCE_DMIC3, 197, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_dmic3),
 };
 
 static struct tegra_periph_init_data gate_clks[] = {
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index ca63901a5416..cfe70781e2b4 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -2223,6 +2223,9 @@ static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_pll_a1] = { .dt_id = TEGRA210_CLK_PLL_A1, .present = true },
 	[tegra_clk_ispa] = { .dt_id = TEGRA210_CLK_ISPA, .present = true },
 	[tegra_clk_cec] = { .dt_id = TEGRA210_CLK_CEC, .present = true },
+	[tegra_clk_dmic1] = { .dt_id = TEGRA210_CLK_DMIC1, .present = true },
+	[tegra_clk_dmic2] = { .dt_id = TEGRA210_CLK_DMIC2, .present = true },
+	[tegra_clk_dmic3] = { .dt_id = TEGRA210_CLK_DMIC3, .present = true },
 	[tegra_clk_dmic1_sync_clk] = { .dt_id = TEGRA210_CLK_DMIC1_SYNC_CLK, .present = true },
 	[tegra_clk_dmic2_sync_clk] = { .dt_id = TEGRA210_CLK_DMIC2_SYNC_CLK, .present = true },
 	[tegra_clk_dmic3_sync_clk] = { .dt_id = TEGRA210_CLK_DMIC3_SYNC_CLK, .present = true },
-- 
cgit v1.2.3


From e827ba1840bc6a9540deb81c6df6943a19e0e891 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Tue, 28 Feb 2017 16:37:21 +0200
Subject: clk: tegra: Add super clock mux/divider

Add a super clock type which implements both mux and divider. This is
used for aclk.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-super.c | 87 +++++++++++++++++++++++++++++++++++++++++--
 drivers/clk/tegra/clk.h       |  7 +++-
 2 files changed, 89 insertions(+), 5 deletions(-)

diff --git a/drivers/clk/tegra/clk-super.c b/drivers/clk/tegra/clk-super.c
index 131d1b5085e2..84267cfc4433 100644
--- a/drivers/clk/tegra/clk-super.c
+++ b/drivers/clk/tegra/clk-super.c
@@ -121,9 +121,50 @@ out:
 	return err;
 }
 
+const struct clk_ops tegra_clk_super_mux_ops = {
+	.get_parent = clk_super_get_parent,
+	.set_parent = clk_super_set_parent,
+};
+
+static long clk_super_round_rate(struct clk_hw *hw, unsigned long rate,
+				 unsigned long *parent_rate)
+{
+	struct tegra_clk_super_mux *super = to_clk_super_mux(hw);
+	struct clk_hw *div_hw = &super->frac_div.hw;
+
+	__clk_hw_set_clk(div_hw, hw);
+
+	return super->div_ops->round_rate(div_hw, rate, parent_rate);
+}
+
+static unsigned long clk_super_recalc_rate(struct clk_hw *hw,
+					   unsigned long parent_rate)
+{
+	struct tegra_clk_super_mux *super = to_clk_super_mux(hw);
+	struct clk_hw *div_hw = &super->frac_div.hw;
+
+	__clk_hw_set_clk(div_hw, hw);
+
+	return super->div_ops->recalc_rate(div_hw, parent_rate);
+}
+
+static int clk_super_set_rate(struct clk_hw *hw, unsigned long rate,
+			      unsigned long parent_rate)
+{
+	struct tegra_clk_super_mux *super = to_clk_super_mux(hw);
+	struct clk_hw *div_hw = &super->frac_div.hw;
+
+	__clk_hw_set_clk(div_hw, hw);
+
+	return super->div_ops->set_rate(div_hw, rate, parent_rate);
+}
+
 const struct clk_ops tegra_clk_super_ops = {
 	.get_parent = clk_super_get_parent,
 	.set_parent = clk_super_set_parent,
+	.set_rate = clk_super_set_rate,
+	.round_rate = clk_super_round_rate,
+	.recalc_rate = clk_super_recalc_rate,
 };
 
 struct clk *tegra_clk_register_super_mux(const char *name,
@@ -136,13 +177,11 @@ struct clk *tegra_clk_register_super_mux(const char *name,
 	struct clk_init_data init;
 
 	super = kzalloc(sizeof(*super), GFP_KERNEL);
-	if (!super) {
-		pr_err("%s: could not allocate super clk\n", __func__);
+	if (!super)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	init.name = name;
-	init.ops = &tegra_clk_super_ops;
+	init.ops = &tegra_clk_super_mux_ops;
 	init.flags = flags;
 	init.parent_names = parent_names;
 	init.num_parents = num_parents;
@@ -163,3 +202,43 @@ struct clk *tegra_clk_register_super_mux(const char *name,
 
 	return clk;
 }
+
+struct clk *tegra_clk_register_super_clk(const char *name,
+		const char * const *parent_names, u8 num_parents,
+		unsigned long flags, void __iomem *reg, u8 clk_super_flags,
+		spinlock_t *lock)
+{
+	struct tegra_clk_super_mux *super;
+	struct clk *clk;
+	struct clk_init_data init;
+
+	super = kzalloc(sizeof(*super), GFP_KERNEL);
+	if (!super)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &tegra_clk_super_ops;
+	init.flags = flags;
+	init.parent_names = parent_names;
+	init.num_parents = num_parents;
+
+	super->reg = reg;
+	super->lock = lock;
+	super->width = 4;
+	super->flags = clk_super_flags;
+	super->frac_div.reg = reg + 4;
+	super->frac_div.shift = 16;
+	super->frac_div.width = 8;
+	super->frac_div.frac_width = 1;
+	super->frac_div.lock = lock;
+	super->div_ops = &tegra_clk_frac_div_ops;
+
+	/* Data in .init is copied by clk_register(), so stack variable OK */
+	super->hw.init = &init;
+
+	clk = clk_register(NULL, &super->hw);
+	if (IS_ERR(clk))
+		kfree(super);
+
+	return clk;
+}
diff --git a/drivers/clk/tegra/clk.h b/drivers/clk/tegra/clk.h
index 8b09021eee00..960e47e41fdf 100644
--- a/drivers/clk/tegra/clk.h
+++ b/drivers/clk/tegra/clk.h
@@ -686,6 +686,8 @@ struct tegra_periph_init_data {
 struct tegra_clk_super_mux {
 	struct clk_hw	hw;
 	void __iomem	*reg;
+	struct tegra_clk_frac_div frac_div;
+	const struct clk_ops	*div_ops;
 	u8		width;
 	u8		flags;
 	u8		div2_index;
@@ -702,7 +704,10 @@ struct clk *tegra_clk_register_super_mux(const char *name,
 		const char **parent_names, u8 num_parents,
 		unsigned long flags, void __iomem *reg, u8 clk_super_flags,
 		u8 width, u8 pllx_index, u8 div2_index, spinlock_t *lock);
-
+struct clk *tegra_clk_register_super_clk(const char *name,
+		const char * const *parent_names, u8 num_parents,
+		unsigned long flags, void __iomem *reg, u8 clk_super_flags,
+		spinlock_t *lock);
 /**
  * struct clk_init_table - clock initialization table
  * @clk_id:	clock id as mentioned in device tree bindings
-- 
cgit v1.2.3


From 24c3ebef1ab6d5620eaaa51f69223118cac97db6 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Tue, 28 Feb 2017 16:37:22 +0200
Subject: clk: tegra: Add aclk

This clock clocks the ADSP Cortex-A9.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra210.c         | 10 ++++++++++
 include/dt-bindings/clock/tegra210-car.h |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index cfe70781e2b4..9a2512a6b419 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -2308,6 +2308,11 @@ static struct tegra_audio_clk_info tegra210_audio_plls[] = {
 
 static struct clk **clks;
 
+static const char * const aclk_parents[] = {
+	"pll_a1", "pll_c", "pll_p", "pll_a_out0", "pll_c2", "pll_c3",
+	"clk_m"
+};
+
 static __init void tegra210_periph_clk_init(void __iomem *clk_base,
 					    void __iomem *pmc_base)
 {
@@ -2369,6 +2374,11 @@ static __init void tegra210_periph_clk_init(void __iomem *clk_base,
 	clk_register_clkdev(clk, "cml1", NULL);
 	clks[TEGRA210_CLK_CML1] = clk;
 
+	clk = tegra_clk_register_super_clk("aclk", aclk_parents,
+				ARRAY_SIZE(aclk_parents), 0, clk_base + 0x6e0,
+				0, NULL);
+	clks[TEGRA210_CLK_ACLK] = clk;
+
 	tegra_periph_clk_init(clk_base, pmc_base, tegra210_clks, &pll_p_params);
 }
 
diff --git a/include/dt-bindings/clock/tegra210-car.h b/include/dt-bindings/clock/tegra210-car.h
index 5aa10278ea1b..8744b19cca3e 100644
--- a/include/dt-bindings/clock/tegra210-car.h
+++ b/include/dt-bindings/clock/tegra210-car.h
@@ -396,6 +396,8 @@
 #define TEGRA210_CLK_PLL_C_UD 364
 #define TEGRA210_CLK_SCLK_MUX 365
 
+#define TEGRA210_CLK_ACLK 370
+
 #define TEGRA210_CLK_DMIC1_SYNC_CLK 388
 #define TEGRA210_CLK_DMIC1_SYNC_CLK_MUX 389
 #define TEGRA210_CLK_DMIC2_SYNC_CLK 390
-- 
cgit v1.2.3


From 3843832fc8cadc2d48ba4ea4cd350a696906ac42 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Tue, 28 Feb 2017 17:19:24 +0200
Subject: clk: tegra: Handle UTMIPLL IDDQ

Export UTMIPLL IDDQ functions. These will be needed when powergating the
XUSB partition.

Signed-off-by: BH Hsieh <bhsieh@nvidia.com>
Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra210.c | 26 ++++++++++++++++++++++++++
 include/linux/clk/tegra.h        |  2 ++
 2 files changed, 28 insertions(+)

diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index 9a2512a6b419..f89d2a912273 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -2313,6 +2313,32 @@ static const char * const aclk_parents[] = {
 	"clk_m"
 };
 
+void tegra210_put_utmipll_in_iddq(void)
+{
+	u32 reg;
+
+	reg = readl_relaxed(clk_base + UTMIPLL_HW_PWRDN_CFG0);
+
+	if (reg & UTMIPLL_HW_PWRDN_CFG0_UTMIPLL_LOCK) {
+		pr_err("trying to assert IDDQ while UTMIPLL is locked\n");
+		return;
+	}
+
+	reg |= UTMIPLL_HW_PWRDN_CFG0_IDDQ_OVERRIDE;
+	writel_relaxed(reg, clk_base + UTMIPLL_HW_PWRDN_CFG0);
+}
+EXPORT_SYMBOL_GPL(tegra210_put_utmipll_in_iddq);
+
+void tegra210_put_utmipll_out_iddq(void)
+{
+	u32 reg;
+
+	reg = readl_relaxed(clk_base + UTMIPLL_HW_PWRDN_CFG0);
+	reg &= ~UTMIPLL_HW_PWRDN_CFG0_IDDQ_OVERRIDE;
+	writel_relaxed(reg, clk_base + UTMIPLL_HW_PWRDN_CFG0);
+}
+EXPORT_SYMBOL_GPL(tegra210_put_utmipll_out_iddq);
+
 static __init void tegra210_periph_clk_init(void __iomem *clk_base,
 					    void __iomem *pmc_base)
 {
diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h
index 7007a5f48080..e17d32831e28 100644
--- a/include/linux/clk/tegra.h
+++ b/include/linux/clk/tegra.h
@@ -125,5 +125,7 @@ extern void tegra210_xusb_pll_hw_control_enable(void);
 extern void tegra210_xusb_pll_hw_sequence_start(void);
 extern void tegra210_sata_pll_hw_control_enable(void);
 extern void tegra210_sata_pll_hw_sequence_start(void);
+extern void tegra210_put_utmipll_in_iddq(void);
+extern void tegra210_put_utmipll_out_iddq(void);
 
 #endif /* __LINUX_CLK_TEGRA_H_ */
-- 
cgit v1.2.3


From 9619dba8325fce098bbc9ee2911d1b0150fec0c9 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Thu, 2 Mar 2017 15:22:05 +0200
Subject: clk: tegra: Fix disable unused for clocks sharing enable bit

In case 2 clocks share an enable bit and one of them is enabled by a
driver and the other one is not, CCF will think it's enabled because it
will only look at the HW state. Therefore it will disable the clock and
thus also disable the other clock which was enabled. Solve this by
reading the initial state of the enable bit and incrementing the
refcount if it's set.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-periph-gate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/clk/tegra/clk-periph-gate.c b/drivers/clk/tegra/clk-periph-gate.c
index 88127828befe..303ef32ee3f1 100644
--- a/drivers/clk/tegra/clk-periph-gate.c
+++ b/drivers/clk/tegra/clk-periph-gate.c
@@ -159,6 +159,9 @@ struct clk *tegra_clk_register_periph_gate(const char *name,
 	gate->enable_refcnt = enable_refcnt;
 	gate->regs = pregs;
 
+	if (read_enb(gate) & periph_clk_to_bit(gate))
+		enable_refcnt[clk_num]++;
+
 	/* Data in .init is copied by clk_register(), so stack variable OK */
 	gate->hw.init = &init;
 
-- 
cgit v1.2.3


From 4236e752f19d4dae372336859a18ca8a5bed9374 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Thu, 2 Mar 2017 16:16:16 +0200
Subject: clk: tegra: Implement reset control reset

For completeness, also implement this reset framework API for Tegra.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Arto Merilainen <amerilainen@nvidia.com>
Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/clk/tegra/clk.c b/drivers/clk/tegra/clk.c
index b2cdd9a235f4..ba923f0d5953 100644
--- a/drivers/clk/tegra/clk.c
+++ b/drivers/clk/tegra/clk.c
@@ -17,6 +17,7 @@
 #include <linux/clkdev.h>
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/delay.h>
 #include <linux/of.h>
 #include <linux/clk/tegra.h>
 #include <linux/reset-controller.h>
@@ -182,6 +183,20 @@ static int tegra_clk_rst_deassert(struct reset_controller_dev *rcdev,
 	return -EINVAL;
 }
 
+static int tegra_clk_rst_reset(struct reset_controller_dev *rcdev,
+		unsigned long id)
+{
+	int err;
+
+	err = tegra_clk_rst_assert(rcdev, id);
+	if (err)
+		return err;
+
+	udelay(1);
+
+	return tegra_clk_rst_deassert(rcdev, id);
+}
+
 const struct tegra_clk_periph_regs *get_reg_bank(int clkid)
 {
 	int reg_bank = clkid / 32;
@@ -274,6 +289,7 @@ void __init tegra_init_from_table(struct tegra_clk_init_table *tbl,
 static const struct reset_control_ops rst_ops = {
 	.assert = tegra_clk_rst_assert,
 	.deassert = tegra_clk_rst_deassert,
+	.reset = tegra_clk_rst_reset,
 };
 
 static struct reset_controller_dev rst_ctlr = {
-- 
cgit v1.2.3


From e745f992cf4b030ef73c5cdceecd03d9d3c727e9 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Tue, 14 Mar 2017 16:12:49 +0200
Subject: clk: tegra: Rework pll_u

In normal operation pll_u is under hardware control and has a fixed rate
of 480MHz. Hardware will turn on pll_u on whenever any of the XUSB
powerdomains is on. From a software point of view we model this is if
pll_u is always on using a fixed rate clock. However the bootloader
might or might not have configured pll_u this way. So we will check the
current state of pll_u at boot and reconfigure it if required.

There are 3 possiblities at kernel boot:
1) pll_u is under hardware control: do nothing
2) pll_u is under hardware control and enabled: enable hardware control
3) pll_u is disabled: enable pll_u and enable hardware control

In all cases we also check if UTMIPLL is under hardware control at boot
and configure it for hardware control if that is not the case.
The same is done during SC7 resume.

Thanks to Joseph Lo <josephl@nvidia.com> for bug fixes.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-pll.c      | 174 -----------------------
 drivers/clk/tegra/clk-tegra210.c | 295 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 272 insertions(+), 197 deletions(-)

diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c
index b3855360d6bc..159a854779e6 100644
--- a/drivers/clk/tegra/clk-pll.c
+++ b/drivers/clk/tegra/clk-pll.c
@@ -2517,152 +2517,6 @@ static int clk_plle_tegra210_is_enabled(struct clk_hw *hw)
 	return val & PLLE_BASE_ENABLE ? 1 : 0;
 }
 
-static int clk_pllu_tegra210_enable(struct clk_hw *hw)
-{
-	struct tegra_clk_pll *pll = to_clk_pll(hw);
-	struct clk_hw *pll_ref = clk_hw_get_parent(hw);
-	struct clk_hw *osc = clk_hw_get_parent(pll_ref);
-	const struct utmi_clk_param *params = NULL;
-	unsigned long flags = 0, input_rate;
-	unsigned int i;
-	int ret = 0;
-	u32 value;
-
-	if (!osc) {
-		pr_err("%s: failed to get OSC clock\n", __func__);
-		return -EINVAL;
-	}
-
-	input_rate = clk_hw_get_rate(osc);
-
-	if (pll->lock)
-		spin_lock_irqsave(pll->lock, flags);
-
-	_clk_pll_enable(hw);
-
-	ret = clk_pll_wait_for_lock(pll);
-	if (ret < 0)
-		goto out;
-
-	for (i = 0; i < ARRAY_SIZE(utmi_parameters); i++) {
-		if (input_rate == utmi_parameters[i].osc_frequency) {
-			params = &utmi_parameters[i];
-			break;
-		}
-	}
-
-	if (!params) {
-		pr_err("%s: unexpected input rate %lu Hz\n", __func__,
-		       input_rate);
-		ret = -EINVAL;
-		goto out;
-	}
-
-	value = pll_readl_base(pll);
-	value &= ~PLLU_BASE_OVERRIDE;
-	pll_writel_base(value, pll);
-
-	/* Put PLLU under HW control */
-	value = readl_relaxed(pll->clk_base + PLLU_HW_PWRDN_CFG0);
-	value |= PLLU_HW_PWRDN_CFG0_IDDQ_PD_INCLUDE |
-	         PLLU_HW_PWRDN_CFG0_USE_SWITCH_DETECT |
-	         PLLU_HW_PWRDN_CFG0_USE_LOCKDET;
-	value &= ~(PLLU_HW_PWRDN_CFG0_CLK_ENABLE_SWCTL |
-		   PLLU_HW_PWRDN_CFG0_CLK_SWITCH_SWCTL);
-	writel_relaxed(value, pll->clk_base + PLLU_HW_PWRDN_CFG0);
-
-	value = readl_relaxed(pll->clk_base + XUSB_PLL_CFG0);
-	value &= ~XUSB_PLL_CFG0_PLLU_LOCK_DLY;
-	writel_relaxed(value, pll->clk_base + XUSB_PLL_CFG0);
-
-	udelay(1);
-
-	value = readl_relaxed(pll->clk_base + PLLU_HW_PWRDN_CFG0);
-	value |= PLLU_HW_PWRDN_CFG0_SEQ_ENABLE;
-	writel_relaxed(value, pll->clk_base + PLLU_HW_PWRDN_CFG0);
-
-	udelay(1);
-
-	/* Disable PLLU clock branch to UTMIPLL since it uses OSC */
-	value = pll_readl_base(pll);
-	value &= ~PLLU_BASE_CLKENABLE_USB;
-	pll_writel_base(value, pll);
-
-	value = readl_relaxed(pll->clk_base + UTMIPLL_HW_PWRDN_CFG0);
-	if (value & UTMIPLL_HW_PWRDN_CFG0_SEQ_ENABLE) {
-		pr_debug("UTMIPLL already enabled\n");
-		goto out;
-	}
-
-	value &= ~UTMIPLL_HW_PWRDN_CFG0_IDDQ_OVERRIDE;
-	writel_relaxed(value, pll->clk_base + UTMIPLL_HW_PWRDN_CFG0);
-
-	/* Program UTMIP PLL stable and active counts */
-	value = readl_relaxed(pll->clk_base + UTMIP_PLL_CFG2);
-	value &= ~UTMIP_PLL_CFG2_STABLE_COUNT(~0);
-	value |= UTMIP_PLL_CFG2_STABLE_COUNT(params->stable_count);
-	value &= ~UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(~0);
-	value |= UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(params->active_delay_count);
-	value |= UTMIP_PLL_CFG2_PHY_XTAL_CLOCKEN;
-	writel_relaxed(value, pll->clk_base + UTMIP_PLL_CFG2);
-
-	/* Program UTMIP PLL delay and oscillator frequency counts */
-	value = readl_relaxed(pll->clk_base + UTMIP_PLL_CFG1);
-	value &= ~UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(~0);
-	value |= UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(params->enable_delay_count);
-	value &= ~UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(~0);
-	value |= UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(params->xtal_freq_count);
-	writel_relaxed(value, pll->clk_base + UTMIP_PLL_CFG1);
-
-	/* Remove power downs from UTMIP PLL control bits */
-	value = readl_relaxed(pll->clk_base + UTMIP_PLL_CFG1);
-	value &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN;
-	value |= UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERUP;
-	writel(value, pll->clk_base + UTMIP_PLL_CFG1);
-
-	udelay(1);
-
-	/* Enable samplers for SNPS, XUSB_HOST, XUSB_DEV */
-	value = readl_relaxed(pll->clk_base + UTMIP_PLL_CFG2);
-	value |= UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERUP;
-	value |= UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERUP;
-	value |= UTMIP_PLL_CFG2_FORCE_PD_SAMP_D_POWERUP;
-	value &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERDOWN;
-	value &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERDOWN;
-	value &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_D_POWERDOWN;
-	writel_relaxed(value, pll->clk_base + UTMIP_PLL_CFG2);
-
-	/* Setup HW control of UTMIPLL */
-	value = readl_relaxed(pll->clk_base + UTMIP_PLL_CFG1);
-	value &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERUP;
-	value &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN;
-	writel_relaxed(value, pll->clk_base + UTMIP_PLL_CFG1);
-
-	value = readl_relaxed(pll->clk_base + UTMIPLL_HW_PWRDN_CFG0);
-	value |= UTMIPLL_HW_PWRDN_CFG0_USE_LOCKDET;
-	value &= ~UTMIPLL_HW_PWRDN_CFG0_CLK_ENABLE_SWCTL;
-	writel_relaxed(value, pll->clk_base + UTMIPLL_HW_PWRDN_CFG0);
-
-	udelay(1);
-
-	value = readl_relaxed(pll->clk_base + XUSB_PLL_CFG0);
-	value &= ~XUSB_PLL_CFG0_UTMIPLL_LOCK_DLY;
-	writel_relaxed(value, pll->clk_base + XUSB_PLL_CFG0);
-
-	udelay(1);
-
-	/* Enable HW control of UTMIPLL */
-	value = readl_relaxed(pll->clk_base + UTMIPLL_HW_PWRDN_CFG0);
-	value |= UTMIPLL_HW_PWRDN_CFG0_SEQ_ENABLE;
-	writel_relaxed(value, pll->clk_base + UTMIPLL_HW_PWRDN_CFG0);
-
-out:
-	if (pll->lock)
-		spin_unlock_irqrestore(pll->lock, flags);
-
-	return ret;
-}
-
 static const struct clk_ops tegra_clk_plle_tegra210_ops = {
 	.is_enabled =  clk_plle_tegra210_is_enabled,
 	.enable = clk_plle_tegra210_enable,
@@ -2670,13 +2524,6 @@ static const struct clk_ops tegra_clk_plle_tegra210_ops = {
 	.recalc_rate = clk_pll_recalc_rate,
 };
 
-static const struct clk_ops tegra_clk_pllu_tegra210_ops = {
-	.is_enabled =  clk_pll_is_enabled,
-	.enable = clk_pllu_tegra210_enable,
-	.disable = clk_pll_disable,
-	.recalc_rate = clk_pllre_recalc_rate,
-};
-
 struct clk *tegra_clk_register_plle_tegra210(const char *name,
 				const char *parent_name,
 				void __iomem *clk_base, unsigned long flags,
@@ -2918,25 +2765,4 @@ struct clk *tegra_clk_register_pllmb(const char *name, const char *parent_name,
 	return clk;
 }
 
-struct clk *tegra_clk_register_pllu_tegra210(const char *name,
-		const char *parent_name, void __iomem *clk_base,
-		unsigned long flags, struct tegra_clk_pll_params *pll_params,
-		spinlock_t *lock)
-{
-	struct tegra_clk_pll *pll;
-	struct clk *clk;
-
-	pll_params->flags |= TEGRA_PLLU;
-
-	pll = _tegra_init_pll(clk_base, NULL, pll_params, lock);
-	if (IS_ERR(pll))
-		return ERR_CAST(pll);
-
-	clk = _tegra_clk_register_pll(pll, name, parent_name, flags,
-				      &tegra_clk_pllu_tegra210_ops);
-	if (IS_ERR(clk))
-		kfree(pll);
-
-	return clk;
-}
 #endif
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index f89d2a912273..1a8121abd427 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/clk/tegra.h>
 #include <dt-bindings/clock/tegra210-car.h>
+#include <linux/iopoll.h>
 
 #include "clk.h"
 #include "clk-id.h"
@@ -155,6 +156,27 @@
 #define PMC_PLLM_WB0_OVERRIDE 0x1dc
 #define PMC_PLLM_WB0_OVERRIDE_2 0x2b0
 
+#define UTMIP_PLL_CFG2 0x488
+#define UTMIP_PLL_CFG2_STABLE_COUNT(x) (((x) & 0xfff) << 6)
+#define UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(x) (((x) & 0x3f) << 18)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERDOWN BIT(0)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERUP BIT(1)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERDOWN BIT(2)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERUP BIT(3)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_C_POWERDOWN BIT(4)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_C_POWERUP BIT(5)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_D_POWERDOWN BIT(24)
+#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_D_POWERUP BIT(25)
+
+#define UTMIP_PLL_CFG1 0x484
+#define UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(x) (((x) & 0x1f) << 27)
+#define UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(x) (((x) & 0xfff) << 0)
+#define UTMIP_PLL_CFG1_FORCE_PLLU_POWERUP BIT(17)
+#define UTMIP_PLL_CFG1_FORCE_PLLU_POWERDOWN BIT(16)
+#define UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERUP BIT(15)
+#define UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN BIT(14)
+#define UTMIP_PLL_CFG1_FORCE_PLL_ACTIVE_POWERDOWN BIT(12)
+
 #define SATA_PLL_CFG0				0x490
 #define SATA_PLL_CFG0_PADPLL_RESET_SWCTL	BIT(0)
 #define SATA_PLL_CFG0_PADPLL_USE_LOCKDET	BIT(2)
@@ -1051,27 +1073,28 @@ static void tegra210_pllp_set_defaults(struct tegra_clk_pll *pllp)
  * Both VCO and post-divider output rates are fixed at 480MHz and 240MHz,
  * respectively.
  */
-static void pllu_check_defaults(struct tegra_clk_pll *pll, bool hw_control)
+static void pllu_check_defaults(struct tegra_clk_pll_params *params,
+				bool hw_control)
 {
 	u32 val, mask;
 
 	/* Ignore lock enable (will be set) and IDDQ if under h/w control */
 	val = PLLU_MISC0_DEFAULT_VALUE & (~PLLU_MISC0_IDDQ);
 	mask = PLLU_MISC0_LOCK_ENABLE | (hw_control ? PLLU_MISC0_IDDQ : 0);
-	_pll_misc_chk_default(clk_base, pll->params, 0, val,
+	_pll_misc_chk_default(clk_base, params, 0, val,
 			~mask & PLLU_MISC0_WRITE_MASK);
 
 	val = PLLU_MISC1_DEFAULT_VALUE;
 	mask = PLLU_MISC1_LOCK_OVERRIDE;
-	_pll_misc_chk_default(clk_base, pll->params, 1, val,
+	_pll_misc_chk_default(clk_base, params, 1, val,
 			~mask & PLLU_MISC1_WRITE_MASK);
 }
 
-static void tegra210_pllu_set_defaults(struct tegra_clk_pll *pllu)
+static void tegra210_pllu_set_defaults(struct tegra_clk_pll_params *pllu)
 {
-	u32 val = readl_relaxed(clk_base + pllu->params->base_reg);
+	u32 val = readl_relaxed(clk_base + pllu->base_reg);
 
-	pllu->params->defaults_set = true;
+	pllu->defaults_set = true;
 
 	if (val & PLL_ENABLE) {
 
@@ -1080,19 +1103,19 @@ static void tegra210_pllu_set_defaults(struct tegra_clk_pll *pllu)
 		 * that can be updated in flight.
 		 */
 		pllu_check_defaults(pllu, false);
-		if (!pllu->params->defaults_set)
+		if (!pllu->defaults_set)
 			pr_warn("PLL_U already enabled. Postponing set full defaults\n");
 
 		/* Enable lock detect */
-		val = readl_relaxed(clk_base + pllu->params->ext_misc_reg[0]);
+		val = readl_relaxed(clk_base + pllu->ext_misc_reg[0]);
 		val &= ~PLLU_MISC0_LOCK_ENABLE;
 		val |= PLLU_MISC0_DEFAULT_VALUE & PLLU_MISC0_LOCK_ENABLE;
-		writel_relaxed(val, clk_base + pllu->params->ext_misc_reg[0]);
+		writel_relaxed(val, clk_base + pllu->ext_misc_reg[0]);
 
-		val = readl_relaxed(clk_base + pllu->params->ext_misc_reg[1]);
+		val = readl_relaxed(clk_base + pllu->ext_misc_reg[1]);
 		val &= ~PLLU_MISC1_LOCK_OVERRIDE;
 		val |= PLLU_MISC1_DEFAULT_VALUE & PLLU_MISC1_LOCK_OVERRIDE;
-		writel_relaxed(val, clk_base + pllu->params->ext_misc_reg[1]);
+		writel_relaxed(val, clk_base + pllu->ext_misc_reg[1]);
 		udelay(1);
 
 		return;
@@ -1100,9 +1123,9 @@ static void tegra210_pllu_set_defaults(struct tegra_clk_pll *pllu)
 
 	/* set IDDQ, enable lock detect */
 	writel_relaxed(PLLU_MISC0_DEFAULT_VALUE,
-			clk_base + pllu->params->ext_misc_reg[0]);
+			clk_base + pllu->ext_misc_reg[0]);
 	writel_relaxed(PLLU_MISC1_DEFAULT_VALUE,
-			clk_base + pllu->params->ext_misc_reg[1]);
+			clk_base + pllu->ext_misc_reg[1]);
 	udelay(1);
 }
 
@@ -1999,9 +2022,9 @@ static struct div_nmp pllu_nmp = {
 };
 
 static struct tegra_clk_pll_freq_table pll_u_freq_table[] = {
-	{ 12000000, 480000000, 40, 1, 1, 0 },
-	{ 13000000, 480000000, 36, 1, 1, 0 }, /* actual: 468.0 MHz */
-	{ 38400000, 480000000, 25, 2, 1, 0 },
+	{ 12000000, 480000000, 40, 1, 0, 0 },
+	{ 13000000, 480000000, 36, 1, 0, 0 }, /* actual: 468.0 MHz */
+	{ 38400000, 480000000, 25, 2, 0, 0 },
 	{        0,         0,  0, 0, 0, 0 },
 };
 
@@ -2025,8 +2048,47 @@ static struct tegra_clk_pll_params pll_u_vco_params = {
 	.div_nmp = &pllu_nmp,
 	.freq_table = pll_u_freq_table,
 	.flags = TEGRA_PLLU | TEGRA_PLL_USE_LOCK | TEGRA_PLL_VCO_OUT,
-	.set_defaults = tegra210_pllu_set_defaults,
-	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
+};
+
+struct utmi_clk_param {
+	/* Oscillator Frequency in KHz */
+	u32 osc_frequency;
+	/* UTMIP PLL Enable Delay Count  */
+	u8 enable_delay_count;
+	/* UTMIP PLL Stable count */
+	u16 stable_count;
+	/*  UTMIP PLL Active delay count */
+	u8 active_delay_count;
+	/* UTMIP PLL Xtal frequency count */
+	u16 xtal_freq_count;
+};
+
+static const struct utmi_clk_param utmi_parameters[] = {
+	{
+		.osc_frequency = 38400000, .enable_delay_count = 0x0,
+		.stable_count = 0x0, .active_delay_count = 0x6,
+		.xtal_freq_count = 0x80
+	}, {
+		.osc_frequency = 13000000, .enable_delay_count = 0x02,
+		.stable_count = 0x33, .active_delay_count = 0x05,
+		.xtal_freq_count = 0x7f
+	}, {
+		.osc_frequency = 19200000, .enable_delay_count = 0x03,
+		.stable_count = 0x4b, .active_delay_count = 0x06,
+		.xtal_freq_count = 0xbb
+	}, {
+		.osc_frequency = 12000000, .enable_delay_count = 0x02,
+		.stable_count = 0x2f, .active_delay_count = 0x08,
+		.xtal_freq_count = 0x76
+	}, {
+		.osc_frequency = 26000000, .enable_delay_count = 0x04,
+		.stable_count = 0x66, .active_delay_count = 0x09,
+		.xtal_freq_count = 0xfe
+	}, {
+		.osc_frequency = 16800000, .enable_delay_count = 0x03,
+		.stable_count = 0x41, .active_delay_count = 0x0a,
+		.xtal_freq_count = 0xa4
+	},
 };
 
 static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
@@ -2339,6 +2401,190 @@ void tegra210_put_utmipll_out_iddq(void)
 }
 EXPORT_SYMBOL_GPL(tegra210_put_utmipll_out_iddq);
 
+static void tegra210_utmi_param_configure(void)
+{
+	u32 reg;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(utmi_parameters); i++) {
+		if (osc_freq == utmi_parameters[i].osc_frequency)
+			break;
+	}
+
+	if (i >= ARRAY_SIZE(utmi_parameters)) {
+		pr_err("%s: Unexpected oscillator freq %lu\n", __func__,
+			osc_freq);
+		return;
+	}
+
+	reg = readl_relaxed(clk_base + UTMIPLL_HW_PWRDN_CFG0);
+	reg &= ~UTMIPLL_HW_PWRDN_CFG0_IDDQ_OVERRIDE;
+	writel_relaxed(reg, clk_base + UTMIPLL_HW_PWRDN_CFG0);
+
+	udelay(10);
+
+	reg = readl_relaxed(clk_base + UTMIP_PLL_CFG2);
+
+	/* Program UTMIP PLL stable and active counts */
+	/* [FIXME] arclk_rst.h says WRONG! This should be 1ms -> 0x50 Check! */
+	reg &= ~UTMIP_PLL_CFG2_STABLE_COUNT(~0);
+	reg |= UTMIP_PLL_CFG2_STABLE_COUNT(utmi_parameters[i].stable_count);
+
+	reg &= ~UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(~0);
+
+	reg |=
+	UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(utmi_parameters[i].active_delay_count);
+	writel_relaxed(reg, clk_base + UTMIP_PLL_CFG2);
+
+	/* Program UTMIP PLL delay and oscillator frequency counts */
+	reg = readl_relaxed(clk_base + UTMIP_PLL_CFG1);
+	reg &= ~UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(~0);
+
+	reg |=
+	UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(utmi_parameters[i].enable_delay_count);
+
+	reg &= ~UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(~0);
+	reg |=
+	UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(utmi_parameters[i].xtal_freq_count);
+
+	reg |= UTMIP_PLL_CFG1_FORCE_PLLU_POWERDOWN;
+	writel_relaxed(reg, clk_base + UTMIP_PLL_CFG1);
+
+	/* Remove power downs from UTMIP PLL control bits */
+	reg = readl_relaxed(clk_base + UTMIP_PLL_CFG1);
+	reg &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN;
+	reg |= UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERUP;
+	writel_relaxed(reg, clk_base + UTMIP_PLL_CFG1);
+	udelay(1);
+
+	/* Enable samplers for SNPS, XUSB_HOST, XUSB_DEV */
+	reg = readl_relaxed(clk_base + UTMIP_PLL_CFG2);
+	reg |= UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERUP;
+	reg |= UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERUP;
+	reg |= UTMIP_PLL_CFG2_FORCE_PD_SAMP_D_POWERUP;
+	reg &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERDOWN;
+	reg &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERDOWN;
+	reg &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_D_POWERDOWN;
+	writel_relaxed(reg, clk_base + UTMIP_PLL_CFG2);
+
+	/* Setup HW control of UTMIPLL */
+	reg = readl_relaxed(clk_base + UTMIP_PLL_CFG1);
+	reg &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN;
+	reg &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERUP;
+	writel_relaxed(reg, clk_base + UTMIP_PLL_CFG1);
+
+	reg = readl_relaxed(clk_base + UTMIPLL_HW_PWRDN_CFG0);
+	reg |= UTMIPLL_HW_PWRDN_CFG0_USE_LOCKDET;
+	reg &= ~UTMIPLL_HW_PWRDN_CFG0_CLK_ENABLE_SWCTL;
+	writel_relaxed(reg, clk_base + UTMIPLL_HW_PWRDN_CFG0);
+
+	udelay(1);
+
+	reg = readl_relaxed(clk_base + XUSB_PLL_CFG0);
+	reg &= ~XUSB_PLL_CFG0_UTMIPLL_LOCK_DLY;
+	writel_relaxed(reg, clk_base + XUSB_PLL_CFG0);
+
+	udelay(1);
+
+	/* Enable HW control UTMIPLL */
+	reg = readl_relaxed(clk_base + UTMIPLL_HW_PWRDN_CFG0);
+	reg |= UTMIPLL_HW_PWRDN_CFG0_SEQ_ENABLE;
+	writel_relaxed(reg, clk_base + UTMIPLL_HW_PWRDN_CFG0);
+}
+
+static int tegra210_enable_pllu(void)
+{
+	struct tegra_clk_pll_freq_table *fentry;
+	struct tegra_clk_pll pllu;
+	u32 reg;
+
+	for (fentry = pll_u_freq_table; fentry->input_rate; fentry++) {
+		if (fentry->input_rate == pll_ref_freq)
+			break;
+	}
+
+	if (!fentry->input_rate) {
+		pr_err("Unknown PLL_U reference frequency %lu\n", pll_ref_freq);
+		return -EINVAL;
+	}
+
+	/* clear IDDQ bit */
+	pllu.params = &pll_u_vco_params;
+	reg = readl_relaxed(clk_base + pllu.params->ext_misc_reg[0]);
+	reg &= ~BIT(pllu.params->iddq_bit_idx);
+	writel_relaxed(reg, clk_base + pllu.params->ext_misc_reg[0]);
+
+	reg = readl_relaxed(clk_base + PLLU_BASE);
+	reg &= ~GENMASK(20, 0);
+	reg |= fentry->m;
+	reg |= fentry->n << 8;
+	reg |= fentry->p << 16;
+	writel(reg, clk_base + PLLU_BASE);
+	reg |= PLL_ENABLE;
+	writel(reg, clk_base + PLLU_BASE);
+
+	readl_relaxed_poll_timeout(clk_base + PLLU_BASE, reg,
+				   reg & PLL_BASE_LOCK, 2, 1000);
+	if (!(reg & PLL_BASE_LOCK)) {
+		pr_err("Timed out waiting for PLL_U to lock\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int tegra210_init_pllu(void)
+{
+	u32 reg;
+	int err;
+
+	tegra210_pllu_set_defaults(&pll_u_vco_params);
+	/* skip initialization when pllu is in hw controlled mode */
+	reg = readl_relaxed(clk_base + PLLU_BASE);
+	if (reg & PLLU_BASE_OVERRIDE) {
+		if (!(reg & PLL_ENABLE)) {
+			err = tegra210_enable_pllu();
+			if (err < 0) {
+				WARN_ON(1);
+				return err;
+			}
+		}
+		/* enable hw controlled mode */
+		reg = readl_relaxed(clk_base + PLLU_BASE);
+		reg &= ~PLLU_BASE_OVERRIDE;
+		writel(reg, clk_base + PLLU_BASE);
+
+		reg = readl_relaxed(clk_base + PLLU_HW_PWRDN_CFG0);
+		reg |= PLLU_HW_PWRDN_CFG0_IDDQ_PD_INCLUDE |
+		       PLLU_HW_PWRDN_CFG0_USE_SWITCH_DETECT |
+		       PLLU_HW_PWRDN_CFG0_USE_LOCKDET;
+		reg &= ~(PLLU_HW_PWRDN_CFG0_CLK_ENABLE_SWCTL |
+			PLLU_HW_PWRDN_CFG0_CLK_SWITCH_SWCTL);
+		writel_relaxed(reg, clk_base + PLLU_HW_PWRDN_CFG0);
+
+		reg = readl_relaxed(clk_base + XUSB_PLL_CFG0);
+		reg &= ~XUSB_PLL_CFG0_PLLU_LOCK_DLY_MASK;
+		writel_relaxed(reg, clk_base + XUSB_PLL_CFG0);
+		udelay(1);
+
+		reg = readl_relaxed(clk_base + PLLU_HW_PWRDN_CFG0);
+		reg |= PLLU_HW_PWRDN_CFG0_SEQ_ENABLE;
+		writel_relaxed(reg, clk_base + PLLU_HW_PWRDN_CFG0);
+		udelay(1);
+
+		reg = readl_relaxed(clk_base + PLLU_BASE);
+		reg &= ~PLLU_BASE_CLKENABLE_USB;
+		writel_relaxed(reg, clk_base + PLLU_BASE);
+	}
+
+	/* enable UTMIPLL hw control if not yet done by the bootloader */
+	reg = readl_relaxed(clk_base + UTMIPLL_HW_PWRDN_CFG0);
+	if (!(reg & UTMIPLL_HW_PWRDN_CFG0_SEQ_ENABLE))
+		tegra210_utmi_param_configure();
+
+	return 0;
+}
+
 static __init void tegra210_periph_clk_init(void __iomem *clk_base,
 					    void __iomem *pmc_base)
 {
@@ -2467,11 +2713,12 @@ static void __init tegra210_pll_init(void __iomem *clk_base,
 	clks[TEGRA210_CLK_PLL_M_UD] = clk;
 
 	/* PLLU_VCO */
-	clk = tegra_clk_register_pllu_tegra210("pll_u_vco", "pll_ref",
-					       clk_base, 0, &pll_u_vco_params,
-					       &pll_u_lock);
-	clk_register_clkdev(clk, "pll_u_vco", NULL);
-	clks[TEGRA210_CLK_PLL_U] = clk;
+	if (!tegra210_init_pllu()) {
+		clk = clk_register_fixed_rate(NULL, "pll_u_vco", "pll_ref", 0,
+					      480*1000*1000);
+		clk_register_clkdev(clk, "pll_u_vco", NULL);
+		clks[TEGRA210_CLK_PLL_U] = clk;
+	}
 
 	/* PLLU_OUT */
 	clk = clk_register_divider_table(NULL, "pll_u_out", "pll_u_vco", 0,
@@ -2716,6 +2963,8 @@ static struct tegra_clk_init_table init_table[] __initdata = {
 	{ TEGRA210_CLK_PLL_DP, TEGRA210_CLK_CLK_MAX, 270000000, 0 },
 	{ TEGRA210_CLK_SOC_THERM, TEGRA210_CLK_PLL_P, 51000000, 0 },
 	{ TEGRA210_CLK_CCLK_G, TEGRA210_CLK_CLK_MAX, 0, 1 },
+	{ TEGRA210_CLK_PLL_U_OUT1, TEGRA210_CLK_CLK_MAX, 48000000, 1 },
+	{ TEGRA210_CLK_PLL_U_OUT2, TEGRA210_CLK_CLK_MAX, 60000000, 1 },
 	/* This MUST be the last entry. */
 	{ TEGRA210_CLK_CLK_MAX, TEGRA210_CLK_CLK_MAX, 0, 0 },
 };
-- 
cgit v1.2.3


From 68d724cedcca8ab86eee824682f7da0af5e6e50d Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Wed, 15 Mar 2017 14:59:32 +0200
Subject: clk: tegra: Add Tegra210 special resets

Tegra210 has 2 special resets which don't follow the normal pattern:
DVCO and ADSP. Add them in this patch.

Changelog:

v2: add DT bindings file

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra210.c         | 85 ++++++++++++++++++++++++++++++++
 include/dt-bindings/reset/tegra210-car.h | 13 +++++
 2 files changed, 98 insertions(+)
 create mode 100644 include/dt-bindings/reset/tegra210-car.h

diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index 1a8121abd427..6f29125ec439 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/clk/tegra.h>
 #include <dt-bindings/clock/tegra210-car.h>
+#include <dt-bindings/reset/tegra210-car.h>
 #include <linux/iopoll.h>
 
 #include "clk.h"
@@ -218,6 +219,12 @@
 #define CLK_M_DIVISOR_SHIFT 2
 #define CLK_M_DIVISOR_MASK 0x3
 
+#define RST_DFLL_DVCO 0x2f4
+#define DVFS_DFLL_RESET_SHIFT 0
+
+#define CLK_RST_CONTROLLER_RST_DEV_Y_SET 0x2a8
+#define CLK_RST_CONTROLLER_RST_DEV_Y_CLR 0x2ac
+
 /*
  * SDM fractional divisor is 16-bit 2's complement signed number within
  * (-2^12 ... 2^12-1) range. Represented in PLL data structure as unsigned
@@ -2982,6 +2989,81 @@ static void __init tegra210_clock_apply_init_table(void)
 	tegra_init_from_table(init_table, clks, TEGRA210_CLK_CLK_MAX);
 }
 
+/**
+ * tegra210_car_barrier - wait for pending writes to the CAR to complete
+ *
+ * Wait for any outstanding writes to the CAR MMIO space from this CPU
+ * to complete before continuing execution.  No return value.
+ */
+static void tegra210_car_barrier(void)
+{
+	readl_relaxed(clk_base + RST_DFLL_DVCO);
+}
+
+/**
+ * tegra210_clock_assert_dfll_dvco_reset - assert the DFLL's DVCO reset
+ *
+ * Assert the reset line of the DFLL's DVCO.  No return value.
+ */
+static void tegra210_clock_assert_dfll_dvco_reset(void)
+{
+	u32 v;
+
+	v = readl_relaxed(clk_base + RST_DFLL_DVCO);
+	v |= (1 << DVFS_DFLL_RESET_SHIFT);
+	writel_relaxed(v, clk_base + RST_DFLL_DVCO);
+	tegra210_car_barrier();
+}
+
+/**
+ * tegra210_clock_deassert_dfll_dvco_reset - deassert the DFLL's DVCO reset
+ *
+ * Deassert the reset line of the DFLL's DVCO, allowing the DVCO to
+ * operate.  No return value.
+ */
+static void tegra210_clock_deassert_dfll_dvco_reset(void)
+{
+	u32 v;
+
+	v = readl_relaxed(clk_base + RST_DFLL_DVCO);
+	v &= ~(1 << DVFS_DFLL_RESET_SHIFT);
+	writel_relaxed(v, clk_base + RST_DFLL_DVCO);
+	tegra210_car_barrier();
+}
+
+static int tegra210_reset_assert(unsigned long id)
+{
+	if (id == TEGRA210_RST_DFLL_DVCO)
+		tegra210_clock_assert_dfll_dvco_reset();
+	else if (id == TEGRA210_RST_ADSP)
+		writel(GENMASK(26, 21) | BIT(7),
+			clk_base + CLK_RST_CONTROLLER_RST_DEV_Y_SET);
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static int tegra210_reset_deassert(unsigned long id)
+{
+	if (id == TEGRA210_RST_DFLL_DVCO)
+		tegra210_clock_deassert_dfll_dvco_reset();
+	else if (id == TEGRA210_RST_ADSP) {
+		writel(BIT(21), clk_base + CLK_RST_CONTROLLER_RST_DEV_Y_CLR);
+		/*
+		 * Considering adsp cpu clock (min: 12.5MHZ, max: 1GHz)
+		 * a delay of 5us ensures that it's at least
+		 * 6 * adsp_cpu_cycle_period long.
+		 */
+		udelay(5);
+		writel(GENMASK(26, 22) | BIT(7),
+			clk_base + CLK_RST_CONTROLLER_RST_DEV_Y_CLR);
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
 /**
  * tegra210_clock_init - Tegra210-specific clock initialization
  * @np: struct device_node * of the DT node for the SoC CAR IP block
@@ -3046,6 +3128,9 @@ static void __init tegra210_clock_init(struct device_node *np)
 
 	tegra_super_clk_gen5_init(clk_base, pmc_base, tegra210_clks,
 				  &pll_x_params);
+	tegra_init_special_resets(2, tegra210_reset_assert,
+				  tegra210_reset_deassert);
+
 	tegra_add_of_provider(np);
 	tegra_register_devclks(devclks, ARRAY_SIZE(devclks));
 
diff --git a/include/dt-bindings/reset/tegra210-car.h b/include/dt-bindings/reset/tegra210-car.h
new file mode 100644
index 000000000000..296ec6e3f8c0
--- /dev/null
+++ b/include/dt-bindings/reset/tegra210-car.h
@@ -0,0 +1,13 @@
+/*
+ * This header provides Tegra210-specific constants for binding
+ * nvidia,tegra210-car.
+ */
+
+#ifndef _DT_BINDINGS_RESET_TEGRA210_CAR_H
+#define _DT_BINDINGS_RESET_TEGRA210_CAR_H
+
+#define TEGRA210_RESET(x)		(7 * 32 + (x))
+#define TEGRA210_RST_DFLL_DVCO		TEGRA210_RESET(0)
+#define TEGRA210_RST_ADSP		TEGRA210_RESET(1)
+
+#endif	/* _DT_BINDINGS_RESET_TEGRA210_CAR_H */
-- 
cgit v1.2.3


From 59af78d78db8bde6a63e09772aa44192f772fa96 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Wed, 15 Mar 2017 17:42:05 +0200
Subject: clk: tegra: Add SATA seq input control

This will be used by the powergating driver to ensure proper sequencer
state when the SATA domain is powergated.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra210.c | 25 +++++++++++++++++++++++++
 include/linux/clk/tegra.h        |  1 +
 2 files changed, 26 insertions(+)

diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index 6f29125ec439..f3e51e640d4d 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -181,6 +181,11 @@
 #define SATA_PLL_CFG0				0x490
 #define SATA_PLL_CFG0_PADPLL_RESET_SWCTL	BIT(0)
 #define SATA_PLL_CFG0_PADPLL_USE_LOCKDET	BIT(2)
+#define SATA_PLL_CFG0_SATA_SEQ_IN_SWCTL		BIT(4)
+#define SATA_PLL_CFG0_SATA_SEQ_RESET_INPUT_VALUE	BIT(5)
+#define SATA_PLL_CFG0_SATA_SEQ_LANE_PD_INPUT_VALUE	BIT(6)
+#define SATA_PLL_CFG0_SATA_SEQ_PADPLL_PD_INPUT_VALUE	BIT(7)
+
 #define SATA_PLL_CFG0_PADPLL_SLEEP_IDDQ		BIT(13)
 #define SATA_PLL_CFG0_SEQ_ENABLE		BIT(24)
 
@@ -483,6 +488,26 @@ void tegra210_sata_pll_hw_sequence_start(void)
 }
 EXPORT_SYMBOL_GPL(tegra210_sata_pll_hw_sequence_start);
 
+void tegra210_set_sata_pll_seq_sw(bool state)
+{
+	u32 val;
+
+	val = readl_relaxed(clk_base + SATA_PLL_CFG0);
+	if (state) {
+		val |= SATA_PLL_CFG0_SATA_SEQ_IN_SWCTL;
+		val |= SATA_PLL_CFG0_SATA_SEQ_RESET_INPUT_VALUE;
+		val |= SATA_PLL_CFG0_SATA_SEQ_LANE_PD_INPUT_VALUE;
+		val |= SATA_PLL_CFG0_SATA_SEQ_PADPLL_PD_INPUT_VALUE;
+	} else {
+		val &= ~SATA_PLL_CFG0_SATA_SEQ_IN_SWCTL;
+		val &= ~SATA_PLL_CFG0_SATA_SEQ_RESET_INPUT_VALUE;
+		val &= ~SATA_PLL_CFG0_SATA_SEQ_LANE_PD_INPUT_VALUE;
+		val &= ~SATA_PLL_CFG0_SATA_SEQ_PADPLL_PD_INPUT_VALUE;
+	}
+	writel_relaxed(val, clk_base + SATA_PLL_CFG0);
+}
+EXPORT_SYMBOL_GPL(tegra210_set_sata_pll_seq_sw);
+
 static inline void _pll_misc_chk_default(void __iomem *base,
 					struct tegra_clk_pll_params *params,
 					u8 misc_num, u32 default_val, u32 mask)
diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h
index e17d32831e28..d23c9cf26993 100644
--- a/include/linux/clk/tegra.h
+++ b/include/linux/clk/tegra.h
@@ -125,6 +125,7 @@ extern void tegra210_xusb_pll_hw_control_enable(void);
 extern void tegra210_xusb_pll_hw_sequence_start(void);
 extern void tegra210_sata_pll_hw_control_enable(void);
 extern void tegra210_sata_pll_hw_sequence_start(void);
+extern void tegra210_set_sata_pll_seq_sw(bool state);
 extern void tegra210_put_utmipll_in_iddq(void);
 extern void tegra210_put_utmipll_out_iddq(void);
 
-- 
cgit v1.2.3


From 06995804b5762f016c7a80503406da853a8f3785 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.com>
Date: Tue, 22 Nov 2016 22:34:29 +0100
Subject: arm64: Use full path in KBUILD_IMAGE definition

The KBUILD_IMAGE variable is used by the rpm and deb-pkg targets, which
expect it to point to the image file in the build directory. The
builddeb script has a workaround for architectures which only provide
the basename, but let's provide a clean interface for packaging tools.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Michal Marek <mmarek@suse.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/arm64/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b9a4a934ca05..adf977fd39f1 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -100,12 +100,12 @@ libs-y		:= arch/arm64/lib/ $(libs-y)
 core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
 
 # Default target when executing plain make
-KBUILD_IMAGE	:= Image.gz
+boot		:= arch/arm64/boot
+KBUILD_IMAGE	:= $(boot)/Image.gz
 KBUILD_DTBS	:= dtbs
 
-all:	$(KBUILD_IMAGE) $(KBUILD_DTBS)
+all:	Image.gz $(KBUILD_DTBS)
 
-boot := arch/arm64/boot
 
 Image: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-- 
cgit v1.2.3


From 152e6744ebfc8fa6cc9fff4ba36271f5f1ba2821 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.com>
Date: Tue, 22 Nov 2016 22:34:30 +0100
Subject: arm: Use full path in KBUILD_IMAGE definition

The KBUILD_IMAGE variable is used by the rpm and deb-pkg targets, which
expect it to point to the image file in the build directory. The
builddeb script has a workaround for architectures which only provide
the basename, but let's provide a clean interface for packaging tools.

Cc: Russell King <linux@armlinux.org.uk>
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/arm/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index ab30cc634d02..65f4e2a4eb94 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -297,10 +297,11 @@ drivers-$(CONFIG_OPROFILE)      += arch/arm/oprofile/
 libs-y				:= arch/arm/lib/ $(libs-y)
 
 # Default target when executing plain make
+boot := arch/arm/boot
 ifeq ($(CONFIG_XIP_KERNEL),y)
-KBUILD_IMAGE := xipImage
+KBUILD_IMAGE := $(boot)/xipImage
 else
-KBUILD_IMAGE := zImage
+KBUILD_IMAGE := $(boot)/zImage
 endif
 
 # Build the DT binary blobs if we have OF configured
@@ -308,9 +309,8 @@ ifeq ($(CONFIG_USE_OF),y)
 KBUILD_DTBS := dtbs
 endif
 
-all:	$(KBUILD_IMAGE) $(KBUILD_DTBS)
+all:	$(notdir $(KBUILD_IMAGE)) $(KBUILD_DTBS)
 
-boot := arch/arm/boot
 
 archheaders:
 	$(Q)$(MAKE) $(build)=arch/arm/tools uapi
-- 
cgit v1.2.3


From 5e40f0fd234f36564881b03d28deaa69653ae9f2 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.com>
Date: Tue, 22 Nov 2016 22:34:31 +0100
Subject: arc: Use full path in KBUILD_IMAGE definition

The KBUILD_IMAGE variable is used by the rpm and deb-pkg targets, which
expect it to point to the image file in the build directory. The
builddeb script has a workaround for architectures which only provide
the basename, but let's provide a clean interface for packaging tools.

Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: linux-snps-arc@lists.infradead.org
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/arc/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 19cce226d1a8..44ef35d33956 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -123,9 +123,9 @@ libs-y		+= arch/arc/lib/ $(LIBGCC)
 boot		:= arch/arc/boot
 
 #default target for make without any arguments.
-KBUILD_IMAGE	:= bootpImage
+KBUILD_IMAGE	:= $(boot)/bootpImage
 
-all:	$(KBUILD_IMAGE)
+all:	bootpImage
 bootpImage: vmlinux
 
 boot_targets += uImage uImage.bin uImage.gz
-- 
cgit v1.2.3


From e62c527efb07fa62a549cb03109a4d0265cedce2 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.com>
Date: Tue, 22 Nov 2016 22:34:32 +0100
Subject: sh: Use full path in KBUILD_IMAGE definition

The KBUILD_IMAGE variable is used by the rpm and deb-pkg targets, which
expect it to point to the image file in the build directory. The
builddeb script has a workaround for architectures which only provide
the basename, but let's provide a clean interface for packaging tools.

Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Cc: linux-sh@vger.kernel.org
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/sh/Makefile | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 336f33a419d9..280bbff12102 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -94,7 +94,8 @@ defaultimage-$(CONFIG_SH_7206_SOLUTION_ENGINE)	:= vmlinux
 defaultimage-$(CONFIG_SH_7619_SOLUTION_ENGINE)	:= vmlinux
 
 # Set some sensible Kbuild defaults
-KBUILD_IMAGE		:= $(defaultimage-y)
+boot := arch/sh/boot
+KBUILD_IMAGE		:= $(boot)/$(defaultimage-y)
 
 #
 # Choosing incompatible machines durings configuration will result in
@@ -186,8 +187,6 @@ cpuincdir-y			+= cpu-common	# Must be last
 drivers-y			+= arch/sh/drivers/
 drivers-$(CONFIG_OPROFILE)	+= arch/sh/oprofile/
 
-boot := arch/sh/boot
-
 cflags-y	+= $(foreach d, $(cpuincdir-y), -Iarch/sh/include/$(d)) \
 		   $(foreach d, $(machdir-y), -Iarch/sh/include/$(d))
 
@@ -211,7 +210,7 @@ BOOT_TARGETS = uImage uImage.bz2 uImage.gz uImage.lzma uImage.xz uImage.lzo \
 	       romImage
 PHONY += $(BOOT_TARGETS)
 
-all: $(KBUILD_IMAGE)
+all: $(notdir $(KBUILD_IMAGE))
 
 $(BOOT_TARGETS): vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-- 
cgit v1.2.3


From 41f5b8db796fdea5d5265798add260e66d44276b Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.com>
Date: Tue, 22 Nov 2016 22:34:33 +0100
Subject: unicore32: Use full path in KBUILD_IMAGE definition

The KBUILD_IMAGE variable is used by the rpm and deb-pkg targets, which
expect it to point to the image file in the build directory. The
builddeb script has a workaround for architectures which only provide
the basename, but let's provide a clean interface for packaging tools.

Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/unicore32/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/unicore32/Makefile b/arch/unicore32/Makefile
index b6f5c4c1eaf9..98a5ca43ae87 100644
--- a/arch/unicore32/Makefile
+++ b/arch/unicore32/Makefile
@@ -43,9 +43,9 @@ boot			:= arch/unicore32/boot
 
 # Default defconfig and target when executing plain make
 KBUILD_DEFCONFIG	:= $(ARCH)_defconfig
-KBUILD_IMAGE		:= zImage
+KBUILD_IMAGE		:= $(boot)/zImage
 
-all:	$(KBUILD_IMAGE)
+all:	zImage
 
 zImage Image uImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-- 
cgit v1.2.3


From 09549aa1baa90d9e273ecd6c69c493bea6473dec Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.com>
Date: Tue, 22 Nov 2016 22:34:34 +0100
Subject: deb-pkg: Remove the KBUILD_IMAGE workaround

The arch Makefile are fixed to set KBUILD_IMAGE to the full patch, so
the workaround is no longer needed.

Signed-off-by: Michal Marek <mmarek@suse.com>
Reviewed-by: Riku Voipio <riku.voipio@linaro.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/package/builddeb | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 3c575cd07888..f04e91f0845d 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -143,12 +143,7 @@ else
 	cp System.map "$tmpdir/boot/System.map-$version"
 	cp $KCONFIG_CONFIG "$tmpdir/boot/config-$version"
 fi
-# Not all arches include the boot path in KBUILD_IMAGE
-if [ -e $KBUILD_IMAGE ]; then
-	cp $KBUILD_IMAGE "$tmpdir/$installed_image_path"
-else
-	cp arch/$ARCH/boot/$KBUILD_IMAGE "$tmpdir/$installed_image_path"
-fi
+cp "$($MAKE -s image_name)" "$tmpdir/$installed_image_path"
 
 if grep -q "^CONFIG_OF=y" $KCONFIG_CONFIG ; then
 	# Only some architectures with OF support have this target
-- 
cgit v1.2.3


From bea1baa1e7722c82631fa263653c1279002dd273 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Tue, 28 Feb 2017 17:19:50 +0200
Subject: clk: tegra: Mark TEGRA210_CLK_DBGAPB as always on

This is needed to make the JTAG debugging interface work.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
[treding@nvidia.com: add TODO comment]
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra210.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index f3e51e640d4d..9897dc55676b 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -2985,6 +2985,8 @@ static struct tegra_clk_init_table init_table[] __initdata = {
 	{ TEGRA210_CLK_EMC, TEGRA210_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA210_CLK_MSELECT, TEGRA210_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA210_CLK_CSITE, TEGRA210_CLK_CLK_MAX, 0, 1 },
+	/* TODO find a way to enable this on-demand */
+	{ TEGRA210_CLK_DBGAPB, TEGRA210_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA210_CLK_TSENSOR, TEGRA210_CLK_CLK_M, 400000, 0 },
 	{ TEGRA210_CLK_I2C1, TEGRA210_CLK_PLL_P, 0, 0 },
 	{ TEGRA210_CLK_I2C2, TEGRA210_CLK_PLL_P, 0, 0 },
-- 
cgit v1.2.3


From 39133505caf993c2d61c7adc8cab753e8a0b815d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 20 Mar 2017 17:14:14 +0100
Subject: clk: tegra: Fix build warnings on Tegra20/Tegra30

The recent conversion of proper const usage was only partial and didn't
include Tegra20 and Tegra30 support. Fix that up.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-periph.c | 2 +-
 drivers/clk/tegra/clk.h        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/tegra/clk-periph.c b/drivers/clk/tegra/clk-periph.c
index a5a5809dce6d..cf80831de79d 100644
--- a/drivers/clk/tegra/clk-periph.c
+++ b/drivers/clk/tegra/clk-periph.c
@@ -195,7 +195,7 @@ struct clk *tegra_clk_register_periph(const char *name,
 }
 
 struct clk *tegra_clk_register_periph_nodiv(const char *name,
-		const char **parent_names, int num_parents,
+		const char * const *parent_names, int num_parents,
 		struct tegra_clk_periph *periph, void __iomem *clk_base,
 		u32 offset)
 {
diff --git a/drivers/clk/tegra/clk.h b/drivers/clk/tegra/clk.h
index 960e47e41fdf..945b07093afa 100644
--- a/drivers/clk/tegra/clk.h
+++ b/drivers/clk/tegra/clk.h
@@ -590,7 +590,7 @@ struct clk *tegra_clk_register_periph(const char *name,
 		struct tegra_clk_periph *periph, void __iomem *clk_base,
 		u32 offset, unsigned long flags);
 struct clk *tegra_clk_register_periph_nodiv(const char *name,
-		const char **parent_names, int num_parents,
+		const char * const *parent_names, int num_parents,
 		struct tegra_clk_periph *periph, void __iomem *clk_base,
 		u32 offset);
 
-- 
cgit v1.2.3


From 0348aaa34412e24ebe622a2b1b013e68d6ae5412 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Mon, 20 Mar 2017 18:30:59 +0100
Subject: omapfb: dss: Handle return errors in dss_init_ports()

dss_init_ports() is not handling return errors from dpi_init_port() and
sdi_init_port(). It is also always returning 0 currently which results in
part of error handling code in dss_bind() being unused.

Fix dss_init_ports() to handle return errors from dpi_init_port() and
sdi_init_port().

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Cc: tomi.valkeinen@ti.com
[b.zolnierkie: fail early on errors, minor fixups]
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/omap2/omapfb/dss/dss.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dss.c b/drivers/video/fbdev/omap2/omapfb/dss/dss.c
index 47d7f69ad9ad..48c6500c24e1 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/dss.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/dss.c
@@ -941,11 +941,13 @@ static int dss_init_features(struct platform_device *pdev)
 	return 0;
 }
 
+static void dss_uninit_ports(struct platform_device *pdev);
+
 static int dss_init_ports(struct platform_device *pdev)
 {
 	struct device_node *parent = pdev->dev.of_node;
 	struct device_node *port;
-	int r;
+	int r, ret = 0;
 
 	if (parent == NULL)
 		return 0;
@@ -972,17 +974,21 @@ static int dss_init_ports(struct platform_device *pdev)
 
 		switch (port_type) {
 		case OMAP_DISPLAY_TYPE_DPI:
-			dpi_init_port(pdev, port);
+			ret = dpi_init_port(pdev, port);
 			break;
 		case OMAP_DISPLAY_TYPE_SDI:
-			sdi_init_port(pdev, port);
+			ret = sdi_init_port(pdev, port);
 			break;
 		default:
 			break;
 		}
-	} while ((port = omapdss_of_get_next_port(parent, port)) != NULL);
+	} while (!ret &&
+		 (port = omapdss_of_get_next_port(parent, port)) != NULL);
 
-	return 0;
+	if (ret)
+		dss_uninit_ports(pdev);
+
+	return ret;
 }
 
 static void dss_uninit_ports(struct platform_device *pdev)
-- 
cgit v1.2.3


From 982711ccb04bd1ea30b45a837c67692ef897f689 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 20 Mar 2017 18:30:59 +0100
Subject: video: fbdev: pmag-ba-fb: remove incorrect __exit markups

Even if bus is not hot-pluggable, the devices can be unbound from the
driver via sysfs, so we should not be using __exit annotations on
remove() methods. The only exception is drivers registered with
platform_driver_probe() which specifically disables sysfs bind/unbind
attributes.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: "Maciej W. Rozycki" <macro@linux-mips.org>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/pmag-ba-fb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/pmag-ba-fb.c b/drivers/video/fbdev/pmag-ba-fb.c
index df02fb4b7fd1..1fd02f40708e 100644
--- a/drivers/video/fbdev/pmag-ba-fb.c
+++ b/drivers/video/fbdev/pmag-ba-fb.c
@@ -235,7 +235,7 @@ err_alloc:
 	return err;
 }
 
-static int __exit pmagbafb_remove(struct device *dev)
+static int pmagbafb_remove(struct device *dev)
 {
 	struct tc_dev *tdev = to_tc_dev(dev);
 	struct fb_info *info = dev_get_drvdata(dev);
@@ -270,7 +270,7 @@ static struct tc_driver pmagbafb_driver = {
 		.name	= "pmagbafb",
 		.bus	= &tc_bus_type,
 		.probe	= pmagbafb_probe,
-		.remove	= __exit_p(pmagbafb_remove),
+		.remove	= pmagbafb_remove,
 	},
 };
 
-- 
cgit v1.2.3


From b48c3cebd1337fb3da0c9ef78a9c9e6f36316176 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 20 Mar 2017 18:31:00 +0100
Subject: video: fbdev: pmagb-b-fb: remove incorrect __exit markups

Even if bus is not hot-pluggable, the devices can be unbound from the
driver via sysfs, so we should not be using __exit annotations on
remove() methods. The only exception is drivers registered with
platform_driver_probe() which specifically disables sysfs bind/unbind
attributes.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/pmagb-b-fb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/pmagb-b-fb.c b/drivers/video/fbdev/pmagb-b-fb.c
index a7a179a0bb33..46e96c451506 100644
--- a/drivers/video/fbdev/pmagb-b-fb.c
+++ b/drivers/video/fbdev/pmagb-b-fb.c
@@ -353,7 +353,7 @@ err_alloc:
 	return err;
 }
 
-static int __exit pmagbbfb_remove(struct device *dev)
+static int pmagbbfb_remove(struct device *dev)
 {
 	struct tc_dev *tdev = to_tc_dev(dev);
 	struct fb_info *info = dev_get_drvdata(dev);
@@ -388,7 +388,7 @@ static struct tc_driver pmagbbfb_driver = {
 		.name	= "pmagbbfb",
 		.bus	= &tc_bus_type,
 		.probe	= pmagbbfb_probe,
-		.remove	= __exit_p(pmagbbfb_remove),
+		.remove	= pmagbbfb_remove,
 	},
 };
 
-- 
cgit v1.2.3


From 2988f8f946e6cd6713e9ee06181bed3a1f90a62a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 20 Mar 2017 18:31:00 +0100
Subject: video: fbdev: pmag-aa-fb: remove incorrect __exit markups

Even if bus is not hot-pluggable, devices can be unbound from the
driver via sysfs, so we should not be using __exit annotations on
remove() methods. The only exception is drivers registered with
platform_driver_probe() which specifically disables sysfs bind/unbind
attributes.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/pmag-aa-fb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/pmag-aa-fb.c b/drivers/video/fbdev/pmag-aa-fb.c
index ffe2dd482f84..39922f072db4 100644
--- a/drivers/video/fbdev/pmag-aa-fb.c
+++ b/drivers/video/fbdev/pmag-aa-fb.c
@@ -247,7 +247,7 @@ err_alloc:
 	return err;
 }
 
-static int __exit pmagaafb_remove(struct device *dev)
+static int pmagaafb_remove(struct device *dev)
 {
 	struct tc_dev *tdev = to_tc_dev(dev);
 	struct fb_info *info = dev_get_drvdata(dev);
@@ -280,7 +280,7 @@ static struct tc_driver pmagaafb_driver = {
 		.name	= "pmagaafb",
 		.bus	= &tc_bus_type,
 		.probe	= pmagaafb_probe,
-		.remove	= __exit_p(pmagaafb_remove),
+		.remove	= pmagaafb_remove,
 	},
 };
 
-- 
cgit v1.2.3


From 8c6457504f9a8806409c4885a620faedf1348dca Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 20 Mar 2017 18:31:00 +0100
Subject: video: fbdev: i810: remove incorrect __exit markups

Even if bus is not hot-pluggable, devices can be unbound from the
driver via sysfs, so we should not be using __exit annotations on
remove() methods. The only exception is drivers registered with
platform_driver_probe() which specifically disables sysfs bind/unbind
attributes.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Antonino Daplas <adaplas@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/i810/i810_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/video/fbdev/i810/i810_main.c b/drivers/video/fbdev/i810/i810_main.c
index 483ab2592d0c..2488baab7c89 100644
--- a/drivers/video/fbdev/i810/i810_main.c
+++ b/drivers/video/fbdev/i810/i810_main.c
@@ -81,7 +81,7 @@ static u32 voffset;
 static int i810fb_cursor(struct fb_info *info, struct fb_cursor *cursor);
 static int i810fb_init_pci(struct pci_dev *dev,
 			   const struct pci_device_id *entry);
-static void __exit i810fb_remove_pci(struct pci_dev *dev);
+static void i810fb_remove_pci(struct pci_dev *dev);
 static int i810fb_resume(struct pci_dev *dev);
 static int i810fb_suspend(struct pci_dev *dev, pm_message_t state);
 
@@ -128,7 +128,7 @@ static struct pci_driver i810fb_driver = {
 	.name     =	"i810fb",
 	.id_table =	i810fb_pci_tbl,
 	.probe    =	i810fb_init_pci,
-	.remove   =	__exit_p(i810fb_remove_pci),
+	.remove   =	i810fb_remove_pci,
 	.suspend  =     i810fb_suspend,
 	.resume   =     i810fb_resume,
 };
@@ -2123,7 +2123,7 @@ static void i810fb_release_resource(struct fb_info *info,
 
 }
 
-static void __exit i810fb_remove_pci(struct pci_dev *dev)
+static void i810fb_remove_pci(struct pci_dev *dev)
 {
 	struct fb_info *info = pci_get_drvdata(dev);
 	struct i810fb_par *par = info->par;
-- 
cgit v1.2.3


From 3fc4f2f94c9a7c5689d6c70774bab1bb4ff8f297 Mon Sep 17 00:00:00 2001
From: Takeshi Kihara <takeshi.kihara.df@renesas.com>
Date: Mon, 20 Mar 2017 18:31:00 +0100
Subject: video/logo: tidyup fb_logo_late_init initcall timing

Some video driver might returns -EPROBE_DEFER when probe timing,
but logo init function doesn't care it.
Thus, such kernel can't have logo when boot time.

This patch solves this issue by exchanging current
late_initcall to late_initcall_sync

Signed-off-by: Takeshi Kihara <takeshi.kihara.df@renesas.com>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Laurent <laurent.pinchart@ideasonboard.com>
Cc: Fabian Frederick <fabf@skynet.be>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: shimoda <yoshihiro.shimoda.uh@renesas.com>
Cc: Koji Matsuoka <koji.matsuoka.xm@renesas.com>
Cc: shiiba <naoya.shiiba.nx@renesas.com>
Cc: sakato <ryusuke.sakato.bx@renesas.com>
Cc: Hosoya <yoshifumi.hosoya.wj@renesas.com>
Cc: Fukawa <tomoharu.fukawa.eb@renesas.com>
Cc: Hien Dang <hien.dang.eb@renesas.com>
Cc: Khiem Nguyen <khiem.nguyen.xt@renesas.com>
Cc: Kouei Abe <kouei.abe.cp@renesas.com>
Cc: Harunobu Kurokawa <harunobu.kurokawa.dn@renesas.com>
Cc: Hiroyuki Yokoyama <hiroyuki.yokoyama.vx@renesas.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/logo/logo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c
index b6bc4a0bda2a..4d50bfd13e7c 100644
--- a/drivers/video/logo/logo.c
+++ b/drivers/video/logo/logo.c
@@ -34,7 +34,7 @@ static int __init fb_logo_late_init(void)
 	return 0;
 }
 
-late_initcall(fb_logo_late_init);
+late_initcall_sync(fb_logo_late_init);
 
 /* logo's are marked __initdata. Use __ref to tell
  * modpost that it is intended that this function uses data
-- 
cgit v1.2.3


From a843ed3f6c3e856f9091b042c6b4ed34c02a3187 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 28 Feb 2017 17:31:59 +0100
Subject: clk: renesas: r8a7795: Correct parent clock and sort order for Audio
 DMACs

The parent clock of the Audio DMACs is the "ZS" AXI bus clock, which
maps to S3D1 on R-Car H3 ES1.x.
All module clocks must be sorted by clock ID.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
---
 drivers/clk/renesas/r8a7795-cpg-mssr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c
index 2add8218e0f7..cde470ce81e4 100644
--- a/drivers/clk/renesas/r8a7795-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c
@@ -142,8 +142,8 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("rwdt0",		 402,	R8A7795_CLK_R),
 	DEF_MOD("intc-ex",		 407,	R8A7795_CLK_CP),
 	DEF_MOD("intc-ap",		 408,	R8A7795_CLK_S3D1),
-	DEF_MOD("audmac0",		 502,	R8A7795_CLK_S3D4),
-	DEF_MOD("audmac1",		 501,	R8A7795_CLK_S3D4),
+	DEF_MOD("audmac1",		 501,	R8A7795_CLK_S3D1),
+	DEF_MOD("audmac0",		 502,	R8A7795_CLK_S3D1),
 	DEF_MOD("drif7",		 508,	R8A7795_CLK_S3D2),
 	DEF_MOD("drif6",		 509,	R8A7795_CLK_S3D2),
 	DEF_MOD("drif5",		 510,	R8A7795_CLK_S3D2),
-- 
cgit v1.2.3


From 2122b56d30e4fb25b383f137e83e6b901e5b05ae Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 28 Feb 2017 17:17:31 +0100
Subject: clk: renesas: r8a7795: Correct name of watchdog clock

There's only a single watchdog clock, and it's named "rwdt".

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/clk/renesas/r8a7795-cpg-mssr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c
index cde470ce81e4..4e176e7f958b 100644
--- a/drivers/clk/renesas/r8a7795-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c
@@ -139,7 +139,7 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("usb3-if0",		 328,	R8A7795_CLK_S3D1),
 	DEF_MOD("usb-dmac0",		 330,	R8A7795_CLK_S3D1),
 	DEF_MOD("usb-dmac1",		 331,	R8A7795_CLK_S3D1),
-	DEF_MOD("rwdt0",		 402,	R8A7795_CLK_R),
+	DEF_MOD("rwdt",			 402,	R8A7795_CLK_R),
 	DEF_MOD("intc-ex",		 407,	R8A7795_CLK_CP),
 	DEF_MOD("intc-ap",		 408,	R8A7795_CLK_S3D1),
 	DEF_MOD("audmac1",		 501,	R8A7795_CLK_S3D1),
-- 
cgit v1.2.3


From 89aa58a3951bcf242c7755075a7429d0ed6640de Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 28 Feb 2017 17:18:08 +0100
Subject: clk: renesas: r8a7796: Correct name of watchdog clock

There's only a single watchdog clock, and it's named "rwdt".

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/clk/renesas/r8a7796-cpg-mssr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/renesas/r8a7796-cpg-mssr.c b/drivers/clk/renesas/r8a7796-cpg-mssr.c
index 12a23c18bc1e..55003194a256 100644
--- a/drivers/clk/renesas/r8a7796-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7796-cpg-mssr.c
@@ -135,7 +135,7 @@ static const struct mssr_mod_clk r8a7796_mod_clks[] __initconst = {
 	DEF_MOD("sdif2",		 312,	R8A7796_CLK_SD2),
 	DEF_MOD("sdif1",		 313,	R8A7796_CLK_SD1),
 	DEF_MOD("sdif0",		 314,	R8A7796_CLK_SD0),
-	DEF_MOD("rwdt0",		 402,	R8A7796_CLK_R),
+	DEF_MOD("rwdt",			 402,	R8A7796_CLK_R),
 	DEF_MOD("intc-ap",		 408,	R8A7796_CLK_S3D1),
 	DEF_MOD("drif7",		 508,	R8A7796_CLK_S3D2),
 	DEF_MOD("drif6",		 509,	R8A7796_CLK_S3D2),
-- 
cgit v1.2.3


From 3c969cec16176e98f9d8c976c163d2bb519c7c87 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 10 Nov 2016 13:16:57 +0100
Subject: clk: renesas: r8a7795: Reformat core clock table

For easier comparison with other clock drivers.
No functional changes.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/clk/renesas/r8a7795-cpg-mssr.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c
index 4e176e7f958b..608178618da8 100644
--- a/drivers/clk/renesas/r8a7795-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c
@@ -53,8 +53,8 @@ enum clk_ids {
 
 static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
 	/* External Clock Inputs */
-	DEF_INPUT("extal",  CLK_EXTAL),
-	DEF_INPUT("extalr", CLK_EXTALR),
+	DEF_INPUT("extal",      CLK_EXTAL),
+	DEF_INPUT("extalr",     CLK_EXTALR),
 
 	/* Internal Core Clocks */
 	DEF_BASE(".main",       CLK_MAIN, CLK_TYPE_GEN3_MAIN, CLK_EXTAL),
@@ -89,23 +89,23 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
 	DEF_FIXED("s3d2",       R8A7795_CLK_S3D2,  CLK_S3,         2, 1),
 	DEF_FIXED("s3d4",       R8A7795_CLK_S3D4,  CLK_S3,         4, 1),
 
-	DEF_GEN3_SD("sd0",      R8A7795_CLK_SD0,   CLK_SDSRC,     0x0074),
-	DEF_GEN3_SD("sd1",      R8A7795_CLK_SD1,   CLK_SDSRC,     0x0078),
-	DEF_GEN3_SD("sd2",      R8A7795_CLK_SD2,   CLK_SDSRC,     0x0268),
-	DEF_GEN3_SD("sd3",      R8A7795_CLK_SD3,   CLK_SDSRC,     0x026c),
+	DEF_GEN3_SD("sd0",      R8A7795_CLK_SD0,   CLK_SDSRC,     0x074),
+	DEF_GEN3_SD("sd1",      R8A7795_CLK_SD1,   CLK_SDSRC,     0x078),
+	DEF_GEN3_SD("sd2",      R8A7795_CLK_SD2,   CLK_SDSRC,     0x268),
+	DEF_GEN3_SD("sd3",      R8A7795_CLK_SD3,   CLK_SDSRC,     0x26c),
 
 	DEF_FIXED("cl",         R8A7795_CLK_CL,    CLK_PLL1_DIV2, 48, 1),
 	DEF_FIXED("cp",         R8A7795_CLK_CP,    CLK_EXTAL,      2, 1),
 
-	DEF_DIV6P1("mso",       R8A7795_CLK_MSO,   CLK_PLL1_DIV4, 0x014),
-	DEF_DIV6P1("hdmi",      R8A7795_CLK_HDMI,  CLK_PLL1_DIV4, 0x250),
 	DEF_DIV6P1("canfd",     R8A7795_CLK_CANFD, CLK_PLL1_DIV4, 0x244),
 	DEF_DIV6P1("csi0",      R8A7795_CLK_CSI0,  CLK_PLL1_DIV4, 0x00c),
+	DEF_DIV6P1("mso",       R8A7795_CLK_MSO,   CLK_PLL1_DIV4, 0x014),
+	DEF_DIV6P1("hdmi",      R8A7795_CLK_HDMI,  CLK_PLL1_DIV4, 0x250),
 
-	DEF_DIV6_RO("osc",      R8A7795_CLK_OSC,   CLK_EXTAL, CPG_RCKCR, 8),
+	DEF_DIV6_RO("osc",      R8A7795_CLK_OSC,   CLK_EXTAL, CPG_RCKCR,  8),
 	DEF_DIV6_RO("r_int",    CLK_RINT,          CLK_EXTAL, CPG_RCKCR, 32),
 
-	DEF_BASE("r",           R8A7795_CLK_R, CLK_TYPE_GEN3_R, CLK_RINT),
+	DEF_BASE("r",           R8A7795_CLK_R,     CLK_TYPE_GEN3_R, CLK_RINT),
 };
 
 static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
-- 
cgit v1.2.3


From c013fc7d23ca5b29f0cdc37d58b2466ead4fd5f6 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 10 Nov 2016 13:18:25 +0100
Subject: clk: renesas: r8a7796: Reformat core clock table

For easier comparison with other clock drivers.
No functional changes.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/clk/renesas/r8a7796-cpg-mssr.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/clk/renesas/r8a7796-cpg-mssr.c b/drivers/clk/renesas/r8a7796-cpg-mssr.c
index 55003194a256..f7787101b8d0 100644
--- a/drivers/clk/renesas/r8a7796-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7796-cpg-mssr.c
@@ -54,8 +54,8 @@ enum clk_ids {
 
 static const struct cpg_core_clk r8a7796_core_clks[] __initconst = {
 	/* External Clock Inputs */
-	DEF_INPUT("extal",  CLK_EXTAL),
-	DEF_INPUT("extalr", CLK_EXTALR),
+	DEF_INPUT("extal",      CLK_EXTAL),
+	DEF_INPUT("extalr",     CLK_EXTALR),
 
 	/* Internal Core Clocks */
 	DEF_BASE(".main",       CLK_MAIN, CLK_TYPE_GEN3_MAIN, CLK_EXTAL),
@@ -95,10 +95,10 @@ static const struct cpg_core_clk r8a7796_core_clks[] __initconst = {
 	DEF_FIXED("s3d2",       R8A7796_CLK_S3D2,  CLK_S3,         2, 1),
 	DEF_FIXED("s3d4",       R8A7796_CLK_S3D4,  CLK_S3,         4, 1),
 
-	DEF_GEN3_SD("sd0",      R8A7796_CLK_SD0,   CLK_SDSRC,    0x0074),
-	DEF_GEN3_SD("sd1",      R8A7796_CLK_SD1,   CLK_SDSRC,    0x0078),
-	DEF_GEN3_SD("sd2",      R8A7796_CLK_SD2,   CLK_SDSRC,    0x0268),
-	DEF_GEN3_SD("sd3",      R8A7796_CLK_SD3,   CLK_SDSRC,    0x026c),
+	DEF_GEN3_SD("sd0",      R8A7796_CLK_SD0,   CLK_SDSRC,     0x074),
+	DEF_GEN3_SD("sd1",      R8A7796_CLK_SD1,   CLK_SDSRC,     0x078),
+	DEF_GEN3_SD("sd2",      R8A7796_CLK_SD2,   CLK_SDSRC,     0x268),
+	DEF_GEN3_SD("sd3",      R8A7796_CLK_SD3,   CLK_SDSRC,     0x26c),
 
 	DEF_FIXED("cl",         R8A7796_CLK_CL,    CLK_PLL1_DIV2, 48, 1),
 	DEF_FIXED("cp",         R8A7796_CLK_CP,    CLK_EXTAL,      2, 1),
-- 
cgit v1.2.3


From 5f3a432a44b135db002d22446827cfa061fc0bfb Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 10 Mar 2017 11:36:33 +0100
Subject: clk: renesas: rcar-gen3-cpg: Pass mode pins to rcar_gen3_cpg_init()

Pass the mode pin states from the SoC-specific CPG/MSSR driver to the
R-Car Gen3 CPG driver core, as their state will be needed to make some
core clock configuration decisions.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/clk/renesas/r8a7795-cpg-mssr.c | 2 +-
 drivers/clk/renesas/r8a7796-cpg-mssr.c | 2 +-
 drivers/clk/renesas/rcar-gen3-cpg.c    | 4 +++-
 drivers/clk/renesas/rcar-gen3-cpg.h    | 2 +-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c
index 608178618da8..4699f416e275 100644
--- a/drivers/clk/renesas/r8a7795-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c
@@ -330,7 +330,7 @@ static int __init r8a7795_cpg_mssr_init(struct device *dev)
 		return -EINVAL;
 	}
 
-	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR);
+	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR, cpg_mode);
 }
 
 const struct cpg_mssr_info r8a7795_cpg_mssr_info __initconst = {
diff --git a/drivers/clk/renesas/r8a7796-cpg-mssr.c b/drivers/clk/renesas/r8a7796-cpg-mssr.c
index f7787101b8d0..9d114b31b073 100644
--- a/drivers/clk/renesas/r8a7796-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7796-cpg-mssr.c
@@ -273,7 +273,7 @@ static int __init r8a7796_cpg_mssr_init(struct device *dev)
 		return -EINVAL;
 	}
 
-	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR);
+	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR, cpg_mode);
 }
 
 const struct cpg_mssr_info r8a7796_cpg_mssr_info __initconst = {
diff --git a/drivers/clk/renesas/rcar-gen3-cpg.c b/drivers/clk/renesas/rcar-gen3-cpg.c
index 742f6dc7c156..d395bb8c22f5 100644
--- a/drivers/clk/renesas/rcar-gen3-cpg.c
+++ b/drivers/clk/renesas/rcar-gen3-cpg.c
@@ -247,6 +247,7 @@ static struct clk * __init cpg_sd_clk_register(const struct cpg_core_clk *core,
 
 static const struct rcar_gen3_cpg_pll_config *cpg_pll_config __initdata;
 static unsigned int cpg_clk_extalr __initdata;
+static u32 cpg_mode __initdata;
 
 struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 	const struct cpg_core_clk *core, const struct cpg_mssr_info *info,
@@ -334,9 +335,10 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 }
 
 int __init rcar_gen3_cpg_init(const struct rcar_gen3_cpg_pll_config *config,
-			      unsigned int clk_extalr)
+			      unsigned int clk_extalr, u32 mode)
 {
 	cpg_pll_config = config;
 	cpg_clk_extalr = clk_extalr;
+	cpg_mode = mode;
 	return 0;
 }
diff --git a/drivers/clk/renesas/rcar-gen3-cpg.h b/drivers/clk/renesas/rcar-gen3-cpg.h
index f788f481dd42..073be54b5d03 100644
--- a/drivers/clk/renesas/rcar-gen3-cpg.h
+++ b/drivers/clk/renesas/rcar-gen3-cpg.h
@@ -37,6 +37,6 @@ struct clk *rcar_gen3_cpg_clk_register(struct device *dev,
 	const struct cpg_core_clk *core, const struct cpg_mssr_info *info,
 	struct clk **clks, void __iomem *base);
 int rcar_gen3_cpg_init(const struct rcar_gen3_cpg_pll_config *config,
-		       unsigned int clk_extalr);
+		       unsigned int clk_extalr, u32 mode);
 
 #endif
-- 
cgit v1.2.3


From cecbe87d73006cb321dec79b349e3fefd1a80962 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 10 Mar 2017 11:46:10 +0100
Subject: clk: renesas: rcar-gen3: Add workaround for PLL0/2/4 errata on H3
 ES1.0

Add a workaround for errata on R-Car H3 ES1.0, where the PLL0, PLL2, and
PLL4 clock frequencies are off by a factor of two.

Inspired by a patch by Dien Pham in the BSP.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Dien Pham <dien.pham.ry@renesas.com>
---
 drivers/clk/renesas/rcar-gen3-cpg.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/clk/renesas/rcar-gen3-cpg.c b/drivers/clk/renesas/rcar-gen3-cpg.c
index d395bb8c22f5..e5247e3dc897 100644
--- a/drivers/clk/renesas/rcar-gen3-cpg.c
+++ b/drivers/clk/renesas/rcar-gen3-cpg.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/sys_soc.h>
 
 #include "renesas-cpg-mssr.h"
 #include "rcar-gen3-cpg.h"
@@ -248,6 +249,17 @@ static struct clk * __init cpg_sd_clk_register(const struct cpg_core_clk *core,
 static const struct rcar_gen3_cpg_pll_config *cpg_pll_config __initdata;
 static unsigned int cpg_clk_extalr __initdata;
 static u32 cpg_mode __initdata;
+static u32 cpg_quirks __initdata;
+
+#define PLL_ERRATA	BIT(0)		/* Missing PLL0/2/4 post-divider */
+
+static const struct soc_device_attribute cpg_quirks_match[] __initconst = {
+	{
+		.soc_id = "r8a7795", .revision = "ES1.0",
+		.data = (void *)PLL_ERRATA,
+	},
+	{ /* sentinel */ }
+};
 
 struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 	const struct cpg_core_clk *core, const struct cpg_mssr_info *info,
@@ -276,6 +288,8 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 		 */
 		value = readl(base + CPG_PLL0CR);
 		mult = (((value >> 24) & 0x7f) + 1) * 2;
+		if (cpg_quirks & PLL_ERRATA)
+			mult *= 2;
 		break;
 
 	case CLK_TYPE_GEN3_PLL1:
@@ -291,6 +305,8 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 		 */
 		value = readl(base + CPG_PLL2CR);
 		mult = (((value >> 24) & 0x7f) + 1) * 2;
+		if (cpg_quirks & PLL_ERRATA)
+			mult *= 2;
 		break;
 
 	case CLK_TYPE_GEN3_PLL3:
@@ -306,6 +322,8 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 		 */
 		value = readl(base + CPG_PLL4CR);
 		mult = (((value >> 24) & 0x7f) + 1) * 2;
+		if (cpg_quirks & PLL_ERRATA)
+			mult *= 2;
 		break;
 
 	case CLK_TYPE_GEN3_SD:
@@ -337,8 +355,14 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 int __init rcar_gen3_cpg_init(const struct rcar_gen3_cpg_pll_config *config,
 			      unsigned int clk_extalr, u32 mode)
 {
+	const struct soc_device_attribute *attr;
+
 	cpg_pll_config = config;
 	cpg_clk_extalr = clk_extalr;
 	cpg_mode = mode;
+	attr = soc_device_match(cpg_quirks_match);
+	if (attr)
+		cpg_quirks = (uintptr_t)attr->data;
+	pr_debug("%s: mode = 0x%x quirks = 0x%x\n", __func__, mode, cpg_quirks);
 	return 0;
 }
-- 
cgit v1.2.3


From 6f9655b1b81f2aa9207ec3837641299406a26b69 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 21 Mar 2017 16:13:10 +0100
Subject: drivers/video: Convert remaining uses of pr_warning to pr_warn

To enable eventual removal of pr_warning

This makes pr_warn use consistent for drivers/video

Prior to this patch, there were 6 uses of pr_warning and
25 uses of pr_warn in drivers/video

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/aty/radeon_base.c | 4 ++--
 drivers/video/fbdev/core/fbmon.c      | 4 ++--
 drivers/video/fbdev/pxafb.c           | 7 +++----
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c
index 218339a4edaa..6b4c7872b375 100644
--- a/drivers/video/fbdev/aty/radeon_base.c
+++ b/drivers/video/fbdev/aty/radeon_base.c
@@ -2453,8 +2453,8 @@ static int radeonfb_pci_register(struct pci_dev *pdev,
 		err |= sysfs_create_bin_file(&rinfo->pdev->dev.kobj,
 						&edid2_attr);
 	if (err)
-		pr_warning("%s() Creating sysfs files failed, continuing\n",
-			   __func__);
+		pr_warn("%s() Creating sysfs files failed, continuing\n",
+			__func__);
 
 	/* save current mode regs before we switch into the new one
 	 * so we can restore this upon __exit
diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c
index 62c0cf79674f..687ebb053438 100644
--- a/drivers/video/fbdev/core/fbmon.c
+++ b/drivers/video/fbdev/core/fbmon.c
@@ -1073,9 +1073,9 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs)
 	for (i = specs->modedb_len + num; i < specs->modedb_len + num + svd_n; i++) {
 		int idx = svd[i - specs->modedb_len - num];
 		if (!idx || idx >= ARRAY_SIZE(cea_modes)) {
-			pr_warning("Reserved SVD code %d\n", idx);
+			pr_warn("Reserved SVD code %d\n", idx);
 		} else if (!cea_modes[idx].xres) {
-			pr_warning("Unimplemented SVD code %d\n", idx);
+			pr_warn("Unimplemented SVD code %d\n", idx);
 		} else {
 			memcpy(&m[i], cea_modes + idx, sizeof(m[i]));
 			pr_debug("Adding SVD #%d: %ux%u@%u\n", idx,
diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c
index ef73f14d7ba0..b21a89b03fb4 100644
--- a/drivers/video/fbdev/pxafb.c
+++ b/drivers/video/fbdev/pxafb.c
@@ -645,7 +645,7 @@ static void overlay1fb_disable(struct pxafb_layer *ofb)
 	lcd_writel(ofb->fbi, FBR1, ofb->fbi->fdadr[DMA_OV1] | 0x3);
 
 	if (wait_for_completion_timeout(&ofb->branch_done, 1 * HZ) == 0)
-		pr_warning("%s: timeout disabling overlay1\n", __func__);
+		pr_warn("%s: timeout disabling overlay1\n", __func__);
 
 	lcd_writel(ofb->fbi, LCCR5, lccr5);
 }
@@ -710,7 +710,7 @@ static void overlay2fb_disable(struct pxafb_layer *ofb)
 	lcd_writel(ofb->fbi, FBR4, ofb->fbi->fdadr[DMA_OV2_Cr] | 0x3);
 
 	if (wait_for_completion_timeout(&ofb->branch_done, 1 * HZ) == 0)
-		pr_warning("%s: timeout disabling overlay2\n", __func__);
+		pr_warn("%s: timeout disabling overlay2\n", __func__);
 }
 
 static struct pxafb_layer_ops ofb_ops[] = {
@@ -1187,8 +1187,7 @@ int pxafb_smart_flush(struct fb_info *info)
 	lcd_writel(fbi, LCCR0, fbi->reg_lccr0 | LCCR0_ENB);
 
 	if (wait_for_completion_timeout(&fbi->command_done, HZ/2) == 0) {
-		pr_warning("%s: timeout waiting for command done\n",
-				__func__);
+		pr_warn("%s: timeout waiting for command done\n", __func__);
 		ret = -ETIMEDOUT;
 	}
 
-- 
cgit v1.2.3


From c834f0e8a8bb3025aac38e802fca2e686720f544 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 20 Mar 2017 17:14:11 -0700
Subject: Kbuild: make designated_init attribute fatal

If a structure is marked with __attribute__((designated_init)) from
GCC or Sparse, it needs to have all static initializers using designated
initialization. Fail the build for any missing cases. This attribute will
be used by the randstruct plugin to make sure randomized structures are
being correctly initialized.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Makefile b/Makefile
index 515aeea7e702..ae49fef98ca3 100644
--- a/Makefile
+++ b/Makefile
@@ -795,6 +795,9 @@ KBUILD_CFLAGS   += $(call cc-option,-Werror=date-time)
 # enforce correct pointer usage
 KBUILD_CFLAGS   += $(call cc-option,-Werror=incompatible-pointer-types)
 
+# Require designated initializers for all marked structures
+KBUILD_CFLAGS   += $(call cc-option,-Werror=designated-init)
+
 # use the deterministic mode of AR if available
 KBUILD_ARFLAGS := $(call ar-option,D)
 
-- 
cgit v1.2.3


From 8326c5d2057a06adb082f3547c5d1f75cfd33e57 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 16 Mar 2017 16:23:51 +0200
Subject: iommu/dmar: Rectify return code handling in detect_intel_iommu()

There is inconsistency in return codes across the functions called from
detect_intel_iommu().

Make it consistent and propagate return code to the caller.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 36e3f430d265..edcf7410f736 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -551,7 +551,7 @@ static int __init dmar_table_detect(void)
 		status = AE_NOT_FOUND;
 	}
 
-	return (ACPI_SUCCESS(status) ? 1 : 0);
+	return ACPI_SUCCESS(status) ? 0 : -ENOENT;
 }
 
 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
@@ -891,17 +891,17 @@ int __init detect_intel_iommu(void)
 
 	down_write(&dmar_global_lock);
 	ret = dmar_table_detect();
-	if (ret)
-		ret = !dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
-					    &validate_drhd_cb);
-	if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
+	if (!ret)
+		ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
+					   &validate_drhd_cb);
+	if (!ret && !no_iommu && !iommu_detected && !dmar_disabled) {
 		iommu_detected = 1;
 		/* Make sure ACS will be enabled */
 		pci_request_acs();
 	}
 
 #ifdef CONFIG_X86
-	if (ret)
+	if (!ret)
 		x86_init.iommu.iommu_init = intel_iommu_init;
 #endif
 
@@ -911,10 +911,9 @@ int __init detect_intel_iommu(void)
 	}
 	up_write(&dmar_global_lock);
 
-	return ret ? 1 : -ENODEV;
+	return ret ? ret : 1;
 }
 
-
 static void unmap_iommu(struct intel_iommu *iommu)
 {
 	iounmap(iommu->reg);
-- 
cgit v1.2.3


From 4a8ed2b819402ae450e3c53a1fe5eec59e3f423e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 16 Mar 2017 16:23:52 +0200
Subject: iommu/dmar: Return directly from a loop in dmar_dev_scope_status()

There is no need to have a temporary variable.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index edcf7410f736..71d774f1d406 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -557,11 +557,10 @@ static int __init dmar_table_detect(void)
 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
 				       size_t len, struct dmar_res_callback *cb)
 {
-	int ret = 0;
 	struct acpi_dmar_header *iter, *next;
 	struct acpi_dmar_header *end = ((void *)start) + len;
 
-	for (iter = start; iter < end && ret == 0; iter = next) {
+	for (iter = start; iter < end; iter = next) {
 		next = (void *)iter + iter->length;
 		if (iter->length == 0) {
 			/* Avoid looping forever on bad ACPI tables */
@@ -570,8 +569,7 @@ static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
 		} else if (next > end) {
 			/* Avoid passing table end */
 			pr_warn(FW_BUG "Record passes table end\n");
-			ret = -EINVAL;
-			break;
+			return -EINVAL;
 		}
 
 		if (cb->print_entry)
@@ -582,15 +580,19 @@ static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
 			pr_debug("Unknown DMAR structure type %d\n",
 				 iter->type);
 		} else if (cb->cb[iter->type]) {
+			int ret;
+
 			ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
+			if (ret)
+				return ret;
 		} else if (!cb->ignore_unhandled) {
 			pr_warn("No handler for DMAR structure type %d\n",
 				iter->type);
-			ret = -EINVAL;
+			return -EINVAL;
 		}
 	}
 
-	return ret;
+	return 0;
 }
 
 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
-- 
cgit v1.2.3


From 3f6db6591a2decad5f223a7dcfc01d2a3c15e187 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 16 Mar 2017 16:23:53 +0200
Subject: iommu/dmar: Remove redundant assignment of ret

There is no need to assign ret to 0 in some cases. Moreover it might
shadow some errors in the future.

Remove such assignments.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 71d774f1d406..9a44e20d7d88 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -391,7 +391,7 @@ static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
 {
 	struct acpi_dmar_hardware_unit *drhd;
 	struct dmar_drhd_unit *dmaru;
-	int ret = 0;
+	int ret;
 
 	drhd = (struct acpi_dmar_hardware_unit *)header;
 	dmaru = dmar_find_dmaru(drhd);
@@ -609,8 +609,8 @@ static int __init
 parse_dmar_table(void)
 {
 	struct acpi_table_dmar *dmar;
-	int ret = 0;
 	int drhd_count = 0;
+	int ret;
 	struct dmar_res_callback cb = {
 		.print_entry = true,
 		.ignore_unhandled = true,
-- 
cgit v1.2.3


From f9808079aa7626d71215a71661dbed5c4ff101d2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 16 Mar 2017 16:23:54 +0200
Subject: iommu/dmar: Remove redundant ' != 0' when check return code

Usual pattern when we check for return code, which might be negative
errno, is either (ret) or (!ret).

Remove extra ' != 0' from condition.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 9a44e20d7d88..cbf7763d8091 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -311,7 +311,7 @@ static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
 				((void *)drhd) + drhd->header.length,
 				dmaru->segment,
 				dmaru->devices, dmaru->devices_cnt);
-		if (ret != 0)
+		if (ret)
 			break;
 	}
 	if (ret >= 0)
-- 
cgit v1.2.3


From 61012985eb132a2fa5e4a3eddbc33528334fa377 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 16 Mar 2017 16:23:55 +0200
Subject: iommu/vt-d: Use lo_hi_readq() / lo_hi_writeq()

There is already helper functions to do 64-bit I/O on 32-bit machines or
buses, thus we don't need to reinvent the wheel.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index c573a52ae440..485a5b48f038 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -30,6 +30,8 @@
 #include <linux/mmu_notifier.h>
 #include <linux/list.h>
 #include <linux/iommu.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
 
@@ -72,24 +74,8 @@
 
 #define OFFSET_STRIDE		(9)
 
-#ifdef CONFIG_64BIT
 #define dmar_readq(a) readq(a)
 #define dmar_writeq(a,v) writeq(v,a)
-#else
-static inline u64 dmar_readq(void __iomem *addr)
-{
-	u32 lo, hi;
-	lo = readl(addr);
-	hi = readl(addr + 4);
-	return (((u64) hi) << 32) + lo;
-}
-
-static inline void dmar_writeq(void __iomem *addr, u64 val)
-{
-	writel((u32)val, addr);
-	writel((u32)(val >> 32), addr + 4);
-}
-#endif
 
 #define DMAR_VER_MAJOR(v)		(((v) & 0xf0) >> 4)
 #define DMAR_VER_MINOR(v)		((v) & 0x0f)
-- 
cgit v1.2.3


From 938f1bbe35e3a7cb07e1fa7c512e2ef8bb866bdf Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 16 Mar 2017 17:00:17 +0000
Subject: iommu/dma: Don't reserve PCI I/O windows

Even if a host controller's CPU-side MMIO windows into PCI I/O space do
happen to leak into PCI memory space such that it might treat them as
peer addresses, trying to reserve the corresponding I/O space addresses
doesn't do anything to help solve that problem. Stop doing a silly thing.

Fixes: fade1ec055dc ("iommu/dma: Avoid PCI host bridge windows")
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dma-iommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 48d36ce59efb..1e0983488a8d 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -175,8 +175,7 @@ static void iova_reserve_pci_windows(struct pci_dev *dev,
 	unsigned long lo, hi;
 
 	resource_list_for_each_entry(window, &bridge->windows) {
-		if (resource_type(window->res) != IORESOURCE_MEM &&
-		    resource_type(window->res) != IORESOURCE_IO)
+		if (resource_type(window->res) != IORESOURCE_MEM)
 			continue;
 
 		lo = iova_pfn(iovad, window->res->start - window->offset);
-- 
cgit v1.2.3


From 7c1b058c8b5a310f2f0439aff14e454aa9afe502 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 16 Mar 2017 17:00:18 +0000
Subject: iommu/dma: Handle IOMMU API reserved regions

Now that it's simple to discover the necessary reservations for a given
device/IOMMU combination, let's wire up the appropriate handling. Basic
reserved regions and direct-mapped regions we simply have to carve out
of IOVA space (the IOMMU core having already mapped the latter before
attaching the device). For hardware MSI regions, we also pre-populate
the cookie with matching msi_pages. That way, irqchip drivers which
normally assume MSIs to require mapping at the IOMMU can keep working
without having to special-case their iommu_dma_map_msi_msg() hook, or
indeed be aware at all of quirks preventing the IOMMU from translating
certain addresses.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dma-iommu.c | 76 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 69 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 1e0983488a8d..5787f919f4ec 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -184,6 +184,66 @@ static void iova_reserve_pci_windows(struct pci_dev *dev,
 	}
 }
 
+static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
+		phys_addr_t start, phys_addr_t end)
+{
+	struct iova_domain *iovad = &cookie->iovad;
+	struct iommu_dma_msi_page *msi_page;
+	int i, num_pages;
+
+	start -= iova_offset(iovad, start);
+	num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);
+
+	msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL);
+	if (!msi_page)
+		return -ENOMEM;
+
+	for (i = 0; i < num_pages; i++) {
+		msi_page[i].phys = start;
+		msi_page[i].iova = start;
+		INIT_LIST_HEAD(&msi_page[i].list);
+		list_add(&msi_page[i].list, &cookie->msi_page_list);
+		start += iovad->granule;
+	}
+
+	return 0;
+}
+
+static int iova_reserve_iommu_regions(struct device *dev,
+		struct iommu_domain *domain)
+{
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
+	struct iommu_resv_region *region;
+	LIST_HEAD(resv_regions);
+	int ret = 0;
+
+	if (dev_is_pci(dev))
+		iova_reserve_pci_windows(to_pci_dev(dev), iovad);
+
+	iommu_get_resv_regions(dev, &resv_regions);
+	list_for_each_entry(region, &resv_regions, list) {
+		unsigned long lo, hi;
+
+		/* We ARE the software that manages these! */
+		if (region->type == IOMMU_RESV_SW_MSI)
+			continue;
+
+		lo = iova_pfn(iovad, region->start);
+		hi = iova_pfn(iovad, region->start + region->length - 1);
+		reserve_iova(iovad, lo, hi);
+
+		if (region->type == IOMMU_RESV_MSI)
+			ret = cookie_init_hw_msi_region(cookie, region->start,
+					region->start + region->length);
+		if (ret)
+			break;
+	}
+	iommu_put_resv_regions(dev, &resv_regions);
+
+	return ret;
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -202,7 +262,6 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
 	unsigned long order, base_pfn, end_pfn;
-	bool pci = dev && dev_is_pci(dev);
 
 	if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
 		return -EINVAL;
@@ -232,7 +291,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	 * leave the cache limit at the top of their range to save an rb_last()
 	 * traversal on every allocation.
 	 */
-	if (pci)
+	if (dev && dev_is_pci(dev))
 		end_pfn &= DMA_BIT_MASK(32) >> order;
 
 	/* start_pfn is always nonzero for an already-initialised domain */
@@ -247,12 +306,15 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 		 * area cache limit down for the benefit of the smaller one.
 		 */
 		iovad->dma_32bit_pfn = min(end_pfn, iovad->dma_32bit_pfn);
-	} else {
-		init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn);
-		if (pci)
-			iova_reserve_pci_windows(to_pci_dev(dev), iovad);
+
+		return 0;
 	}
-	return 0;
+
+	init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn);
+	if (!dev)
+		return 0;
+
+	return iova_reserve_iommu_regions(dev, domain);
 }
 EXPORT_SYMBOL(iommu_dma_init_domain);
 
-- 
cgit v1.2.3


From 273df9635385b2156851c7ee49f40658d7bcb29d Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 16 Mar 2017 17:00:19 +0000
Subject: iommu/dma: Make PCI window reservation generic

Now that we're applying the IOMMU API reserved regions to our IOVA
domains, we shouldn't need to privately special-case PCI windows, or
indeed anything else which isn't specific to our iommu-dma layer.
However, since those aren't IOMMU-specific either, rather than start
duplicating code into IOMMU drivers let's transform the existing
function into an iommu_get_resv_regions() helper that they can share.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/arm-smmu-v3.c |  2 ++
 drivers/iommu/arm-smmu.c    |  2 ++
 drivers/iommu/dma-iommu.c   | 38 ++++++++++++++++++++++++++++----------
 include/linux/dma-iommu.h   |  5 +++++
 4 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 591bb96047c9..bbd46efbe075 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1893,6 +1893,8 @@ static void arm_smmu_get_resv_regions(struct device *dev,
 		return;
 
 	list_add_tail(&region->list, head);
+
+	iommu_dma_get_resv_regions(dev, head);
 }
 
 static void arm_smmu_put_resv_regions(struct device *dev,
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index b493c99e17f7..9b33700b7c69 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1613,6 +1613,8 @@ static void arm_smmu_get_resv_regions(struct device *dev,
 		return;
 
 	list_add_tail(&region->list, head);
+
+	iommu_dma_get_resv_regions(dev, head);
 }
 
 static void arm_smmu_put_resv_regions(struct device *dev,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 5787f919f4ec..85652110c8ff 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -167,22 +167,43 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
 }
 EXPORT_SYMBOL(iommu_put_dma_cookie);
 
-static void iova_reserve_pci_windows(struct pci_dev *dev,
-		struct iova_domain *iovad)
+/**
+ * iommu_dma_get_resv_regions - Reserved region driver helper
+ * @dev: Device from iommu_get_resv_regions()
+ * @list: Reserved region list from iommu_get_resv_regions()
+ *
+ * IOMMU drivers can use this to implement their .get_resv_regions callback
+ * for general non-IOMMU-specific reservations. Currently, this covers host
+ * bridge windows for PCI devices.
+ */
+void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
 {
-	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
+	struct pci_host_bridge *bridge;
 	struct resource_entry *window;
-	unsigned long lo, hi;
 
+	if (!dev_is_pci(dev))
+		return;
+
+	bridge = pci_find_host_bridge(to_pci_dev(dev)->bus);
 	resource_list_for_each_entry(window, &bridge->windows) {
+		struct iommu_resv_region *region;
+		phys_addr_t start;
+		size_t length;
+
 		if (resource_type(window->res) != IORESOURCE_MEM)
 			continue;
 
-		lo = iova_pfn(iovad, window->res->start - window->offset);
-		hi = iova_pfn(iovad, window->res->end - window->offset);
-		reserve_iova(iovad, lo, hi);
+		start = window->res->start - window->offset;
+		length = window->res->end - window->res->start + 1;
+		region = iommu_alloc_resv_region(start, length, 0,
+				IOMMU_RESV_RESERVED);
+		if (!region)
+			return;
+
+		list_add_tail(&region->list, list);
 	}
 }
+EXPORT_SYMBOL(iommu_dma_get_resv_regions);
 
 static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
 		phys_addr_t start, phys_addr_t end)
@@ -218,9 +239,6 @@ static int iova_reserve_iommu_regions(struct device *dev,
 	LIST_HEAD(resv_regions);
 	int ret = 0;
 
-	if (dev_is_pci(dev))
-		iova_reserve_pci_windows(to_pci_dev(dev), iovad);
-
 	iommu_get_resv_regions(dev, &resv_regions);
 	list_for_each_entry(region, &resv_regions, list) {
 		unsigned long lo, hi;
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 5725c94b1f12..b6635a46fc7c 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -71,6 +71,7 @@ int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
 
 /* The DMA API isn't _quite_ the whole story, though... */
 void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg);
+void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
 
 #else
 
@@ -100,6 +101,10 @@ static inline void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
 {
 }
 
+static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
+{
+}
+
 #endif	/* CONFIG_IOMMU_DMA */
 #endif	/* __KERNEL__ */
 #endif	/* __DMA_IOMMU_H */
-- 
cgit v1.2.3


From b61753a4c44eb372078e5e719686c383f91834b2 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Fri, 17 Mar 2017 18:18:37 +0100
Subject: dt-bindings: rk1108-cru: rename RK1108 to RV1108

Rockchip finally named the SOC as RV1108, so change it.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 .../bindings/clock/rockchip,rk1108-cru.txt         | 59 ----------------------
 .../bindings/clock/rockchip,rv1108-cru.txt         | 59 ++++++++++++++++++++++
 2 files changed, 59 insertions(+), 59 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/clock/rockchip,rk1108-cru.txt
 create mode 100644 Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt

diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk1108-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk1108-cru.txt
deleted file mode 100644
index 4da126116cf0..000000000000
--- a/Documentation/devicetree/bindings/clock/rockchip,rk1108-cru.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-* Rockchip RK1108 Clock and Reset Unit
-
-The RK1108 clock controller generates and supplies clock to various
-controllers within the SoC and also implements a reset controller for SoC
-peripherals.
-
-Required Properties:
-
-- compatible: should be "rockchip,rk1108-cru"
-- reg: physical base address of the controller and length of memory mapped
-  region.
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Optional Properties:
-
-- rockchip,grf: phandle to the syscon managing the "general register files"
-  If missing pll rates are not changeable, due to the missing pll lock status.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/rk1108-cru.h headers and can be
-used in device tree sources. Similar macros exist for the reset sources in
-these files.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xin24m" - crystal input - required,
- - "ext_vip" - external VIP clock - optional
- - "ext_i2s" - external I2S clock - optional
- - "ext_gmac" - external GMAC clock - optional
- - "hdmiphy" - external clock input derived from HDMI PHY - optional
- - "usbphy" - external clock input derived from USB PHY - optional
-
-Example: Clock controller node:
-
-	cru: cru@20200000 {
-		compatible = "rockchip,rk1108-cru";
-		reg = <0x20200000 0x1000>;
-		rockchip,grf = <&grf>;
-
-		#clock-cells = <1>;
-		#reset-cells = <1>;
-	};
-
-Example: UART controller node that consumes the clock generated by the clock
-  controller:
-
-	uart0: serial@10230000 {
-		compatible = "rockchip,rk1108-uart", "snps,dw-apb-uart";
-		reg = <0x10230000 0x100>;
-		interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
-		reg-shift = <2>;
-		reg-io-width = <4>;
-		clocks = <&cru SCLK_UART0>;
-	};
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt
new file mode 100644
index 000000000000..161326a4f9c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt
@@ -0,0 +1,59 @@
+* Rockchip RV1108 Clock and Reset Unit
+
+The RV1108 clock controller generates and supplies clock to various
+controllers within the SoC and also implements a reset controller for SoC
+peripherals.
+
+Required Properties:
+
+- compatible: should be "rockchip,rv1108-cru"
+- reg: physical base address of the controller and length of memory mapped
+  region.
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Optional Properties:
+
+- rockchip,grf: phandle to the syscon managing the "general register files"
+  If missing pll rates are not changeable, due to the missing pll lock status.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. All available clocks are defined as
+preprocessor macros in the dt-bindings/clock/rv1108-cru.h headers and can be
+used in device tree sources. Similar macros exist for the reset sources in
+these files.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It is expected
+that they are defined using standard clock bindings with following
+clock-output-names:
+ - "xin24m" - crystal input - required,
+ - "ext_vip" - external VIP clock - optional
+ - "ext_i2s" - external I2S clock - optional
+ - "ext_gmac" - external GMAC clock - optional
+ - "hdmiphy" - external clock input derived from HDMI PHY - optional
+ - "usbphy" - external clock input derived from USB PHY - optional
+
+Example: Clock controller node:
+
+	cru: cru@20200000 {
+		compatible = "rockchip,rv1108-cru";
+		reg = <0x20200000 0x1000>;
+		rockchip,grf = <&grf>;
+
+		#clock-cells = <1>;
+		#reset-cells = <1>;
+	};
+
+Example: UART controller node that consumes the clock generated by the clock
+  controller:
+
+	uart0: serial@10230000 {
+		compatible = "rockchip,rv1108-uart", "snps,dw-apb-uart";
+		reg = <0x10230000 0x100>;
+		interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
+		reg-shift = <2>;
+		reg-io-width = <4>;
+		clocks = <&cru SCLK_UART0>;
+	};
-- 
cgit v1.2.3


From 7e2a9035c1dbc4632f1897a8fe580fc90f33c013 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Fri, 17 Mar 2017 18:18:38 +0100
Subject: clk: rockchip: rename RK1108 to RV1108

Rockchip finally named the SOC as RV1108, so change it.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>

[include rename in rk1108.dtsi to prevent compile errors]
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 arch/arm/boot/dts/rk1108.dtsi          |   2 +-
 drivers/clk/rockchip/Makefile          |   2 +-
 drivers/clk/rockchip/clk-rk1108.c      | 531 ---------------------------------
 drivers/clk/rockchip/clk-rv1108.c      | 531 +++++++++++++++++++++++++++++++++
 drivers/clk/rockchip/clk.h             |  28 +-
 include/dt-bindings/clock/rk1108-cru.h | 269 -----------------
 include/dt-bindings/clock/rv1108-cru.h | 269 +++++++++++++++++
 7 files changed, 816 insertions(+), 816 deletions(-)
 delete mode 100644 drivers/clk/rockchip/clk-rk1108.c
 create mode 100644 drivers/clk/rockchip/clk-rv1108.c
 delete mode 100644 include/dt-bindings/clock/rk1108-cru.h
 create mode 100644 include/dt-bindings/clock/rv1108-cru.h

diff --git a/arch/arm/boot/dts/rk1108.dtsi b/arch/arm/boot/dts/rk1108.dtsi
index d6194bff7afe..4867342b88d4 100644
--- a/arch/arm/boot/dts/rk1108.dtsi
+++ b/arch/arm/boot/dts/rk1108.dtsi
@@ -41,7 +41,7 @@
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
-#include <dt-bindings/clock/rk1108-cru.h>
+#include <dt-bindings/clock/rv1108-cru.h>
 #include <dt-bindings/pinctrl/rockchip.h>
 / {
 	#address-cells = <1>;
diff --git a/drivers/clk/rockchip/Makefile b/drivers/clk/rockchip/Makefile
index 141971488f40..26b220c988b2 100644
--- a/drivers/clk/rockchip/Makefile
+++ b/drivers/clk/rockchip/Makefile
@@ -12,7 +12,7 @@ obj-y	+= clk-muxgrf.o
 obj-y	+= clk-ddr.o
 obj-$(CONFIG_RESET_CONTROLLER)	+= softrst.o
 
-obj-y	+= clk-rk1108.o
+obj-y	+= clk-rv1108.o
 obj-y	+= clk-rk3036.o
 obj-y	+= clk-rk3188.o
 obj-y	+= clk-rk3228.o
diff --git a/drivers/clk/rockchip/clk-rk1108.c b/drivers/clk/rockchip/clk-rk1108.c
deleted file mode 100644
index 92750d798e5d..000000000000
--- a/drivers/clk/rockchip/clk-rk1108.c
+++ /dev/null
@@ -1,531 +0,0 @@
-/*
- * Copyright (c) 2016 Rockchip Electronics Co. Ltd.
- * Author: Shawn Lin <shawn.lin@rock-chips.com>
- *         Andy Yan <andy.yan@rock-chips.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/clk-provider.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/syscore_ops.h>
-#include <dt-bindings/clock/rk1108-cru.h>
-#include "clk.h"
-
-#define RK1108_GRF_SOC_STATUS0	0x480
-
-enum rk1108_plls {
-	apll, dpll, gpll,
-};
-
-static struct rockchip_pll_rate_table rk1108_pll_rates[] = {
-	/* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */
-	RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0),
-	RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0),
-	RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 984000000, 1, 82, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 960000000, 1, 80, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 936000000, 1, 78, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 912000000, 1, 76, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 900000000, 4, 300, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 888000000, 1, 74, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 864000000, 1, 72, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 840000000, 1, 70, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 816000000, 1, 68, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 800000000, 6, 400, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 700000000, 6, 350, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 696000000, 1, 58, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 600000000, 1, 75, 3, 1, 1, 0),
-	RK3036_PLL_RATE( 594000000, 2, 99, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 504000000, 1, 63, 3, 1, 1, 0),
-	RK3036_PLL_RATE( 500000000, 6, 250, 2, 1, 1, 0),
-	RK3036_PLL_RATE( 408000000, 1, 68, 2, 2, 1, 0),
-	RK3036_PLL_RATE( 312000000, 1, 52, 2, 2, 1, 0),
-	RK3036_PLL_RATE( 216000000, 1, 72, 4, 2, 1, 0),
-	RK3036_PLL_RATE(  96000000, 1, 64, 4, 4, 1, 0),
-	{ /* sentinel */ },
-};
-
-#define RK1108_DIV_CORE_MASK		0xf
-#define RK1108_DIV_CORE_SHIFT		4
-
-#define RK1108_CLKSEL0(_core_peri_div)	\
-	{				\
-		.reg = RK1108_CLKSEL_CON(1),	\
-		.val = HIWORD_UPDATE(_core_peri_div, RK1108_DIV_CORE_MASK,\
-				RK1108_DIV_CORE_SHIFT)	\
-	}
-
-#define RK1108_CPUCLK_RATE(_prate, _core_peri_div)			\
-	{								\
-		.prate = _prate,					\
-		.divs = {						\
-			RK1108_CLKSEL0(_core_peri_div),		\
-		},							\
-	}
-
-static struct rockchip_cpuclk_rate_table rk1108_cpuclk_rates[] __initdata = {
-	RK1108_CPUCLK_RATE(816000000, 4),
-	RK1108_CPUCLK_RATE(600000000, 4),
-	RK1108_CPUCLK_RATE(312000000, 4),
-};
-
-static const struct rockchip_cpuclk_reg_data rk1108_cpuclk_data = {
-	.core_reg = RK1108_CLKSEL_CON(0),
-	.div_core_shift = 0,
-	.div_core_mask = 0x1f,
-	.mux_core_alt = 1,
-	.mux_core_main = 0,
-	.mux_core_shift = 8,
-	.mux_core_mask = 0x1,
-};
-
-PNAME(mux_pll_p)		= { "xin24m", "xin24m"};
-PNAME(mux_ddrphy_p)		= { "dpll_ddr", "gpll_ddr", "apll_ddr" };
-PNAME(mux_armclk_p)		= { "apll_core", "gpll_core", "dpll_core" };
-PNAME(mux_usb480m_pre_p)	= { "usbphy", "xin24m" };
-PNAME(mux_hdmiphy_phy_p)	= { "hdmiphy", "xin24m" };
-PNAME(mux_dclk_hdmiphy_pre_p)	= { "dclk_hdmiphy_src_gpll", "dclk_hdmiphy_src_dpll" };
-PNAME(mux_pll_src_4plls_p)	= { "dpll", "hdmiphy", "gpll", "usb480m" };
-PNAME(mux_pll_src_3plls_p)	= { "apll", "gpll", "dpll" };
-PNAME(mux_pll_src_2plls_p)	= { "dpll", "gpll" };
-PNAME(mux_pll_src_apll_gpll_p)	= { "apll", "gpll" };
-PNAME(mux_aclk_peri_src_p)	= { "aclk_peri_src_dpll", "aclk_peri_src_gpll" };
-PNAME(mux_aclk_bus_src_p)	= { "aclk_bus_src_gpll", "aclk_bus_src_apll", "aclk_bus_src_dpll" };
-PNAME(mux_mmc_src_p)		= { "dpll", "gpll", "xin24m", "usb480m" };
-PNAME(mux_pll_src_dpll_gpll_usb480m_p)	= { "dpll", "gpll", "usb480m" };
-PNAME(mux_uart0_p)		= { "uart0_src", "uart0_frac", "xin24m" };
-PNAME(mux_uart1_p)		= { "uart1_src", "uart1_frac", "xin24m" };
-PNAME(mux_uart2_p)		= { "uart2_src", "uart2_frac", "xin24m" };
-PNAME(mux_sclk_macphy_p)	= { "sclk_macphy_pre", "ext_gmac" };
-PNAME(mux_i2s0_pre_p)		= { "i2s0_src", "i2s0_frac", "ext_i2s", "xin12m" };
-PNAME(mux_i2s_out_p)		= { "i2s0_pre", "xin12m" };
-PNAME(mux_i2s1_p)		= { "i2s1_src", "i2s1_frac", "xin12m" };
-PNAME(mux_i2s2_p)		= { "i2s2_src", "i2s2_frac", "xin12m" };
-
-static struct rockchip_pll_clock rk1108_pll_clks[] __initdata = {
-	[apll] = PLL(pll_rk3399, PLL_APLL, "apll", mux_pll_p, 0, RK1108_PLL_CON(0),
-		     RK1108_PLL_CON(3), 8, 31, 0, rk1108_pll_rates),
-	[dpll] = PLL(pll_rk3399, PLL_DPLL, "dpll", mux_pll_p, 0, RK1108_PLL_CON(8),
-		     RK1108_PLL_CON(11), 8, 31, 0, NULL),
-	[gpll] = PLL(pll_rk3399, PLL_GPLL, "gpll", mux_pll_p, 0, RK1108_PLL_CON(16),
-		     RK1108_PLL_CON(19), 8, 31, ROCKCHIP_PLL_SYNC_RATE, rk1108_pll_rates),
-};
-
-#define MFLAGS CLK_MUX_HIWORD_MASK
-#define DFLAGS CLK_DIVIDER_HIWORD_MASK
-#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE)
-#define IFLAGS ROCKCHIP_INVERTER_HIWORD_MASK
-
-static struct rockchip_clk_branch rk1108_uart0_fracmux __initdata =
-	MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(13), 8, 2, MFLAGS);
-
-static struct rockchip_clk_branch rk1108_uart1_fracmux __initdata =
-	MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(14), 8, 2, MFLAGS);
-
-static struct rockchip_clk_branch rk1108_uart2_fracmux __initdata =
-	MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(15), 8, 2, MFLAGS);
-
-static struct rockchip_clk_branch rk1108_i2s0_fracmux __initdata =
-	MUX(0, "i2s0_pre", mux_i2s0_pre_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(5), 12, 2, MFLAGS);
-
-static struct rockchip_clk_branch rk1108_i2s1_fracmux __initdata =
-	MUX(0, "i2s1_pre", mux_i2s1_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(6), 12, 2, MFLAGS);
-
-static struct rockchip_clk_branch rk1108_i2s2_fracmux __initdata =
-	MUX(0, "i2s2_pre", mux_i2s2_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(7), 12, 2, MFLAGS);
-
-static struct rockchip_clk_branch rk1108_clk_branches[] __initdata = {
-	MUX(0, "hdmi_phy", mux_hdmiphy_phy_p, CLK_SET_RATE_PARENT,
-			RK1108_MISC_CON, 13, 2, MFLAGS),
-	MUX(0, "usb480m", mux_usb480m_pre_p, CLK_SET_RATE_PARENT,
-			RK1108_MISC_CON, 15, 2, MFLAGS),
-	/*
-	 * Clock-Architecture Diagram 2
-	 */
-
-	/* PD_CORE */
-	GATE(0, "dpll_core", "dpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 1, GFLAGS),
-	GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 0, GFLAGS),
-	GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 2, GFLAGS),
-	COMPOSITE_NOMUX(0, "pclken_dbg", "armclk", CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(1), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY,
-			RK1108_CLKGATE_CON(0), 5, GFLAGS),
-	COMPOSITE_NOMUX(ACLK_ENMCORE, "aclkenm_core", "armclk", CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(1), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
-			RK1108_CLKGATE_CON(0), 4, GFLAGS),
-	GATE(ACLK_CORE, "aclk_core", "aclkenm_core", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(11), 0, GFLAGS),
-	GATE(0, "pclk_dbg", "pclken_dbg", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(11), 1, GFLAGS),
-
-	/* PD_RKVENC */
-
-	/* PD_RKVDEC */
-
-	/* PD_PMU_wrapper */
-	COMPOSITE_NOMUX(0, "pmu_24m_ena", "gpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(38), 0, 5, DFLAGS,
-			RK1108_CLKGATE_CON(8), 12, GFLAGS),
-	GATE(0, "pmu", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 0, GFLAGS),
-	GATE(0, "intmem1", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 1, GFLAGS),
-	GATE(0, "gpio0_pmu", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 2, GFLAGS),
-	GATE(0, "pmugrf", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 3, GFLAGS),
-	GATE(0, "pmu_noc", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 4, GFLAGS),
-	GATE(0, "i2c0_pmu_pclk", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 5, GFLAGS),
-	GATE(0, "pwm0_pmu_pclk", "pmu_24m_ena", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(10), 6, GFLAGS),
-	COMPOSITE(0, "pwm0_pmu_clk", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(12), 7, 1, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(8), 15, GFLAGS),
-	COMPOSITE(0, "i2c0_pmu_clk", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(19), 7, 1, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(8), 14, GFLAGS),
-	GATE(0, "pvtm_pmu", "xin24m", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(8), 13, GFLAGS),
-
-	/*
-	 * Clock-Architecture Diagram 4
-	 */
-	COMPOSITE(0, "aclk_vio0_2wrap_occ", mux_pll_src_4plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS,
-			RK1108_CLKGATE_CON(6), 0, GFLAGS),
-	GATE(0, "aclk_vio0_pre", "aclk_vio0_2wrap_occ", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(17), 0, GFLAGS),
-	COMPOSITE_NOMUX(0, "hclk_vio_pre", "aclk_vio0_pre", 0,
-			RK1108_CLKSEL_CON(29), 0, 5, DFLAGS,
-			RK1108_CLKGATE_CON(7), 2, GFLAGS),
-	COMPOSITE_NOMUX(0, "pclk_vio_pre", "aclk_vio0_pre", 0,
-			RK1108_CLKSEL_CON(29), 8, 5, DFLAGS,
-			RK1108_CLKGATE_CON(7), 3, GFLAGS),
-
-	INVERTER(0, "pclk_vip", "ext_vip",
-			RK1108_CLKSEL_CON(31), 8, IFLAGS),
-	GATE(0, "pclk_isp_pre", "pclk_vip", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(7), 6, GFLAGS),
-	GATE(0, "pclk_isp", "pclk_isp_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(18), 10, GFLAGS),
-	GATE(0, "dclk_hdmiphy_src_gpll", "gpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(6), 5, GFLAGS),
-	GATE(0, "dclk_hdmiphy_src_dpll", "dpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(6), 4, GFLAGS),
-	COMPOSITE_NOGATE(0, "dclk_hdmiphy", mux_dclk_hdmiphy_pre_p, 0,
-			RK1108_CLKSEL_CON(32), 6, 2, MFLAGS, 8, 6, DFLAGS),
-
-	/*
-	 * Clock-Architecture Diagram 5
-	 */
-
-	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
-
-	COMPOSITE(0, "i2s0_src", mux_pll_src_2plls_p, 0,
-			RK1108_CLKSEL_CON(5), 8, 1, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(2), 0, GFLAGS),
-	COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_src", CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(8), 0,
-			RK1108_CLKGATE_CON(2), 1, GFLAGS,
-			&rk1108_i2s0_fracmux),
-	GATE(SCLK_I2S0, "sclk_i2s0", "i2s0_pre", CLK_SET_RATE_PARENT,
-			RK1108_CLKGATE_CON(2), 2, GFLAGS),
-	COMPOSITE_NODIV(0, "i2s_out", mux_i2s_out_p, 0,
-			RK1108_CLKSEL_CON(5), 15, 1, MFLAGS,
-			RK1108_CLKGATE_CON(2), 3, GFLAGS),
-
-	COMPOSITE(0, "i2s1_src", mux_pll_src_2plls_p, 0,
-			RK1108_CLKSEL_CON(6), 8, 1, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(2), 4, GFLAGS),
-	COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_src", CLK_SET_RATE_PARENT,
-			RK2928_CLKSEL_CON(9), 0,
-			RK2928_CLKGATE_CON(2), 5, GFLAGS,
-			&rk1108_i2s1_fracmux),
-	GATE(SCLK_I2S1, "sclk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT,
-			RK1108_CLKGATE_CON(2), 6, GFLAGS),
-
-	COMPOSITE(0, "i2s2_src", mux_pll_src_2plls_p, 0,
-			RK1108_CLKSEL_CON(7), 8, 1, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 8, GFLAGS),
-	COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_src", CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(10), 0,
-			RK1108_CLKGATE_CON(2), 9, GFLAGS,
-			&rk1108_i2s2_fracmux),
-	GATE(SCLK_I2S2, "sclk_i2s2", "i2s2_pre", CLK_SET_RATE_PARENT,
-			RK1108_CLKGATE_CON(2), 10, GFLAGS),
-
-	/* PD_BUS */
-	GATE(0, "aclk_bus_src_gpll", "gpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 0, GFLAGS),
-	GATE(0, "aclk_bus_src_apll", "apll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 1, GFLAGS),
-	GATE(0, "aclk_bus_src_dpll", "dpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 2, GFLAGS),
-	COMPOSITE_NOGATE(ACLK_PRE, "aclk_bus_pre", mux_aclk_bus_src_p, 0,
-			RK1108_CLKSEL_CON(2), 8, 2, MFLAGS, 0, 5, DFLAGS),
-	COMPOSITE_NOMUX(0, "hclk_bus_pre", "aclk_bus_2wrap_occ", 0,
-			RK1108_CLKSEL_CON(3), 0, 5, DFLAGS,
-			RK1108_CLKGATE_CON(1), 4, GFLAGS),
-	COMPOSITE_NOMUX(0, "pclken_bus", "aclk_bus_2wrap_occ", 0,
-			RK1108_CLKSEL_CON(3), 8, 5, DFLAGS,
-			RK1108_CLKGATE_CON(1), 5, GFLAGS),
-	GATE(0, "pclk_bus_pre", "pclken_bus", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 6, GFLAGS),
-	GATE(0, "pclk_top_pre", "pclken_bus", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 7, GFLAGS),
-	GATE(0, "pclk_ddr_pre", "pclken_bus", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 8, GFLAGS),
-	GATE(0, "clk_timer0", "mux_pll_p", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 9, GFLAGS),
-	GATE(0, "clk_timer1", "mux_pll_p", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(1), 10, GFLAGS),
-	GATE(0, "pclk_timer", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 4, GFLAGS),
-
-	COMPOSITE(0, "uart0_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(13), 12, 2, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 1, GFLAGS),
-	COMPOSITE(0, "uart1_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(14), 12, 2, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 3, GFLAGS),
-	COMPOSITE(0, "uart21_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(15), 12, 2, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 5, GFLAGS),
-
-	COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(16), 0,
-			RK1108_CLKGATE_CON(3), 2, GFLAGS,
-			&rk1108_uart0_fracmux),
-	COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(17), 0,
-			RK1108_CLKGATE_CON(3), 4, GFLAGS,
-			&rk1108_uart1_fracmux),
-	COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(18), 0,
-			RK1108_CLKGATE_CON(3), 6, GFLAGS,
-			&rk1108_uart2_fracmux),
-	GATE(PCLK_UART0, "pclk_uart0", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 10, GFLAGS),
-	GATE(PCLK_UART1, "pclk_uart1", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 11, GFLAGS),
-	GATE(PCLK_UART2, "pclk_uart2", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 12, GFLAGS),
-
-	COMPOSITE(0, "clk_i2c1", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(19), 15, 2, MFLAGS, 8, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 7, GFLAGS),
-	COMPOSITE(0, "clk_i2c2", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(20), 7, 2, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 8, GFLAGS),
-	COMPOSITE(0, "clk_i2c3", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(20), 15, 2, MFLAGS, 8, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 9, GFLAGS),
-	GATE(0, "pclk_i2c1", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 0, GFLAGS),
-	GATE(0, "pclk_i2c2", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 1, GFLAGS),
-	GATE(0, "pclk_i2c3", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 2, GFLAGS),
-	COMPOSITE(0, "clk_pwm1", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(12), 15, 2, MFLAGS, 8, 7, DFLAGS,
-			RK1108_CLKGATE_CON(3), 10, GFLAGS),
-	GATE(0, "pclk_pwm1", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 6, GFLAGS),
-	GATE(0, "pclk_wdt", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 3, GFLAGS),
-	GATE(0, "pclk_gpio1", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 7, GFLAGS),
-	GATE(0, "pclk_gpio2", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 8, GFLAGS),
-	GATE(0, "pclk_gpio3", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(13), 9, GFLAGS),
-
-	GATE(0, "pclk_grf", "pclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(14), 0, GFLAGS),
-
-	GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_pre", 0,
-	     RK1108_CLKGATE_CON(12), 2, GFLAGS),
-	GATE(0, "hclk_rom", "hclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(12), 3, GFLAGS),
-	GATE(0, "aclk_intmem", "aclk_bus_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(12), 1, GFLAGS),
-
-	/* PD_DDR */
-	GATE(0, "apll_ddr", "apll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 8, GFLAGS),
-	GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 9, GFLAGS),
-	GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 10, GFLAGS),
-	COMPOSITE(0, "ddrphy4x", mux_ddrphy_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(4), 8, 2, MFLAGS, 0, 3,
-			DFLAGS | CLK_DIVIDER_POWER_OF_TWO,
-			RK1108_CLKGATE_CON(10), 9, GFLAGS),
-	GATE(0, "ddrupctl", "ddrphy_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(12), 4, GFLAGS),
-	GATE(0, "ddrc", "ddrphy", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(12), 5, GFLAGS),
-	GATE(0, "ddrmon", "ddrphy_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(12), 6, GFLAGS),
-	GATE(0, "timer_clk", "xin24m", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(0), 11, GFLAGS),
-
-	/*
-	 * Clock-Architecture Diagram 6
-	 */
-
-	/* PD_PERI */
-	COMPOSITE_NOMUX(0, "pclk_periph_pre", "gpll", 0,
-			RK1108_CLKSEL_CON(23), 10, 5, DFLAGS,
-			RK1108_CLKGATE_CON(4), 5, GFLAGS),
-	GATE(0, "pclk_periph", "pclk_periph_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(15), 13, GFLAGS),
-	COMPOSITE_NOMUX(0, "hclk_periph_pre", "gpll", 0,
-			RK1108_CLKSEL_CON(23), 5, 5, DFLAGS,
-			RK1108_CLKGATE_CON(4), 4, GFLAGS),
-	GATE(0, "hclk_periph", "hclk_periph_pre", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(15), 12, GFLAGS),
-
-	GATE(0, "aclk_peri_src_dpll", "dpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(4), 1, GFLAGS),
-	GATE(0, "aclk_peri_src_gpll", "gpll", CLK_IGNORE_UNUSED,
-			RK1108_CLKGATE_CON(4), 2, GFLAGS),
-	COMPOSITE(0, "aclk_periph", mux_aclk_peri_src_p, CLK_IGNORE_UNUSED,
-			RK1108_CLKSEL_CON(23), 15, 2, MFLAGS, 0, 5, DFLAGS,
-			RK1108_CLKGATE_CON(15), 11, GFLAGS),
-
-	COMPOSITE(SCLK_SDMMC, "sclk_sdmmc0", mux_mmc_src_p, 0,
-			RK1108_CLKSEL_CON(25), 8, 2, MFLAGS, 0, 8, DFLAGS,
-			RK1108_CLKGATE_CON(5), 0, GFLAGS),
-
-	COMPOSITE_NODIV(0, "sclk_sdio_src", mux_mmc_src_p, 0,
-			RK1108_CLKSEL_CON(25), 10, 2, MFLAGS,
-			RK1108_CLKGATE_CON(5), 2, GFLAGS),
-	DIV(SCLK_SDIO, "sclk_sdio", "sclk_sdio_src", 0,
-			RK1108_CLKSEL_CON(26), 0, 8, DFLAGS),
-
-	COMPOSITE_NODIV(0, "sclk_emmc_src", mux_mmc_src_p, 0,
-			RK1108_CLKSEL_CON(25), 12, 2, MFLAGS,
-			RK1108_CLKGATE_CON(5), 1, GFLAGS),
-	DIV(SCLK_EMMC, "sclk_emmc", "sclk_emmc_src", 0,
-			RK2928_CLKSEL_CON(26), 8, 8, DFLAGS),
-	GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_periph", 0, RK1108_CLKGATE_CON(15), 0, GFLAGS),
-	GATE(HCLK_SDIO, "hclk_sdio", "hclk_periph", 0, RK1108_CLKGATE_CON(15), 1, GFLAGS),
-	GATE(HCLK_EMMC, "hclk_emmc", "hclk_periph", 0, RK1108_CLKGATE_CON(15), 2, GFLAGS),
-
-	COMPOSITE(SCLK_NANDC, "sclk_nandc", mux_pll_src_2plls_p, 0,
-			RK1108_CLKSEL_CON(27), 14, 2, MFLAGS, 8, 5, DFLAGS,
-			RK1108_CLKGATE_CON(5), 3, GFLAGS),
-	GATE(HCLK_NANDC, "hclk_nandc", "hclk_periph", 0, RK1108_CLKGATE_CON(15), 3, GFLAGS),
-
-	COMPOSITE(SCLK_SFC, "sclk_sfc", mux_pll_src_2plls_p, 0,
-			RK1108_CLKSEL_CON(27), 7, 2, MFLAGS, 0, 7, DFLAGS,
-			RK1108_CLKGATE_CON(5), 4, GFLAGS),
-	GATE(HCLK_SFC, "hclk_sfc", "hclk_periph", 0, RK1108_CLKGATE_CON(15), 10, GFLAGS),
-
-	COMPOSITE(0, "sclk_macphy_pre", mux_pll_src_apll_gpll_p, 0,
-			RK1108_CLKSEL_CON(24), 12, 2, MFLAGS, 0, 5, DFLAGS,
-			RK1108_CLKGATE_CON(4), 10, GFLAGS),
-	MUX(0, "sclk_macphy", mux_sclk_macphy_p, CLK_SET_RATE_PARENT,
-			RK1108_CLKSEL_CON(24), 8, 2, MFLAGS),
-	GATE(0, "sclk_macphy_rx", "sclk_macphy", 0, RK1108_CLKGATE_CON(4), 8, GFLAGS),
-	GATE(0, "sclk_mac_ref", "sclk_macphy", 0, RK1108_CLKGATE_CON(4), 6, GFLAGS),
-	GATE(0, "sclk_mac_refout", "sclk_macphy", 0, RK1108_CLKGATE_CON(4), 7, GFLAGS),
-
-	MMC(SCLK_SDMMC_DRV,    "sdmmc_drv",    "sclk_sdmmc", RK1108_SDMMC_CON0, 1),
-	MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK1108_SDMMC_CON1, 1),
-
-	MMC(SCLK_SDIO_DRV,     "sdio_drv",     "sclk_sdio",  RK1108_SDIO_CON0,  1),
-	MMC(SCLK_SDIO_SAMPLE,  "sdio_sample",  "sclk_sdio",  RK1108_SDIO_CON1,  1),
-
-	MMC(SCLK_EMMC_DRV,     "emmc_drv",     "sclk_emmc",  RK1108_EMMC_CON0,  1),
-	MMC(SCLK_EMMC_SAMPLE,  "emmc_sample",  "sclk_emmc",  RK1108_EMMC_CON1,  1),
-};
-
-static const char *const rk1108_critical_clocks[] __initconst = {
-	"aclk_core",
-	"aclk_bus_src_gpll",
-	"aclk_periph",
-	"hclk_periph",
-	"pclk_periph",
-};
-
-static void __init rk1108_clk_init(struct device_node *np)
-{
-	struct rockchip_clk_provider *ctx;
-	void __iomem *reg_base;
-
-	reg_base = of_iomap(np, 0);
-	if (!reg_base) {
-		pr_err("%s: could not map cru region\n", __func__);
-		return;
-	}
-
-	ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
-	if (IS_ERR(ctx)) {
-		pr_err("%s: rockchip clk init failed\n", __func__);
-		iounmap(reg_base);
-		return;
-	}
-
-	rockchip_clk_register_plls(ctx, rk1108_pll_clks,
-				   ARRAY_SIZE(rk1108_pll_clks),
-				   RK1108_GRF_SOC_STATUS0);
-	rockchip_clk_register_branches(ctx, rk1108_clk_branches,
-				  ARRAY_SIZE(rk1108_clk_branches));
-	rockchip_clk_protect_critical(rk1108_critical_clocks,
-				      ARRAY_SIZE(rk1108_critical_clocks));
-
-	rockchip_clk_register_armclk(ctx, ARMCLK, "armclk",
-			mux_armclk_p, ARRAY_SIZE(mux_armclk_p),
-			&rk1108_cpuclk_data, rk1108_cpuclk_rates,
-			ARRAY_SIZE(rk1108_cpuclk_rates));
-
-	rockchip_register_softrst(np, 13, reg_base + RK1108_SOFTRST_CON(0),
-				  ROCKCHIP_SOFTRST_HIWORD_MASK);
-
-	rockchip_register_restart_notifier(ctx, RK1108_GLB_SRST_FST, NULL);
-
-	rockchip_clk_of_add_provider(np, ctx);
-}
-CLK_OF_DECLARE(rk1108_cru, "rockchip,rk1108-cru", rk1108_clk_init);
diff --git a/drivers/clk/rockchip/clk-rv1108.c b/drivers/clk/rockchip/clk-rv1108.c
new file mode 100644
index 000000000000..7c05ab366348
--- /dev/null
+++ b/drivers/clk/rockchip/clk-rv1108.c
@@ -0,0 +1,531 @@
+/*
+ * Copyright (c) 2016 Rockchip Electronics Co. Ltd.
+ * Author: Shawn Lin <shawn.lin@rock-chips.com>
+ *         Andy Yan <andy.yan@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/syscore_ops.h>
+#include <dt-bindings/clock/rv1108-cru.h>
+#include "clk.h"
+
+#define RV1108_GRF_SOC_STATUS0	0x480
+
+enum rv1108_plls {
+	apll, dpll, gpll,
+};
+
+static struct rockchip_pll_rate_table rv1108_pll_rates[] = {
+	/* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */
+	RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0),
+	RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 984000000, 1, 82, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 960000000, 1, 80, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 936000000, 1, 78, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 912000000, 1, 76, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 900000000, 4, 300, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 888000000, 1, 74, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 864000000, 1, 72, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 840000000, 1, 70, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 816000000, 1, 68, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 800000000, 6, 400, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 700000000, 6, 350, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 696000000, 1, 58, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 600000000, 1, 75, 3, 1, 1, 0),
+	RK3036_PLL_RATE( 594000000, 2, 99, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 504000000, 1, 63, 3, 1, 1, 0),
+	RK3036_PLL_RATE( 500000000, 6, 250, 2, 1, 1, 0),
+	RK3036_PLL_RATE( 408000000, 1, 68, 2, 2, 1, 0),
+	RK3036_PLL_RATE( 312000000, 1, 52, 2, 2, 1, 0),
+	RK3036_PLL_RATE( 216000000, 1, 72, 4, 2, 1, 0),
+	RK3036_PLL_RATE(  96000000, 1, 64, 4, 4, 1, 0),
+	{ /* sentinel */ },
+};
+
+#define RV1108_DIV_CORE_MASK		0xf
+#define RV1108_DIV_CORE_SHIFT		4
+
+#define RV1108_CLKSEL0(_core_peri_div)	\
+	{				\
+		.reg = RV1108_CLKSEL_CON(1),	\
+		.val = HIWORD_UPDATE(_core_peri_div, RV1108_DIV_CORE_MASK,\
+				RV1108_DIV_CORE_SHIFT)	\
+	}
+
+#define RV1108_CPUCLK_RATE(_prate, _core_peri_div)			\
+	{								\
+		.prate = _prate,					\
+		.divs = {						\
+			RV1108_CLKSEL0(_core_peri_div),		\
+		},							\
+	}
+
+static struct rockchip_cpuclk_rate_table rv1108_cpuclk_rates[] __initdata = {
+	RV1108_CPUCLK_RATE(816000000, 4),
+	RV1108_CPUCLK_RATE(600000000, 4),
+	RV1108_CPUCLK_RATE(312000000, 4),
+};
+
+static const struct rockchip_cpuclk_reg_data rv1108_cpuclk_data = {
+	.core_reg = RV1108_CLKSEL_CON(0),
+	.div_core_shift = 0,
+	.div_core_mask = 0x1f,
+	.mux_core_alt = 1,
+	.mux_core_main = 0,
+	.mux_core_shift = 8,
+	.mux_core_mask = 0x1,
+};
+
+PNAME(mux_pll_p)		= { "xin24m", "xin24m"};
+PNAME(mux_ddrphy_p)		= { "dpll_ddr", "gpll_ddr", "apll_ddr" };
+PNAME(mux_armclk_p)		= { "apll_core", "gpll_core", "dpll_core" };
+PNAME(mux_usb480m_pre_p)	= { "usbphy", "xin24m" };
+PNAME(mux_hdmiphy_phy_p)	= { "hdmiphy", "xin24m" };
+PNAME(mux_dclk_hdmiphy_pre_p)	= { "dclk_hdmiphy_src_gpll", "dclk_hdmiphy_src_dpll" };
+PNAME(mux_pll_src_4plls_p)	= { "dpll", "hdmiphy", "gpll", "usb480m" };
+PNAME(mux_pll_src_3plls_p)	= { "apll", "gpll", "dpll" };
+PNAME(mux_pll_src_2plls_p)	= { "dpll", "gpll" };
+PNAME(mux_pll_src_apll_gpll_p)	= { "apll", "gpll" };
+PNAME(mux_aclk_peri_src_p)	= { "aclk_peri_src_dpll", "aclk_peri_src_gpll" };
+PNAME(mux_aclk_bus_src_p)	= { "aclk_bus_src_gpll", "aclk_bus_src_apll", "aclk_bus_src_dpll" };
+PNAME(mux_mmc_src_p)		= { "dpll", "gpll", "xin24m", "usb480m" };
+PNAME(mux_pll_src_dpll_gpll_usb480m_p)	= { "dpll", "gpll", "usb480m" };
+PNAME(mux_uart0_p)		= { "uart0_src", "uart0_frac", "xin24m" };
+PNAME(mux_uart1_p)		= { "uart1_src", "uart1_frac", "xin24m" };
+PNAME(mux_uart2_p)		= { "uart2_src", "uart2_frac", "xin24m" };
+PNAME(mux_sclk_macphy_p)	= { "sclk_macphy_pre", "ext_gmac" };
+PNAME(mux_i2s0_pre_p)		= { "i2s0_src", "i2s0_frac", "ext_i2s", "xin12m" };
+PNAME(mux_i2s_out_p)		= { "i2s0_pre", "xin12m" };
+PNAME(mux_i2s1_p)		= { "i2s1_src", "i2s1_frac", "xin12m" };
+PNAME(mux_i2s2_p)		= { "i2s2_src", "i2s2_frac", "xin12m" };
+
+static struct rockchip_pll_clock rv1108_pll_clks[] __initdata = {
+	[apll] = PLL(pll_rk3399, PLL_APLL, "apll", mux_pll_p, 0, RV1108_PLL_CON(0),
+		     RV1108_PLL_CON(3), 8, 31, 0, rv1108_pll_rates),
+	[dpll] = PLL(pll_rk3399, PLL_DPLL, "dpll", mux_pll_p, 0, RV1108_PLL_CON(8),
+		     RV1108_PLL_CON(11), 8, 31, 0, NULL),
+	[gpll] = PLL(pll_rk3399, PLL_GPLL, "gpll", mux_pll_p, 0, RV1108_PLL_CON(16),
+		     RV1108_PLL_CON(19), 8, 31, ROCKCHIP_PLL_SYNC_RATE, rv1108_pll_rates),
+};
+
+#define MFLAGS CLK_MUX_HIWORD_MASK
+#define DFLAGS CLK_DIVIDER_HIWORD_MASK
+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE)
+#define IFLAGS ROCKCHIP_INVERTER_HIWORD_MASK
+
+static struct rockchip_clk_branch rv1108_uart0_fracmux __initdata =
+	MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT,
+			RV1108_CLKSEL_CON(13), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rv1108_uart1_fracmux __initdata =
+	MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT,
+			RV1108_CLKSEL_CON(14), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rv1108_uart2_fracmux __initdata =
+	MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT,
+			RV1108_CLKSEL_CON(15), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rv1108_i2s0_fracmux __initdata =
+	MUX(0, "i2s0_pre", mux_i2s0_pre_p, CLK_SET_RATE_PARENT,
+			RV1108_CLKSEL_CON(5), 12, 2, MFLAGS);
+
+static struct rockchip_clk_branch rv1108_i2s1_fracmux __initdata =
+	MUX(0, "i2s1_pre", mux_i2s1_p, CLK_SET_RATE_PARENT,
+			RV1108_CLKSEL_CON(6), 12, 2, MFLAGS);
+
+static struct rockchip_clk_branch rv1108_i2s2_fracmux __initdata =
+	MUX(0, "i2s2_pre", mux_i2s2_p, CLK_SET_RATE_PARENT,
+			RV1108_CLKSEL_CON(7), 12, 2, MFLAGS);
+
+static struct rockchip_clk_branch rv1108_clk_branches[] __initdata = {
+	MUX(0, "hdmi_phy", mux_hdmiphy_phy_p, CLK_SET_RATE_PARENT,
+			RV1108_MISC_CON, 13, 2, MFLAGS),
+	MUX(0, "usb480m", mux_usb480m_pre_p, CLK_SET_RATE_PARENT,
+			RV1108_MISC_CON, 15, 2, MFLAGS),
+	/*
+	 * Clock-Architecture Diagram 2
+	 */
+
+	/* PD_CORE */
+	GATE(0, "dpll_core", "dpll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(0), 1, GFLAGS),
+	GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(0), 0, GFLAGS),
+	GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(0), 2, GFLAGS),
+	COMPOSITE_NOMUX(0, "pclken_dbg", "armclk", CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(1), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY,
+			RV1108_CLKGATE_CON(0), 5, GFLAGS),
+	COMPOSITE_NOMUX(ACLK_ENMCORE, "aclkenm_core", "armclk", CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(1), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
+			RV1108_CLKGATE_CON(0), 4, GFLAGS),
+	GATE(ACLK_CORE, "aclk_core", "aclkenm_core", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(11), 0, GFLAGS),
+	GATE(0, "pclk_dbg", "pclken_dbg", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(11), 1, GFLAGS),
+
+	/* PD_RKVENC */
+
+	/* PD_RKVDEC */
+
+	/* PD_PMU_wrapper */
+	COMPOSITE_NOMUX(0, "pmu_24m_ena", "gpll", CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(38), 0, 5, DFLAGS,
+			RV1108_CLKGATE_CON(8), 12, GFLAGS),
+	GATE(0, "pmu", "pmu_24m_ena", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(10), 0, GFLAGS),
+	GATE(0, "intmem1", "pmu_24m_ena", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(10), 1, GFLAGS),
+	GATE(0, "gpio0_pmu", "pmu_24m_ena", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(10), 2, GFLAGS),
+	GATE(0, "pmugrf", "pmu_24m_ena", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(10), 3, GFLAGS),
+	GATE(0, "pmu_noc", "pmu_24m_ena", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(10), 4, GFLAGS),
+	GATE(0, "i2c0_pmu_pclk", "pmu_24m_ena", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(10), 5, GFLAGS),
+	GATE(0, "pwm0_pmu_pclk", "pmu_24m_ena", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(10), 6, GFLAGS),
+	COMPOSITE(0, "pwm0_pmu_clk", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(12), 7, 1, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(8), 15, GFLAGS),
+	COMPOSITE(0, "i2c0_pmu_clk", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(19), 7, 1, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(8), 14, GFLAGS),
+	GATE(0, "pvtm_pmu", "xin24m", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(8), 13, GFLAGS),
+
+	/*
+	 * Clock-Architecture Diagram 4
+	 */
+	COMPOSITE(0, "aclk_vio0_2wrap_occ", mux_pll_src_4plls_p, CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS,
+			RV1108_CLKGATE_CON(6), 0, GFLAGS),
+	GATE(0, "aclk_vio0_pre", "aclk_vio0_2wrap_occ", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(17), 0, GFLAGS),
+	COMPOSITE_NOMUX(0, "hclk_vio_pre", "aclk_vio0_pre", 0,
+			RV1108_CLKSEL_CON(29), 0, 5, DFLAGS,
+			RV1108_CLKGATE_CON(7), 2, GFLAGS),
+	COMPOSITE_NOMUX(0, "pclk_vio_pre", "aclk_vio0_pre", 0,
+			RV1108_CLKSEL_CON(29), 8, 5, DFLAGS,
+			RV1108_CLKGATE_CON(7), 3, GFLAGS),
+
+	INVERTER(0, "pclk_vip", "ext_vip",
+			RV1108_CLKSEL_CON(31), 8, IFLAGS),
+	GATE(0, "pclk_isp_pre", "pclk_vip", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(7), 6, GFLAGS),
+	GATE(0, "pclk_isp", "pclk_isp_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(18), 10, GFLAGS),
+	GATE(0, "dclk_hdmiphy_src_gpll", "gpll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(6), 5, GFLAGS),
+	GATE(0, "dclk_hdmiphy_src_dpll", "dpll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(6), 4, GFLAGS),
+	COMPOSITE_NOGATE(0, "dclk_hdmiphy", mux_dclk_hdmiphy_pre_p, 0,
+			RV1108_CLKSEL_CON(32), 6, 2, MFLAGS, 8, 6, DFLAGS),
+
+	/*
+	 * Clock-Architecture Diagram 5
+	 */
+
+	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
+	COMPOSITE(0, "i2s0_src", mux_pll_src_2plls_p, 0,
+			RV1108_CLKSEL_CON(5), 8, 1, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(2), 0, GFLAGS),
+	COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_src", CLK_SET_RATE_PARENT,
+			RV1108_CLKSEL_CON(8), 0,
+			RV1108_CLKGATE_CON(2), 1, GFLAGS,
+			&rv1108_i2s0_fracmux),
+	GATE(SCLK_I2S0, "sclk_i2s0", "i2s0_pre", CLK_SET_RATE_PARENT,
+			RV1108_CLKGATE_CON(2), 2, GFLAGS),
+	COMPOSITE_NODIV(0, "i2s_out", mux_i2s_out_p, 0,
+			RV1108_CLKSEL_CON(5), 15, 1, MFLAGS,
+			RV1108_CLKGATE_CON(2), 3, GFLAGS),
+
+	COMPOSITE(0, "i2s1_src", mux_pll_src_2plls_p, 0,
+			RV1108_CLKSEL_CON(6), 8, 1, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(2), 4, GFLAGS),
+	COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_src", CLK_SET_RATE_PARENT,
+			RK2928_CLKSEL_CON(9), 0,
+			RK2928_CLKGATE_CON(2), 5, GFLAGS,
+			&rv1108_i2s1_fracmux),
+	GATE(SCLK_I2S1, "sclk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT,
+			RV1108_CLKGATE_CON(2), 6, GFLAGS),
+
+	COMPOSITE(0, "i2s2_src", mux_pll_src_2plls_p, 0,
+			RV1108_CLKSEL_CON(7), 8, 1, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 8, GFLAGS),
+	COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_src", CLK_SET_RATE_PARENT,
+			RV1108_CLKSEL_CON(10), 0,
+			RV1108_CLKGATE_CON(2), 9, GFLAGS,
+			&rv1108_i2s2_fracmux),
+	GATE(SCLK_I2S2, "sclk_i2s2", "i2s2_pre", CLK_SET_RATE_PARENT,
+			RV1108_CLKGATE_CON(2), 10, GFLAGS),
+
+	/* PD_BUS */
+	GATE(0, "aclk_bus_src_gpll", "gpll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(1), 0, GFLAGS),
+	GATE(0, "aclk_bus_src_apll", "apll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(1), 1, GFLAGS),
+	GATE(0, "aclk_bus_src_dpll", "dpll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(1), 2, GFLAGS),
+	COMPOSITE_NOGATE(ACLK_PRE, "aclk_bus_pre", mux_aclk_bus_src_p, 0,
+			RV1108_CLKSEL_CON(2), 8, 2, MFLAGS, 0, 5, DFLAGS),
+	COMPOSITE_NOMUX(0, "hclk_bus_pre", "aclk_bus_2wrap_occ", 0,
+			RV1108_CLKSEL_CON(3), 0, 5, DFLAGS,
+			RV1108_CLKGATE_CON(1), 4, GFLAGS),
+	COMPOSITE_NOMUX(0, "pclken_bus", "aclk_bus_2wrap_occ", 0,
+			RV1108_CLKSEL_CON(3), 8, 5, DFLAGS,
+			RV1108_CLKGATE_CON(1), 5, GFLAGS),
+	GATE(0, "pclk_bus_pre", "pclken_bus", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(1), 6, GFLAGS),
+	GATE(0, "pclk_top_pre", "pclken_bus", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(1), 7, GFLAGS),
+	GATE(0, "pclk_ddr_pre", "pclken_bus", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(1), 8, GFLAGS),
+	GATE(0, "clk_timer0", "mux_pll_p", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(1), 9, GFLAGS),
+	GATE(0, "clk_timer1", "mux_pll_p", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(1), 10, GFLAGS),
+	GATE(0, "pclk_timer", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(13), 4, GFLAGS),
+
+	COMPOSITE(0, "uart0_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(13), 12, 2, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 1, GFLAGS),
+	COMPOSITE(0, "uart1_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(14), 12, 2, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 3, GFLAGS),
+	COMPOSITE(0, "uart21_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(15), 12, 2, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 5, GFLAGS),
+
+	COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT,
+			RV1108_CLKSEL_CON(16), 0,
+			RV1108_CLKGATE_CON(3), 2, GFLAGS,
+			&rv1108_uart0_fracmux),
+	COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT,
+			RV1108_CLKSEL_CON(17), 0,
+			RV1108_CLKGATE_CON(3), 4, GFLAGS,
+			&rv1108_uart1_fracmux),
+	COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT,
+			RV1108_CLKSEL_CON(18), 0,
+			RV1108_CLKGATE_CON(3), 6, GFLAGS,
+			&rv1108_uart2_fracmux),
+	GATE(PCLK_UART0, "pclk_uart0", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(13), 10, GFLAGS),
+	GATE(PCLK_UART1, "pclk_uart1", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(13), 11, GFLAGS),
+	GATE(PCLK_UART2, "pclk_uart2", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(13), 12, GFLAGS),
+
+	COMPOSITE(0, "clk_i2c1", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(19), 15, 2, MFLAGS, 8, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 7, GFLAGS),
+	COMPOSITE(0, "clk_i2c2", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(20), 7, 2, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 8, GFLAGS),
+	COMPOSITE(0, "clk_i2c3", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(20), 15, 2, MFLAGS, 8, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 9, GFLAGS),
+	GATE(0, "pclk_i2c1", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(13), 0, GFLAGS),
+	GATE(0, "pclk_i2c2", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(13), 1, GFLAGS),
+	GATE(0, "pclk_i2c3", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(13), 2, GFLAGS),
+	COMPOSITE(0, "clk_pwm1", mux_pll_src_2plls_p, CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(12), 15, 2, MFLAGS, 8, 7, DFLAGS,
+			RV1108_CLKGATE_CON(3), 10, GFLAGS),
+	GATE(0, "pclk_pwm1", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(13), 6, GFLAGS),
+	GATE(0, "pclk_wdt", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(13), 3, GFLAGS),
+	GATE(0, "pclk_gpio1", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(13), 7, GFLAGS),
+	GATE(0, "pclk_gpio2", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(13), 8, GFLAGS),
+	GATE(0, "pclk_gpio3", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(13), 9, GFLAGS),
+
+	GATE(0, "pclk_grf", "pclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(14), 0, GFLAGS),
+
+	GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_pre", 0,
+	     RV1108_CLKGATE_CON(12), 2, GFLAGS),
+	GATE(0, "hclk_rom", "hclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(12), 3, GFLAGS),
+	GATE(0, "aclk_intmem", "aclk_bus_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(12), 1, GFLAGS),
+
+	/* PD_DDR */
+	GATE(0, "apll_ddr", "apll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(0), 8, GFLAGS),
+	GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(0), 9, GFLAGS),
+	GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(0), 10, GFLAGS),
+	COMPOSITE(0, "ddrphy4x", mux_ddrphy_p, CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(4), 8, 2, MFLAGS, 0, 3,
+			DFLAGS | CLK_DIVIDER_POWER_OF_TWO,
+			RV1108_CLKGATE_CON(10), 9, GFLAGS),
+	GATE(0, "ddrupctl", "ddrphy_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(12), 4, GFLAGS),
+	GATE(0, "ddrc", "ddrphy", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(12), 5, GFLAGS),
+	GATE(0, "ddrmon", "ddrphy_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(12), 6, GFLAGS),
+	GATE(0, "timer_clk", "xin24m", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(0), 11, GFLAGS),
+
+	/*
+	 * Clock-Architecture Diagram 6
+	 */
+
+	/* PD_PERI */
+	COMPOSITE_NOMUX(0, "pclk_periph_pre", "gpll", 0,
+			RV1108_CLKSEL_CON(23), 10, 5, DFLAGS,
+			RV1108_CLKGATE_CON(4), 5, GFLAGS),
+	GATE(0, "pclk_periph", "pclk_periph_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(15), 13, GFLAGS),
+	COMPOSITE_NOMUX(0, "hclk_periph_pre", "gpll", 0,
+			RV1108_CLKSEL_CON(23), 5, 5, DFLAGS,
+			RV1108_CLKGATE_CON(4), 4, GFLAGS),
+	GATE(0, "hclk_periph", "hclk_periph_pre", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(15), 12, GFLAGS),
+
+	GATE(0, "aclk_peri_src_dpll", "dpll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(4), 1, GFLAGS),
+	GATE(0, "aclk_peri_src_gpll", "gpll", CLK_IGNORE_UNUSED,
+			RV1108_CLKGATE_CON(4), 2, GFLAGS),
+	COMPOSITE(0, "aclk_periph", mux_aclk_peri_src_p, CLK_IGNORE_UNUSED,
+			RV1108_CLKSEL_CON(23), 15, 2, MFLAGS, 0, 5, DFLAGS,
+			RV1108_CLKGATE_CON(15), 11, GFLAGS),
+
+	COMPOSITE(SCLK_SDMMC, "sclk_sdmmc0", mux_mmc_src_p, 0,
+			RV1108_CLKSEL_CON(25), 8, 2, MFLAGS, 0, 8, DFLAGS,
+			RV1108_CLKGATE_CON(5), 0, GFLAGS),
+
+	COMPOSITE_NODIV(0, "sclk_sdio_src", mux_mmc_src_p, 0,
+			RV1108_CLKSEL_CON(25), 10, 2, MFLAGS,
+			RV1108_CLKGATE_CON(5), 2, GFLAGS),
+	DIV(SCLK_SDIO, "sclk_sdio", "sclk_sdio_src", 0,
+			RV1108_CLKSEL_CON(26), 0, 8, DFLAGS),
+
+	COMPOSITE_NODIV(0, "sclk_emmc_src", mux_mmc_src_p, 0,
+			RV1108_CLKSEL_CON(25), 12, 2, MFLAGS,
+			RV1108_CLKGATE_CON(5), 1, GFLAGS),
+	DIV(SCLK_EMMC, "sclk_emmc", "sclk_emmc_src", 0,
+			RK2928_CLKSEL_CON(26), 8, 8, DFLAGS),
+	GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 0, GFLAGS),
+	GATE(HCLK_SDIO, "hclk_sdio", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 1, GFLAGS),
+	GATE(HCLK_EMMC, "hclk_emmc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 2, GFLAGS),
+
+	COMPOSITE(SCLK_NANDC, "sclk_nandc", mux_pll_src_2plls_p, 0,
+			RV1108_CLKSEL_CON(27), 14, 2, MFLAGS, 8, 5, DFLAGS,
+			RV1108_CLKGATE_CON(5), 3, GFLAGS),
+	GATE(HCLK_NANDC, "hclk_nandc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 3, GFLAGS),
+
+	COMPOSITE(SCLK_SFC, "sclk_sfc", mux_pll_src_2plls_p, 0,
+			RV1108_CLKSEL_CON(27), 7, 2, MFLAGS, 0, 7, DFLAGS,
+			RV1108_CLKGATE_CON(5), 4, GFLAGS),
+	GATE(HCLK_SFC, "hclk_sfc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 10, GFLAGS),
+
+	COMPOSITE(0, "sclk_macphy_pre", mux_pll_src_apll_gpll_p, 0,
+			RV1108_CLKSEL_CON(24), 12, 2, MFLAGS, 0, 5, DFLAGS,
+			RV1108_CLKGATE_CON(4), 10, GFLAGS),
+	MUX(0, "sclk_macphy", mux_sclk_macphy_p, CLK_SET_RATE_PARENT,
+			RV1108_CLKSEL_CON(24), 8, 2, MFLAGS),
+	GATE(0, "sclk_macphy_rx", "sclk_macphy", 0, RV1108_CLKGATE_CON(4), 8, GFLAGS),
+	GATE(0, "sclk_mac_ref", "sclk_macphy", 0, RV1108_CLKGATE_CON(4), 6, GFLAGS),
+	GATE(0, "sclk_mac_refout", "sclk_macphy", 0, RV1108_CLKGATE_CON(4), 7, GFLAGS),
+
+	MMC(SCLK_SDMMC_DRV,    "sdmmc_drv",    "sclk_sdmmc", RV1108_SDMMC_CON0, 1),
+	MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RV1108_SDMMC_CON1, 1),
+
+	MMC(SCLK_SDIO_DRV,     "sdio_drv",     "sclk_sdio",  RV1108_SDIO_CON0,  1),
+	MMC(SCLK_SDIO_SAMPLE,  "sdio_sample",  "sclk_sdio",  RV1108_SDIO_CON1,  1),
+
+	MMC(SCLK_EMMC_DRV,     "emmc_drv",     "sclk_emmc",  RV1108_EMMC_CON0,  1),
+	MMC(SCLK_EMMC_SAMPLE,  "emmc_sample",  "sclk_emmc",  RV1108_EMMC_CON1,  1),
+};
+
+static const char *const rv1108_critical_clocks[] __initconst = {
+	"aclk_core",
+	"aclk_bus_src_gpll",
+	"aclk_periph",
+	"hclk_periph",
+	"pclk_periph",
+};
+
+static void __init rv1108_clk_init(struct device_node *np)
+{
+	struct rockchip_clk_provider *ctx;
+	void __iomem *reg_base;
+
+	reg_base = of_iomap(np, 0);
+	if (!reg_base) {
+		pr_err("%s: could not map cru region\n", __func__);
+		return;
+	}
+
+	ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
+	if (IS_ERR(ctx)) {
+		pr_err("%s: rockchip clk init failed\n", __func__);
+		iounmap(reg_base);
+		return;
+	}
+
+	rockchip_clk_register_plls(ctx, rv1108_pll_clks,
+				   ARRAY_SIZE(rv1108_pll_clks),
+				   RV1108_GRF_SOC_STATUS0);
+	rockchip_clk_register_branches(ctx, rv1108_clk_branches,
+				  ARRAY_SIZE(rv1108_clk_branches));
+	rockchip_clk_protect_critical(rv1108_critical_clocks,
+				      ARRAY_SIZE(rv1108_critical_clocks));
+
+	rockchip_clk_register_armclk(ctx, ARMCLK, "armclk",
+			mux_armclk_p, ARRAY_SIZE(mux_armclk_p),
+			&rv1108_cpuclk_data, rv1108_cpuclk_rates,
+			ARRAY_SIZE(rv1108_cpuclk_rates));
+
+	rockchip_register_softrst(np, 13, reg_base + RV1108_SOFTRST_CON(0),
+				  ROCKCHIP_SOFTRST_HIWORD_MASK);
+
+	rockchip_register_restart_notifier(ctx, RV1108_GLB_SRST_FST, NULL);
+
+	rockchip_clk_of_add_provider(np, ctx);
+}
+CLK_OF_DECLARE(rv1108_cru, "rockchip,rv1108-cru", rv1108_clk_init);
diff --git a/drivers/clk/rockchip/clk.h b/drivers/clk/rockchip/clk.h
index 7c15473ea72b..ef601dded32c 100644
--- a/drivers/clk/rockchip/clk.h
+++ b/drivers/clk/rockchip/clk.h
@@ -34,20 +34,20 @@ struct clk;
 #define HIWORD_UPDATE(val, mask, shift) \
 		((val) << (shift) | (mask) << ((shift) + 16))
 
-/* register positions shared by RK1108, RK2928, RK3036, RK3066, RK3188 and RK3228 */
-#define RK1108_PLL_CON(x)		((x) * 0x4)
-#define RK1108_CLKSEL_CON(x)		((x) * 0x4 + 0x60)
-#define RK1108_CLKGATE_CON(x)		((x) * 0x4 + 0x120)
-#define RK1108_SOFTRST_CON(x)		((x) * 0x4 + 0x180)
-#define RK1108_GLB_SRST_FST		0x1c0
-#define RK1108_GLB_SRST_SND		0x1c4
-#define RK1108_MISC_CON			0x1cc
-#define RK1108_SDMMC_CON0		0x1d8
-#define RK1108_SDMMC_CON1		0x1dc
-#define RK1108_SDIO_CON0		0x1e0
-#define RK1108_SDIO_CON1		0x1e4
-#define RK1108_EMMC_CON0		0x1e8
-#define RK1108_EMMC_CON1		0x1ec
+/* register positions shared by RV1108, RK2928, RK3036, RK3066, RK3188 and RK3228 */
+#define RV1108_PLL_CON(x)		((x) * 0x4)
+#define RV1108_CLKSEL_CON(x)		((x) * 0x4 + 0x60)
+#define RV1108_CLKGATE_CON(x)		((x) * 0x4 + 0x120)
+#define RV1108_SOFTRST_CON(x)		((x) * 0x4 + 0x180)
+#define RV1108_GLB_SRST_FST		0x1c0
+#define RV1108_GLB_SRST_SND		0x1c4
+#define RV1108_MISC_CON			0x1cc
+#define RV1108_SDMMC_CON0		0x1d8
+#define RV1108_SDMMC_CON1		0x1dc
+#define RV1108_SDIO_CON0		0x1e0
+#define RV1108_SDIO_CON1		0x1e4
+#define RV1108_EMMC_CON0		0x1e8
+#define RV1108_EMMC_CON1		0x1ec
 
 #define RK2928_PLL_CON(x)		((x) * 0x4)
 #define RK2928_MODE_CON		0x40
diff --git a/include/dt-bindings/clock/rk1108-cru.h b/include/dt-bindings/clock/rk1108-cru.h
deleted file mode 100644
index 9350a5527a36..000000000000
--- a/include/dt-bindings/clock/rk1108-cru.h
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Copyright (c) 2016 Rockchip Electronics Co. Ltd.
- * Author: Shawn Lin <shawn.lin@rock-chips.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef _DT_BINDINGS_CLK_ROCKCHIP_RK1108_H
-#define _DT_BINDINGS_CLK_ROCKCHIP_RK1108_H
-
-/* pll id */
-#define PLL_APLL			0
-#define PLL_DPLL			1
-#define PLL_GPLL			2
-#define ARMCLK				3
-
-/* sclk gates (special clocks) */
-#define SCLK_SPI0			65
-#define SCLK_NANDC			67
-#define SCLK_SDMMC			68
-#define SCLK_SDIO			69
-#define SCLK_EMMC			71
-#define SCLK_UART0			72
-#define SCLK_UART1			73
-#define SCLK_UART2			74
-#define SCLK_I2S0			75
-#define SCLK_I2S1			76
-#define SCLK_I2S2			77
-#define SCLK_TIMER0			78
-#define SCLK_TIMER1			79
-#define SCLK_SFC			80
-#define SCLK_SDMMC_DRV			81
-#define SCLK_SDIO_DRV			82
-#define SCLK_EMMC_DRV			83
-#define SCLK_SDMMC_SAMPLE		84
-#define SCLK_SDIO_SAMPLE		85
-#define SCLK_EMMC_SAMPLE		86
-
-/* aclk gates */
-#define ACLK_DMAC			192
-#define ACLK_PRE			193
-#define ACLK_CORE			194
-#define ACLK_ENMCORE			195
-
-/* pclk gates */
-#define PCLK_GPIO1			256
-#define PCLK_GPIO2			257
-#define PCLK_GPIO3			258
-#define PCLK_GRF			259
-#define PCLK_I2C1			260
-#define PCLK_I2C2			261
-#define PCLK_I2C3			262
-#define PCLK_SPI			263
-#define PCLK_SFC			264
-#define PCLK_UART0			265
-#define PCLK_UART1			266
-#define PCLK_UART2			267
-#define PCLK_TSADC			268
-#define PCLK_PWM			269
-#define PCLK_TIMER			270
-#define PCLK_PERI			271
-
-/* hclk gates */
-#define HCLK_I2S0_8CH			320
-#define HCLK_I2S1_8CH			321
-#define HCLK_I2S2_2CH			322
-#define HCLK_NANDC			323
-#define HCLK_SDMMC			324
-#define HCLK_SDIO			325
-#define HCLK_EMMC			326
-#define HCLK_PERI			327
-#define HCLK_SFC			328
-
-#define CLK_NR_CLKS			(HCLK_SFC + 1)
-
-/* reset id */
-#define SRST_CORE_PO_AD		0
-#define SRST_CORE_AD			1
-#define SRST_L2_AD			2
-#define SRST_CPU_NIU_AD		3
-#define SRST_CORE_PO			4
-#define SRST_CORE			5
-#define SRST_L2			6
-#define SRST_CORE_DBG			8
-#define PRST_DBG			9
-#define RST_DAP			10
-#define PRST_DBG_NIU			11
-#define ARST_STRC_SYS_AD		15
-
-#define SRST_DDRPHY_CLKDIV		16
-#define SRST_DDRPHY			17
-#define PRST_DDRPHY			18
-#define PRST_HDMIPHY			19
-#define PRST_VDACPHY			20
-#define PRST_VADCPHY			21
-#define PRST_MIPI_CSI_PHY		22
-#define PRST_MIPI_DSI_PHY		23
-#define PRST_ACODEC			24
-#define ARST_BUS_NIU			25
-#define PRST_TOP_NIU			26
-#define ARST_INTMEM			27
-#define HRST_ROM			28
-#define ARST_DMAC			29
-#define SRST_MSCH_NIU			30
-#define PRST_MSCH_NIU			31
-
-#define PRST_DDRUPCTL			32
-#define NRST_DDRUPCTL			33
-#define PRST_DDRMON			34
-#define HRST_I2S0_8CH			35
-#define MRST_I2S0_8CH			36
-#define HRST_I2S1_2CH			37
-#define MRST_IS21_2CH			38
-#define HRST_I2S2_2CH			39
-#define MRST_I2S2_2CH			40
-#define HRST_CRYPTO			41
-#define SRST_CRYPTO			42
-#define PRST_SPI			43
-#define SRST_SPI			44
-#define PRST_UART0			45
-#define PRST_UART1			46
-#define PRST_UART2			47
-
-#define SRST_UART0			48
-#define SRST_UART1			49
-#define SRST_UART2			50
-#define PRST_I2C1			51
-#define PRST_I2C2			52
-#define PRST_I2C3			53
-#define SRST_I2C1			54
-#define SRST_I2C2			55
-#define SRST_I2C3			56
-#define PRST_PWM1			58
-#define SRST_PWM1			60
-#define PRST_WDT			61
-#define PRST_GPIO1			62
-#define PRST_GPIO2			63
-
-#define PRST_GPIO3			64
-#define PRST_GRF			65
-#define PRST_EFUSE			66
-#define PRST_EFUSE512			67
-#define PRST_TIMER0			68
-#define SRST_TIMER0			69
-#define SRST_TIMER1			70
-#define PRST_TSADC			71
-#define SRST_TSADC			72
-#define PRST_SARADC			73
-#define SRST_SARADC			74
-#define HRST_SYSBUS			75
-#define PRST_USBGRF			76
-
-#define ARST_PERIPH_NIU		80
-#define HRST_PERIPH_NIU		81
-#define PRST_PERIPH_NIU		82
-#define HRST_PERIPH			83
-#define HRST_SDMMC			84
-#define HRST_SDIO			85
-#define HRST_EMMC			86
-#define HRST_NANDC			87
-#define NRST_NANDC			88
-#define HRST_SFC			89
-#define SRST_SFC			90
-#define ARST_GMAC			91
-#define HRST_OTG			92
-#define SRST_OTG			93
-#define SRST_OTG_ADP			94
-#define HRST_HOST0			95
-
-#define HRST_HOST0_AUX			96
-#define HRST_HOST0_ARB			97
-#define SRST_HOST0_EHCIPHY		98
-#define SRST_HOST0_UTMI		99
-#define SRST_USBPOR			100
-#define SRST_UTMI0			101
-#define SRST_UTMI1			102
-
-#define ARST_VIO0_NIU			102
-#define ARST_VIO1_NIU			103
-#define HRST_VIO_NIU			104
-#define PRST_VIO_NIU			105
-#define ARST_VOP			106
-#define HRST_VOP			107
-#define DRST_VOP			108
-#define ARST_IEP			109
-#define HRST_IEP			110
-#define ARST_RGA			111
-#define HRST_RGA			112
-#define SRST_RGA			113
-#define PRST_CVBS			114
-#define PRST_HDMI			115
-#define SRST_HDMI			116
-#define PRST_MIPI_DSI			117
-
-#define ARST_ISP_NIU			118
-#define HRST_ISP_NIU			119
-#define HRST_ISP			120
-#define SRST_ISP			121
-#define ARST_VIP0			122
-#define HRST_VIP0			123
-#define PRST_VIP0			124
-#define ARST_VIP1			125
-#define HRST_VIP1			126
-#define PRST_VIP1			127
-#define ARST_VIP2			128
-#define HRST_VIP2			129
-#define PRST_VIP2			120
-#define ARST_VIP3			121
-#define HRST_VIP3			122
-#define PRST_VIP4			123
-
-#define PRST_CIF1TO4			124
-#define SRST_CVBS_CLK			125
-#define HRST_CVBS			126
-
-#define ARST_VPU_NIU			140
-#define HRST_VPU_NIU			141
-#define ARST_VPU			142
-#define HRST_VPU			143
-#define ARST_RKVDEC_NIU		144
-#define HRST_RKVDEC_NIU		145
-#define ARST_RKVDEC			146
-#define HRST_RKVDEC			147
-#define SRST_RKVDEC_CABAC		148
-#define SRST_RKVDEC_CORE		149
-#define ARST_RKVENC_NIU		150
-#define HRST_RKVENC_NIU		151
-#define ARST_RKVENC			152
-#define HRST_RKVENC			153
-#define SRST_RKVENC_CORE		154
-
-#define SRST_DSP_CORE			156
-#define SRST_DSP_SYS			157
-#define SRST_DSP_GLOBAL		158
-#define SRST_DSP_OECM			159
-#define PRST_DSP_IOP_NIU		160
-#define ARST_DSP_EPP_NIU		161
-#define ARST_DSP_EDP_NIU		162
-#define PRST_DSP_DBG_NIU		163
-#define PRST_DSP_CFG_NIU		164
-#define PRST_DSP_GRF			165
-#define PRST_DSP_MAILBOX		166
-#define PRST_DSP_INTC			167
-#define PRST_DSP_PFM_MON		169
-#define SRST_DSP_PFM_MON		170
-#define ARST_DSP_EDAP_NIU		171
-
-#define SRST_PMU			172
-#define SRST_PMU_I2C0			173
-#define PRST_PMU_I2C0			174
-#define PRST_PMU_GPIO0			175
-#define PRST_PMU_INTMEM		176
-#define PRST_PMU_PWM0			177
-#define SRST_PMU_PWM0			178
-#define PRST_PMU_GRF			179
-#define SRST_PMU_NIU			180
-#define SRST_PMU_PVTM			181
-#define ARST_DSP_EDP_PERF		184
-#define ARST_DSP_EPP_PERF		185
-
-#endif /* _DT_BINDINGS_CLK_ROCKCHIP_RK1108_H */
diff --git a/include/dt-bindings/clock/rv1108-cru.h b/include/dt-bindings/clock/rv1108-cru.h
new file mode 100644
index 000000000000..ae26f8105914
--- /dev/null
+++ b/include/dt-bindings/clock/rv1108-cru.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2016 Rockchip Electronics Co. Ltd.
+ * Author: Shawn Lin <shawn.lin@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_CLK_ROCKCHIP_RV1108_H
+#define _DT_BINDINGS_CLK_ROCKCHIP_RV1108_H
+
+/* pll id */
+#define PLL_APLL			0
+#define PLL_DPLL			1
+#define PLL_GPLL			2
+#define ARMCLK				3
+
+/* sclk gates (special clocks) */
+#define SCLK_SPI0			65
+#define SCLK_NANDC			67
+#define SCLK_SDMMC			68
+#define SCLK_SDIO			69
+#define SCLK_EMMC			71
+#define SCLK_UART0			72
+#define SCLK_UART1			73
+#define SCLK_UART2			74
+#define SCLK_I2S0			75
+#define SCLK_I2S1			76
+#define SCLK_I2S2			77
+#define SCLK_TIMER0			78
+#define SCLK_TIMER1			79
+#define SCLK_SFC			80
+#define SCLK_SDMMC_DRV			81
+#define SCLK_SDIO_DRV			82
+#define SCLK_EMMC_DRV			83
+#define SCLK_SDMMC_SAMPLE		84
+#define SCLK_SDIO_SAMPLE		85
+#define SCLK_EMMC_SAMPLE		86
+
+/* aclk gates */
+#define ACLK_DMAC			192
+#define ACLK_PRE			193
+#define ACLK_CORE			194
+#define ACLK_ENMCORE			195
+
+/* pclk gates */
+#define PCLK_GPIO1			256
+#define PCLK_GPIO2			257
+#define PCLK_GPIO3			258
+#define PCLK_GRF			259
+#define PCLK_I2C1			260
+#define PCLK_I2C2			261
+#define PCLK_I2C3			262
+#define PCLK_SPI			263
+#define PCLK_SFC			264
+#define PCLK_UART0			265
+#define PCLK_UART1			266
+#define PCLK_UART2			267
+#define PCLK_TSADC			268
+#define PCLK_PWM			269
+#define PCLK_TIMER			270
+#define PCLK_PERI			271
+
+/* hclk gates */
+#define HCLK_I2S0_8CH			320
+#define HCLK_I2S1_8CH			321
+#define HCLK_I2S2_2CH			322
+#define HCLK_NANDC			323
+#define HCLK_SDMMC			324
+#define HCLK_SDIO			325
+#define HCLK_EMMC			326
+#define HCLK_PERI			327
+#define HCLK_SFC			328
+
+#define CLK_NR_CLKS			(HCLK_SFC + 1)
+
+/* reset id */
+#define SRST_CORE_PO_AD		0
+#define SRST_CORE_AD			1
+#define SRST_L2_AD			2
+#define SRST_CPU_NIU_AD		3
+#define SRST_CORE_PO			4
+#define SRST_CORE			5
+#define SRST_L2			6
+#define SRST_CORE_DBG			8
+#define PRST_DBG			9
+#define RST_DAP			10
+#define PRST_DBG_NIU			11
+#define ARST_STRC_SYS_AD		15
+
+#define SRST_DDRPHY_CLKDIV		16
+#define SRST_DDRPHY			17
+#define PRST_DDRPHY			18
+#define PRST_HDMIPHY			19
+#define PRST_VDACPHY			20
+#define PRST_VADCPHY			21
+#define PRST_MIPI_CSI_PHY		22
+#define PRST_MIPI_DSI_PHY		23
+#define PRST_ACODEC			24
+#define ARST_BUS_NIU			25
+#define PRST_TOP_NIU			26
+#define ARST_INTMEM			27
+#define HRST_ROM			28
+#define ARST_DMAC			29
+#define SRST_MSCH_NIU			30
+#define PRST_MSCH_NIU			31
+
+#define PRST_DDRUPCTL			32
+#define NRST_DDRUPCTL			33
+#define PRST_DDRMON			34
+#define HRST_I2S0_8CH			35
+#define MRST_I2S0_8CH			36
+#define HRST_I2S1_2CH			37
+#define MRST_IS21_2CH			38
+#define HRST_I2S2_2CH			39
+#define MRST_I2S2_2CH			40
+#define HRST_CRYPTO			41
+#define SRST_CRYPTO			42
+#define PRST_SPI			43
+#define SRST_SPI			44
+#define PRST_UART0			45
+#define PRST_UART1			46
+#define PRST_UART2			47
+
+#define SRST_UART0			48
+#define SRST_UART1			49
+#define SRST_UART2			50
+#define PRST_I2C1			51
+#define PRST_I2C2			52
+#define PRST_I2C3			53
+#define SRST_I2C1			54
+#define SRST_I2C2			55
+#define SRST_I2C3			56
+#define PRST_PWM1			58
+#define SRST_PWM1			60
+#define PRST_WDT			61
+#define PRST_GPIO1			62
+#define PRST_GPIO2			63
+
+#define PRST_GPIO3			64
+#define PRST_GRF			65
+#define PRST_EFUSE			66
+#define PRST_EFUSE512			67
+#define PRST_TIMER0			68
+#define SRST_TIMER0			69
+#define SRST_TIMER1			70
+#define PRST_TSADC			71
+#define SRST_TSADC			72
+#define PRST_SARADC			73
+#define SRST_SARADC			74
+#define HRST_SYSBUS			75
+#define PRST_USBGRF			76
+
+#define ARST_PERIPH_NIU		80
+#define HRST_PERIPH_NIU		81
+#define PRST_PERIPH_NIU		82
+#define HRST_PERIPH			83
+#define HRST_SDMMC			84
+#define HRST_SDIO			85
+#define HRST_EMMC			86
+#define HRST_NANDC			87
+#define NRST_NANDC			88
+#define HRST_SFC			89
+#define SRST_SFC			90
+#define ARST_GMAC			91
+#define HRST_OTG			92
+#define SRST_OTG			93
+#define SRST_OTG_ADP			94
+#define HRST_HOST0			95
+
+#define HRST_HOST0_AUX			96
+#define HRST_HOST0_ARB			97
+#define SRST_HOST0_EHCIPHY		98
+#define SRST_HOST0_UTMI		99
+#define SRST_USBPOR			100
+#define SRST_UTMI0			101
+#define SRST_UTMI1			102
+
+#define ARST_VIO0_NIU			102
+#define ARST_VIO1_NIU			103
+#define HRST_VIO_NIU			104
+#define PRST_VIO_NIU			105
+#define ARST_VOP			106
+#define HRST_VOP			107
+#define DRST_VOP			108
+#define ARST_IEP			109
+#define HRST_IEP			110
+#define ARST_RGA			111
+#define HRST_RGA			112
+#define SRST_RGA			113
+#define PRST_CVBS			114
+#define PRST_HDMI			115
+#define SRST_HDMI			116
+#define PRST_MIPI_DSI			117
+
+#define ARST_ISP_NIU			118
+#define HRST_ISP_NIU			119
+#define HRST_ISP			120
+#define SRST_ISP			121
+#define ARST_VIP0			122
+#define HRST_VIP0			123
+#define PRST_VIP0			124
+#define ARST_VIP1			125
+#define HRST_VIP1			126
+#define PRST_VIP1			127
+#define ARST_VIP2			128
+#define HRST_VIP2			129
+#define PRST_VIP2			120
+#define ARST_VIP3			121
+#define HRST_VIP3			122
+#define PRST_VIP4			123
+
+#define PRST_CIF1TO4			124
+#define SRST_CVBS_CLK			125
+#define HRST_CVBS			126
+
+#define ARST_VPU_NIU			140
+#define HRST_VPU_NIU			141
+#define ARST_VPU			142
+#define HRST_VPU			143
+#define ARST_RKVDEC_NIU		144
+#define HRST_RKVDEC_NIU		145
+#define ARST_RKVDEC			146
+#define HRST_RKVDEC			147
+#define SRST_RKVDEC_CABAC		148
+#define SRST_RKVDEC_CORE		149
+#define ARST_RKVENC_NIU		150
+#define HRST_RKVENC_NIU		151
+#define ARST_RKVENC			152
+#define HRST_RKVENC			153
+#define SRST_RKVENC_CORE		154
+
+#define SRST_DSP_CORE			156
+#define SRST_DSP_SYS			157
+#define SRST_DSP_GLOBAL		158
+#define SRST_DSP_OECM			159
+#define PRST_DSP_IOP_NIU		160
+#define ARST_DSP_EPP_NIU		161
+#define ARST_DSP_EDP_NIU		162
+#define PRST_DSP_DBG_NIU		163
+#define PRST_DSP_CFG_NIU		164
+#define PRST_DSP_GRF			165
+#define PRST_DSP_MAILBOX		166
+#define PRST_DSP_INTC			167
+#define PRST_DSP_PFM_MON		169
+#define SRST_DSP_PFM_MON		170
+#define ARST_DSP_EDAP_NIU		171
+
+#define SRST_PMU			172
+#define SRST_PMU_I2C0			173
+#define PRST_PMU_I2C0			174
+#define PRST_PMU_GPIO0			175
+#define PRST_PMU_INTMEM		176
+#define PRST_PMU_PWM0			177
+#define SRST_PMU_PWM0			178
+#define PRST_PMU_GRF			179
+#define SRST_PMU_NIU			180
+#define SRST_PMU_PVTM			181
+#define ARST_DSP_EDP_PERF		184
+#define ARST_DSP_EPP_PERF		185
+
+#endif /* _DT_BINDINGS_CLK_ROCKCHIP_RV1108_H */
-- 
cgit v1.2.3


From 9be83448951a404a6fd5cf43ee0245a9bccc02c1 Mon Sep 17 00:00:00 2001
From: Elaine Zhang <zhangqing@rock-chips.com>
Date: Wed, 22 Feb 2017 10:59:55 +0800
Subject: clk: rockchip: add pll_wait_lock for pll_enable

If pll is power down,when power up pll need wait pll lock.
The reference documents section:
	PLL frequency change and lock check

Signed-off-by: Elaine Zhang <zhangqing@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 drivers/clk/rockchip/clk-pll.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c
index eec51893a7e6..dd0433d4753e 100644
--- a/drivers/clk/rockchip/clk-pll.c
+++ b/drivers/clk/rockchip/clk-pll.c
@@ -269,6 +269,7 @@ static int rockchip_rk3036_pll_enable(struct clk_hw *hw)
 
 	writel(HIWORD_UPDATE(0, RK3036_PLLCON1_PWRDOWN, 0),
 	       pll->reg_base + RK3036_PLLCON(1));
+	rockchip_pll_wait_lock(pll);
 
 	return 0;
 }
@@ -501,6 +502,7 @@ static int rockchip_rk3066_pll_enable(struct clk_hw *hw)
 
 	writel(HIWORD_UPDATE(0, RK3066_PLLCON3_PWRDOWN, 0),
 	       pll->reg_base + RK3066_PLLCON(3));
+	rockchip_pll_wait_lock(pll);
 
 	return 0;
 }
@@ -746,6 +748,7 @@ static int rockchip_rk3399_pll_enable(struct clk_hw *hw)
 
 	writel(HIWORD_UPDATE(0, RK3399_PLLCON3_PWRDOWN, 0),
 	       pll->reg_base + RK3399_PLLCON(3));
+	rockchip_rk3399_pll_wait_lock(pll);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 2f5ad7f0f3e16772bafa692ad42b5af4dea292f6 Mon Sep 17 00:00:00 2001
From: "mar.krzeminski" <mar.krzeminski@gmail.com>
Date: Fri, 6 Jan 2017 18:19:00 +0100
Subject: mtd: spi-nor: Fix whole chip erasing for stacked chips.

Currently it is possible to disable chip erase for spi-nor driver.
Some modern stacked (multi die) flash chips do not support chip
erase opcode at all but spi-nor framework needs to cope with them too.
This commit extends existing functionality to allow disable
chip erase for a single flash chip.

Signed-off-by: Marcin Krzeminski <mar.krzeminski@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 561e46de8faa..c29a3516bf04 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -85,6 +85,7 @@ struct flash_info {
 					 * Use dedicated 4byte address op codes
 					 * to support memory size above 128Mib.
 					 */
+#define NO_CHIP_ERASE		BIT(12) /* Chip does not support chip erase */
 };
 
 #define JEDEC_MFR(info)	((info)->id[0])
@@ -1631,6 +1632,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 		nor->flags |= SNOR_F_USE_FSR;
 	if (info->flags & SPI_NOR_HAS_TB)
 		nor->flags |= SNOR_F_HAS_SR_TB;
+	if (info->flags & NO_CHIP_ERASE)
+		nor->flags |= SNOR_F_NO_OP_CHIP_ERASE;
 
 #ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
 	/* prefer "small sector" erase if possible */
-- 
cgit v1.2.3


From 193fb3c1127d0c0fbc5c5a6ba63a99083b775ffd Mon Sep 17 00:00:00 2001
From: "mar.krzeminski" <mar.krzeminski@gmail.com>
Date: Fri, 6 Jan 2017 18:19:01 +0100
Subject: mtd: spi-nor: Disable chip erase for Micron n25q00.

Micron n25q00 are stacked chips, thus do not support chip erase.
>From now spi-nor framework will not send chip erase command,
instead will use sector at time erase procedure.

Signed-off-by: Marcin Krzeminski <mar.krzeminski@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index c29a3516bf04..32d1849b084c 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1036,8 +1036,8 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
 	{ "n25q512ax3",  INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
-	{ "n25q00",      INFO(0x20ba21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
-	{ "n25q00a",     INFO(0x20bb21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
+	{ "n25q00",      INFO(0x20ba21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
+	{ "n25q00a",     INFO(0x20bb21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
 
 	/* PMC */
 	{ "pm25lv512",   INFO(0,        0, 32 * 1024,    2, SECT_4K_PMC) },
-- 
cgit v1.2.3


From 34fc99dbf32d8cca7a2c6b85949bc3433767a113 Mon Sep 17 00:00:00 2001
From: Alexander Kurz <akurz@blala.de>
Date: Sat, 11 Mar 2017 20:01:04 +0100
Subject: drivers mtd: spi-nor: add Winbond W25Q20 variants

Winbond W25Q20BW devices are used in 4/5th generation Kindle ebook readers.
Add this spi-nor device and the similar W25Q20 devices to the list of known
devices.

Signed-off-by: Alexander Kurz <akurz@blala.de>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 32d1849b084c..619b24505b27 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1131,6 +1131,9 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "w25x80", INFO(0xef3014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25x16", INFO(0xef3015, 0, 64 * 1024,  32, SECT_4K) },
 	{ "w25x32", INFO(0xef3016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "w25q20cl", INFO(0xef4012, 0, 64 * 1024,  4, SECT_4K) },
+	{ "w25q20bw", INFO(0xef5012, 0, 64 * 1024,  4, SECT_4K) },
+	{ "w25q20ew", INFO(0xef6012, 0, 64 * 1024,  4, SECT_4K) },
 	{ "w25q32", INFO(0xef4016, 0, 64 * 1024,  64, SECT_4K) },
 	{
 		"w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64,
-- 
cgit v1.2.3


From 9f3cd4537da071cfa4987ef7315990417585f169 Mon Sep 17 00:00:00 2001
From: Alexander Kurz <akurz@blala.de>
Date: Sat, 11 Mar 2017 20:01:05 +0100
Subject: drivers mtd: spi-nor: add Macronix MX25Ux033E and MX25Ux035 variants

Macronix MX25U2033E, MX25U4033E and MX25U4035 devices are used in 4/5/6th
generation Kindle ebook readers. Both MX25U403x variants share the same
JEDEC id. Add those spi-nor variants and the similar MX25U8035 mentioned
in the same set of datasheets.

Signed-off-by: Alexander Kurz <akurz@blala.de>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 619b24505b27..d3cb44b28490 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1016,6 +1016,9 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "mx25l3205d",  INFO(0xc22016, 0, 64 * 1024,  64, SECT_4K) },
 	{ "mx25l3255e",  INFO(0xc29e16, 0, 64 * 1024,  64, SECT_4K) },
 	{ "mx25l6405d",  INFO(0xc22017, 0, 64 * 1024, 128, SECT_4K) },
+	{ "mx25u2033e",  INFO(0xc22532, 0, 64 * 1024,   4, SECT_4K) },
+	{ "mx25u4035",   INFO(0xc22533, 0, 64 * 1024,   8, SECT_4K) },
+	{ "mx25u8035",   INFO(0xc22534, 0, 64 * 1024,  16, SECT_4K) },
 	{ "mx25u6435f",  INFO(0xc22537, 0, 64 * 1024, 128, SECT_4K) },
 	{ "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
 	{ "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
-- 
cgit v1.2.3


From 4ca8c1d4979cc60cc129318ce0a89d075c9eb49f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 22 Mar 2017 12:01:45 +0300
Subject: mtd: nand: hynix: Fix an error code in init

We should be return -ENOMEM instead of success.

Fixes: 626994e07480 ("mtd: nand: hynix: Add read-retry support for 1x nm MLC NANDs")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_hynix.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/nand_hynix.c b/drivers/mtd/nand/nand_hynix.c
index 2a5d0efea498..b12dc7325378 100644
--- a/drivers/mtd/nand/nand_hynix.c
+++ b/drivers/mtd/nand/nand_hynix.c
@@ -270,8 +270,10 @@ static int hynix_mlc_1xnm_rr_init(struct nand_chip *chip,
 		goto out;
 
 	rr = kzalloc(sizeof(*rr) + (nregs * nmodes), GFP_KERNEL);
-	if (!rr)
+	if (!rr) {
+		ret = -ENOMEM;
 		goto out;
+	}
 
 	for (i = 0; i < nmodes; i++) {
 		for (j = 0; j < nregs; j++) {
-- 
cgit v1.2.3


From ee56874f23e5c11576540bd695177a5ebc4f4352 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 21 Mar 2017 11:03:53 +0100
Subject: mtd: nand: fsmc: fix NAND width handling

In commit eea628199d5b ("mtd: Add device-tree support to fsmc_nand"),
Device Tree support was added to the fmsc_nand driver. However, this
code has a bug in how it handles the bank-width DT property to set the
bus width.

Indeed, in the function fsmc_nand_probe_config_dt() that parses the
Device Tree, it sets pdata->width to either 8 or 16 depending on the
value of the bank-width DT property.

Then, the ->probe() function will test if pdata->width is equal to
FSMC_NAND_BW16 (which is 2) to set NAND_BUSWIDTH_16 in
nand->options. Therefore, with the DT probing, this condition will never
match.

This commit fixes that by removing the "width" field from
fsmc_nand_platform_data and instead have the fsmc_nand_probe_config_dt()
function directly set the appropriate nand->options value.

It is worth mentioning that if this commit gets backported to older
kernels, prior to the drop of non-DT probing, then non-DT probing will
be broken because nand->options will no longer be set to
NAND_BUSWIDTH_16.

Fixes: eea628199d5b ("mtd: Add device-tree support to fsmc_nand")
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index bda1e4667138..66aece9cc2cc 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -150,7 +150,6 @@ struct fsmc_nand_platform_data {
 	struct mtd_partition	*partitions;
 	unsigned int		nr_partitions;
 	unsigned int		options;
-	unsigned int		width;
 	unsigned int		bank;
 
 	enum access_mode	mode;
@@ -844,18 +843,19 @@ static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
 	u32 val;
 	int ret;
 
-	/* Set default NAND width to 8 bits */
-	pdata->width = 8;
+	pdata->options = 0;
+
 	if (!of_property_read_u32(np, "bank-width", &val)) {
 		if (val == 2) {
-			pdata->width = 16;
+			pdata->options |= NAND_BUSWIDTH_16;
 		} else if (val != 1) {
 			dev_err(&pdev->dev, "invalid bank-width %u\n", val);
 			return -EINVAL;
 		}
 	}
+
 	if (of_get_property(np, "nand-skip-bbtscan", NULL))
-		pdata->options = NAND_SKIP_BBTSCAN;
+		pdata->options |= NAND_SKIP_BBTSCAN;
 
 	pdata->nand_timings = devm_kzalloc(&pdev->dev,
 				sizeof(*pdata->nand_timings), GFP_KERNEL);
@@ -992,9 +992,6 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	nand->badblockbits = 7;
 	nand_set_flash_node(nand, np);
 
-	if (pdata->width == FSMC_NAND_BW16)
-		nand->options |= NAND_BUSWIDTH_16;
-
 	switch (host->mode) {
 	case USE_DMA_ACCESS:
 		dma_cap_zero(mask);
-- 
cgit v1.2.3


From e7cda017f66e869738b1aba7e0cfe5ead000f155 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 21 Mar 2017 11:03:56 +0100
Subject: mtd: nand: fsmc: move fsmc_nand_data definition

This commit simply moves the "struct fsmc_nand_data" definition to be
towards the beginning of the file, with the other defines and type
definitions, instead of in the middle of the driver code. This is much
more consistent with what most Linux drivers do.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 98 ++++++++++++++++++++++----------------------
 1 file changed, 49 insertions(+), 49 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 66aece9cc2cc..0dd3c5551950 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -161,6 +161,55 @@ struct fsmc_nand_platform_data {
 	void			*write_dma_priv;
 };
 
+/**
+ * struct fsmc_nand_data - structure for FSMC NAND device state
+ *
+ * @pid:		Part ID on the AMBA PrimeCell format
+ * @mtd:		MTD info for a NAND flash.
+ * @nand:		Chip related info for a NAND flash.
+ * @partitions:		Partition info for a NAND Flash.
+ * @nr_partitions:	Total number of partition of a NAND flash.
+ *
+ * @bank:		Bank number for probed device.
+ * @clk:		Clock structure for FSMC.
+ *
+ * @read_dma_chan:	DMA channel for read access
+ * @write_dma_chan:	DMA channel for write access to NAND
+ * @dma_access_complete: Completion structure
+ *
+ * @data_pa:		NAND Physical port for Data.
+ * @data_va:		NAND port for Data.
+ * @cmd_va:		NAND port for Command.
+ * @addr_va:		NAND port for Address.
+ * @regs_va:		FSMC regs base address.
+ */
+struct fsmc_nand_data {
+	u32			pid;
+	struct nand_chip	nand;
+	struct mtd_partition	*partitions;
+	unsigned int		nr_partitions;
+
+	unsigned int		bank;
+	struct device		*dev;
+	enum access_mode	mode;
+	struct clk		*clk;
+
+	/* DMA related objects */
+	struct dma_chan		*read_dma_chan;
+	struct dma_chan		*write_dma_chan;
+	struct completion	dma_access_complete;
+
+	struct fsmc_nand_timings *dev_timings;
+
+	dma_addr_t		data_pa;
+	void __iomem		*data_va;
+	void __iomem		*cmd_va;
+	void __iomem		*addr_va;
+	void __iomem		*regs_va;
+
+	void			(*select_chip)(uint32_t bank, uint32_t busw);
+};
+
 static int fsmc_ecc1_ooblayout_ecc(struct mtd_info *mtd, int section,
 				   struct mtd_oob_region *oobregion)
 {
@@ -245,55 +294,6 @@ static const struct mtd_ooblayout_ops fsmc_ecc4_ooblayout_ops = {
 	.free = fsmc_ecc4_ooblayout_free,
 };
 
-/**
- * struct fsmc_nand_data - structure for FSMC NAND device state
- *
- * @pid:		Part ID on the AMBA PrimeCell format
- * @mtd:		MTD info for a NAND flash.
- * @nand:		Chip related info for a NAND flash.
- * @partitions:		Partition info for a NAND Flash.
- * @nr_partitions:	Total number of partition of a NAND flash.
- *
- * @bank:		Bank number for probed device.
- * @clk:		Clock structure for FSMC.
- *
- * @read_dma_chan:	DMA channel for read access
- * @write_dma_chan:	DMA channel for write access to NAND
- * @dma_access_complete: Completion structure
- *
- * @data_pa:		NAND Physical port for Data.
- * @data_va:		NAND port for Data.
- * @cmd_va:		NAND port for Command.
- * @addr_va:		NAND port for Address.
- * @regs_va:		FSMC regs base address.
- */
-struct fsmc_nand_data {
-	u32			pid;
-	struct nand_chip	nand;
-	struct mtd_partition	*partitions;
-	unsigned int		nr_partitions;
-
-	unsigned int		bank;
-	struct device		*dev;
-	enum access_mode	mode;
-	struct clk		*clk;
-
-	/* DMA related objects */
-	struct dma_chan		*read_dma_chan;
-	struct dma_chan		*write_dma_chan;
-	struct completion	dma_access_complete;
-
-	struct fsmc_nand_timings *dev_timings;
-
-	dma_addr_t		data_pa;
-	void __iomem		*data_va;
-	void __iomem		*cmd_va;
-	void __iomem		*addr_va;
-	void __iomem		*regs_va;
-
-	void			(*select_chip)(uint32_t bank, uint32_t busw);
-};
-
 static inline struct fsmc_nand_data *mtd_to_fsmc(struct mtd_info *mtd)
 {
 	return container_of(mtd_to_nand(mtd), struct fsmc_nand_data, nand);
-- 
cgit v1.2.3


From 6324fb93a2594fa7e748ec45751033eee1fd7fee Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 21 Mar 2017 11:03:57 +0100
Subject: mtd: nand: fsmc: remove ->select_bank() from fsmc_nand_platform_data

Since commit 4404d7d821c3 ("mtd: nand: fsmc: remove stale non-DT probe
path"), only DT probing is used for the fsmc_nand driver. Due to this,
the ->select_bank() field of fsmc_nand_platform_data is never used, so
this commit gets rid of it.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 0dd3c5551950..2f05c4cadfb8 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -141,7 +141,6 @@ enum access_mode {
  * @options: different options for the driver
  * @width: bus width
  * @bank: default bank
- * @select_bank: callback to select a certain bank, this is
  * platform-specific. If the controller only supports one bank
  * this may be set to NULL
  */
@@ -154,8 +153,6 @@ struct fsmc_nand_platform_data {
 
 	enum access_mode	mode;
 
-	void			(*select_bank)(uint32_t bank, uint32_t busw);
-
 	/* priv structures for dma accesses */
 	void			*read_dma_priv;
 	void			*write_dma_priv;
@@ -958,7 +955,6 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 		 AMBA_REV_BITS(pid), AMBA_CONFIG_BITS(pid));
 
 	host->bank = pdata->bank;
-	host->select_chip = pdata->select_bank;
 	host->partitions = pdata->partitions;
 	host->nr_partitions = pdata->nr_partitions;
 	host->dev = &pdev->dev;
-- 
cgit v1.2.3


From a04271a723c716da534ee6c20531aee487129dbd Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 21 Mar 2017 11:03:58 +0100
Subject: mtd: nand: fsmc: remove fsmc_select_chip()

host->select_chip used to point to the ->select_bank() function provided
by the platform data, but the latter no longer exists. Therefore
host->select_chip is always NULL.

Due to this, the fsmc_select_chip() does nothing, except:

  chip->cmd_ctrl(mtd, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);

when chipnr is -1, which is exactly what the default implementation of
->select_chip() does in the NAND framework. So, this commit kills
fsmc_select_chip() entirely.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 29 -----------------------------
 1 file changed, 29 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 2f05c4cadfb8..aa3c16098b98 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -203,8 +203,6 @@ struct fsmc_nand_data {
 	void __iomem		*cmd_va;
 	void __iomem		*addr_va;
 	void __iomem		*regs_va;
-
-	void			(*select_chip)(uint32_t bank, uint32_t busw);
 };
 
 static int fsmc_ecc1_ooblayout_ecc(struct mtd_info *mtd, int section,
@@ -296,32 +294,6 @@ static inline struct fsmc_nand_data *mtd_to_fsmc(struct mtd_info *mtd)
 	return container_of(mtd_to_nand(mtd), struct fsmc_nand_data, nand);
 }
 
-/* Assert CS signal based on chipnr */
-static void fsmc_select_chip(struct mtd_info *mtd, int chipnr)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct fsmc_nand_data *host;
-
-	host = mtd_to_fsmc(mtd);
-
-	switch (chipnr) {
-	case -1:
-		chip->cmd_ctrl(mtd, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);
-		break;
-	case 0:
-	case 1:
-	case 2:
-	case 3:
-		if (host->select_chip)
-			host->select_chip(chipnr,
-					chip->options & NAND_BUSWIDTH_16);
-		break;
-
-	default:
-		dev_err(host->dev, "unsupported chip-select %d\n", chipnr);
-	}
-}
-
 /*
  * fsmc_cmd_ctrl - For facilitaing Hardware access
  * This routine allows hardware specific access to control-lines(ALE,CLE)
@@ -984,7 +956,6 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	nand->ecc.hwctl = fsmc_enable_hwecc;
 	nand->ecc.size = 512;
 	nand->options = pdata->options;
-	nand->select_chip = fsmc_select_chip;
 	nand->badblockbits = 7;
 	nand_set_flash_node(nand, np);
 
-- 
cgit v1.2.3


From feb1e57ee583502849fca9bbe8258327b3f4c61c Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 21 Mar 2017 11:03:59 +0100
Subject: mtd: nand: fmsc: kill {read, write}_dma_priv from
 fsmc_nand_platform_data

The read_dma_priv and write_dma_priv fields of fsmc_nand_platform_data
are never set, so this commit removes them.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index aa3c16098b98..742248a32615 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -152,10 +152,6 @@ struct fsmc_nand_platform_data {
 	unsigned int		bank;
 
 	enum access_mode	mode;
-
-	/* priv structures for dma accesses */
-	void			*read_dma_priv;
-	void			*write_dma_priv;
 };
 
 /**
@@ -963,14 +959,12 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	case USE_DMA_ACCESS:
 		dma_cap_zero(mask);
 		dma_cap_set(DMA_MEMCPY, mask);
-		host->read_dma_chan = dma_request_channel(mask, filter,
-				pdata->read_dma_priv);
+		host->read_dma_chan = dma_request_channel(mask, filter, NULL);
 		if (!host->read_dma_chan) {
 			dev_err(&pdev->dev, "Unable to get read dma channel\n");
 			goto err_req_read_chnl;
 		}
-		host->write_dma_chan = dma_request_channel(mask, filter,
-				pdata->write_dma_priv);
+		host->write_dma_chan = dma_request_channel(mask, filter, NULL);
 		if (!host->write_dma_chan) {
 			dev_err(&pdev->dev, "Unable to get write dma channel\n");
 			goto err_req_write_chnl;
-- 
cgit v1.2.3


From ede29a020ec3b493748c5eb780aa9d5977f72db8 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 21 Mar 2017 11:04:00 +0100
Subject: mtd: nand: fsmc: kill {nr_, }partitions structure fields

The ->partitions and ->nr_partitions fields of struct
fsmc_nand_platform_data are never set anywhere, so they are always
NULL/0. The corresponding fields in 'struct fsmc_nand_data' are set to the
value of the same fields in fsmc_nand_platform_data, i.e NULL/0.

Therefore, we remove those two fields, and pass NULL/0 directly to
mtd_device_register(), like many other NAND drivers already do.

At the same time, we remove the comment about the fact that we pass
partition info, since we are no longer doing this.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 742248a32615..abfaa1d6e22e 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -146,8 +146,6 @@ enum access_mode {
  */
 struct fsmc_nand_platform_data {
 	struct fsmc_nand_timings *nand_timings;
-	struct mtd_partition	*partitions;
-	unsigned int		nr_partitions;
 	unsigned int		options;
 	unsigned int		bank;
 
@@ -179,8 +177,6 @@ struct fsmc_nand_platform_data {
 struct fsmc_nand_data {
 	u32			pid;
 	struct nand_chip	nand;
-	struct mtd_partition	*partitions;
-	unsigned int		nr_partitions;
 
 	unsigned int		bank;
 	struct device		*dev;
@@ -923,8 +919,6 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 		 AMBA_REV_BITS(pid), AMBA_CONFIG_BITS(pid));
 
 	host->bank = pdata->bank;
-	host->partitions = pdata->partitions;
-	host->nr_partitions = pdata->nr_partitions;
 	host->dev = &pdev->dev;
 	host->dev_timings = pdata->nand_timings;
 	host->mode = pdata->mode;
@@ -1065,18 +1059,8 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_probe;
 
-	/*
-	 * The partition information can is accessed by (in the same precedence)
-	 *
-	 * command line through Bootloader,
-	 * platform data,
-	 * default partition information present in driver.
-	 */
-	/*
-	 * Check for partition info passed
-	 */
 	mtd->name = "nand";
-	ret = mtd_device_register(mtd, host->partitions, host->nr_partitions);
+	ret = mtd_device_register(mtd, NULL, 0);
 	if (ret)
 		goto err_probe;
 
-- 
cgit v1.2.3


From c0d218c81621d847472e01f227eab3e4f0902ce2 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 21 Mar 2017 11:04:01 +0100
Subject: mtd: nand: fsmc: remove duplicate nand_set_flash_node()

It is already done a few lines before.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index abfaa1d6e22e..63055d7dcfef 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -947,7 +947,6 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	nand->ecc.size = 512;
 	nand->options = pdata->options;
 	nand->badblockbits = 7;
-	nand_set_flash_node(nand, np);
 
 	switch (host->mode) {
 	case USE_DMA_ACCESS:
-- 
cgit v1.2.3


From a1b1e1d5bdfe79e3d3b28387dd43bb16531be5a4 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 21 Mar 2017 11:04:02 +0100
Subject: mtd: nand: fsmc: finally remove fsmc_nand_platform_data

Since the driver now only supports DT probing, it doesn't make a lot of
sense to have a private data structure called platform_data, fill it in
with information coming from the DT, and then copying this into the
driver-specific structure fsmc_nand_data.

So instead, we remove fsmc_nand_platform_data entirely, and have
fsmc_nand_probe_config_dt() fill in the fsmc_nand_data structure
directly.

This requires calling fsmc_nand_probe_config_dt() after fsmc_nand_data
has been allocated instead of before.

Also, as an added bonus, we now propagate properly the return value of
fsmc_nand_probe_config_dt() instead of returning -ENODEV on failure. The
error message is also removed, since it no longer made any sense.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 73 +++++++++++++-------------------------------
 1 file changed, 21 insertions(+), 52 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 63055d7dcfef..4c6e239dcee8 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -132,26 +132,6 @@ enum access_mode {
 	USE_WORD_ACCESS,
 };
 
-/**
- * fsmc_nand_platform_data - platform specific NAND controller config
- * @nand_timings: timing setup for the physical NAND interface
- * @partitions: partition table for the platform, use a default fallback
- * if this is NULL
- * @nr_partitions: the number of partitions in the previous entry
- * @options: different options for the driver
- * @width: bus width
- * @bank: default bank
- * platform-specific. If the controller only supports one bank
- * this may be set to NULL
- */
-struct fsmc_nand_platform_data {
-	struct fsmc_nand_timings *nand_timings;
-	unsigned int		options;
-	unsigned int		bank;
-
-	enum access_mode	mode;
-};
-
 /**
  * struct fsmc_nand_data - structure for FSMC NAND device state
  *
@@ -798,17 +778,18 @@ static bool filter(struct dma_chan *chan, void *slave)
 }
 
 static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
-				     struct device_node *np)
+				     struct fsmc_nand_data *host,
+				     struct nand_chip *nand)
 {
-	struct fsmc_nand_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct device_node *np = pdev->dev.of_node;
 	u32 val;
 	int ret;
 
-	pdata->options = 0;
+	nand->options = 0;
 
 	if (!of_property_read_u32(np, "bank-width", &val)) {
 		if (val == 2) {
-			pdata->options |= NAND_BUSWIDTH_16;
+			nand->options |= NAND_BUSWIDTH_16;
 		} else if (val != 1) {
 			dev_err(&pdev->dev, "invalid bank-width %u\n", val);
 			return -EINVAL;
@@ -816,27 +797,27 @@ static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
 	}
 
 	if (of_get_property(np, "nand-skip-bbtscan", NULL))
-		pdata->options |= NAND_SKIP_BBTSCAN;
+		nand->options |= NAND_SKIP_BBTSCAN;
 
-	pdata->nand_timings = devm_kzalloc(&pdev->dev,
-				sizeof(*pdata->nand_timings), GFP_KERNEL);
-	if (!pdata->nand_timings)
+	host->dev_timings = devm_kzalloc(&pdev->dev,
+				sizeof(*host->dev_timings), GFP_KERNEL);
+	if (!host->dev_timings)
 		return -ENOMEM;
-	ret = of_property_read_u8_array(np, "timings", (u8 *)pdata->nand_timings,
-						sizeof(*pdata->nand_timings));
+	ret = of_property_read_u8_array(np, "timings", (u8 *)host->dev_timings,
+						sizeof(*host->dev_timings));
 	if (ret) {
 		dev_info(&pdev->dev, "No timings in dts specified, using default timings!\n");
-		pdata->nand_timings = NULL;
+		host->dev_timings = NULL;
 	}
 
 	/* Set default NAND bank to 0 */
-	pdata->bank = 0;
+	host->bank = 0;
 	if (!of_property_read_u32(np, "bank", &val)) {
 		if (val > 3) {
 			dev_err(&pdev->dev, "invalid bank %u\n", val);
 			return -EINVAL;
 		}
-		pdata->bank = val;
+		host->bank = val;
 	}
 	return 0;
 }
@@ -847,8 +828,6 @@ static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
  */
 static int __init fsmc_nand_probe(struct platform_device *pdev)
 {
-	struct fsmc_nand_platform_data *pdata = dev_get_platdata(&pdev->dev);
-	struct device_node __maybe_unused *np = pdev->dev.of_node;
 	struct fsmc_nand_data *host;
 	struct mtd_info *mtd;
 	struct nand_chip *nand;
@@ -858,22 +837,17 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	u32 pid;
 	int i;
 
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return -ENOMEM;
-
-	pdev->dev.platform_data = pdata;
-	ret = fsmc_nand_probe_config_dt(pdev, np);
-	if (ret) {
-		dev_err(&pdev->dev, "no platform data\n");
-		return -ENODEV;
-	}
-
 	/* Allocate memory for the device structure (and zero it) */
 	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
 	if (!host)
 		return -ENOMEM;
 
+	nand = &host->nand;
+
+	ret = fsmc_nand_probe_config_dt(pdev, host, nand);
+	if (ret)
+		return ret;
+
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data");
 	host->data_va = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(host->data_va))
@@ -918,19 +892,15 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 		 AMBA_PART_BITS(pid), AMBA_MANF_BITS(pid),
 		 AMBA_REV_BITS(pid), AMBA_CONFIG_BITS(pid));
 
-	host->bank = pdata->bank;
 	host->dev = &pdev->dev;
-	host->dev_timings = pdata->nand_timings;
-	host->mode = pdata->mode;
 
 	if (host->mode == USE_DMA_ACCESS)
 		init_completion(&host->dma_access_complete);
 
 	/* Link all private pointers */
 	mtd = nand_to_mtd(&host->nand);
-	nand = &host->nand;
 	nand_set_controller_data(nand, host);
-	nand_set_flash_node(nand, np);
+	nand_set_flash_node(nand, pdev->dev.of_node);
 
 	mtd->dev.parent = &pdev->dev;
 	nand->IO_ADDR_R = host->data_va;
@@ -945,7 +915,6 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	nand->ecc.mode = NAND_ECC_HW;
 	nand->ecc.hwctl = fsmc_enable_hwecc;
 	nand->ecc.size = 512;
-	nand->options = pdata->options;
 	nand->badblockbits = 7;
 
 	switch (host->mode) {
-- 
cgit v1.2.3


From fb8ed2ca432443ef1898aad7501c6e63c5c94f8e Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 21 Mar 2017 11:04:03 +0100
Subject: mtd: nand: fsmc: use devm_clk_get()

This commit switches the fsmc_nand driver from clk_get() to
devm_clk_get(), which saves a few clk_put().

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 4c6e239dcee8..f611eb04184c 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -870,7 +870,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	if (IS_ERR(host->regs_va))
 		return PTR_ERR(host->regs_va);
 
-	host->clk = clk_get(&pdev->dev, NULL);
+	host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(host->clk)) {
 		dev_err(&pdev->dev, "failed to fetch block clock\n");
 		return PTR_ERR(host->clk);
@@ -878,7 +878,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 
 	ret = clk_prepare_enable(host->clk);
 	if (ret)
-		goto err_clk_prepare_enable;
+		return ret;
 
 	/*
 	 * This device ID is actually a common AMBA ID as used on the
@@ -1045,8 +1045,6 @@ err_req_write_chnl:
 		dma_release_channel(host->read_dma_chan);
 err_req_read_chnl:
 	clk_disable_unprepare(host->clk);
-err_clk_prepare_enable:
-	clk_put(host->clk);
 	return ret;
 }
 
@@ -1065,7 +1063,6 @@ static int fsmc_nand_remove(struct platform_device *pdev)
 			dma_release_channel(host->read_dma_chan);
 		}
 		clk_disable_unprepare(host->clk);
-		clk_put(host->clk);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 77cc88d2b9848bac16c32c01eebf45f060f81f45 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 21 Mar 2017 11:04:04 +0100
Subject: mtd: nand: fsmc: remove unused definitions

These definitions are not used anywhere in the driver, so remove them.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index f611eb04184c..e35cabb2f51e 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -38,15 +38,6 @@
 #include <linux/amba/bus.h>
 #include <mtd/mtd-abi.h>
 
-#define FSMC_NAND_BW8		1
-#define FSMC_NAND_BW16		2
-
-#define FSMC_MAX_NOR_BANKS	4
-#define FSMC_MAX_NAND_BANKS	4
-
-#define FSMC_FLASH_WIDTH8	1
-#define FSMC_FLASH_WIDTH16	2
-
 /* fsmc controller registers for NOR flash */
 #define CTRL			0x0
 	/* ctrl register definitions */
-- 
cgit v1.2.3


From 33575b25fff26085c17675b5b63d60e889cfe959 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 21 Mar 2017 11:04:05 +0100
Subject: mtd: nand: fsmc: remove CONFIG_OF conditional

Since commit 4404d7d821c33 ("mtd: nand: fsmc: remove stale non-DT probe
path"), the fsmc NAND driver only supports Device Tree probing, and
therefore has a "depends on OF" in its Kconfig option.

Due to this the #ifdef CONFIG_OF ... #endif condition in the driver code
is no longer necessary.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index e35cabb2f51e..cea50d2f218c 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -1083,20 +1083,18 @@ static int fsmc_nand_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(fsmc_nand_pm_ops, fsmc_nand_suspend, fsmc_nand_resume);
 
-#ifdef CONFIG_OF
 static const struct of_device_id fsmc_nand_id_table[] = {
 	{ .compatible = "st,spear600-fsmc-nand" },
 	{ .compatible = "stericsson,fsmc-nand" },
 	{}
 };
 MODULE_DEVICE_TABLE(of, fsmc_nand_id_table);
-#endif
 
 static struct platform_driver fsmc_nand_driver = {
 	.remove = fsmc_nand_remove,
 	.driver = {
 		.name = "fsmc-nand",
-		.of_match_table = of_match_ptr(fsmc_nand_id_table),
+		.of_match_table = fsmc_nand_id_table,
 		.pm = &fsmc_nand_pm_ops,
 	},
 };
-- 
cgit v1.2.3


From 9fe4b66efb77fec71a9c11de58e5e51c5c826190 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:00 +0900
Subject: mtd: nand: allow to set only one of ECC size and ECC strength from DT

Currently, it is valid to specify both "nand-ecc-step-size" and
"nand-ecc-strength", but not allowed to set only one of them.

This requirement has a conflict with "nand-ecc-maximize"; this flag
is used when you want the driver to choose the best ECC strength.
If "nand-ecc-maximize" is set, "nand-ecc-strength" is very likely to
be unset.

It would be possible to make the if-conditional more complex by
adding the check for the NAND_ECC_MAXIMIZE flag, but I chose to drop
the check entirely.  I thought of the situation where the hardware
has a fixed ECC step size (so it can be hard-coded in the driver),
whereas the ECC strength is configurable by software.  In that case,
we may want to only set "nand-ecc-strength" (or "nand-ecc-maximize")
in DT.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 685376d8ceab..d3545c9a792a 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -4225,12 +4225,6 @@ static int nand_dt_init(struct nand_chip *chip)
 	ecc_strength = of_get_nand_ecc_strength(dn);
 	ecc_step = of_get_nand_ecc_step_size(dn);
 
-	if ((ecc_step >= 0 && !(ecc_strength >= 0)) ||
-	    (!(ecc_step >= 0) && ecc_strength >= 0)) {
-		pr_err("must set both strength and step size in DT\n");
-		return -EINVAL;
-	}
-
 	if (ecc_mode >= 0)
 		chip->ecc.mode = ecc_mode;
 
-- 
cgit v1.2.3


From c120e75e0e7deff88119376298342df139b9b17c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:01 +0900
Subject: mtd: nand: use read_oob() instead of cmdfunc() for bad block check

The nand_default_block_markbad() and scan_block_fast() use high
level APIs to get access to the BBM.

On the other hand, nand_block_bad (the default implementation of
->block_bad) calls the lower level ->cmdfunc hook.  This prevents
drivers from using ->ecc.read_oob() even if optimized read operation
is implemented.  Besides, some NAND controllers may protect the BBM
with ECC.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 34 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index d3545c9a792a..afbb80f4375a 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -354,40 +354,32 @@ static void nand_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len)
  */
 static int nand_block_bad(struct mtd_info *mtd, loff_t ofs)
 {
-	int page, res = 0, i = 0;
+	int page, page_end, res;
 	struct nand_chip *chip = mtd_to_nand(mtd);
-	u16 bad;
+	u8 bad;
 
 	if (chip->bbt_options & NAND_BBT_SCANLASTPAGE)
 		ofs += mtd->erasesize - mtd->writesize;
 
 	page = (int)(ofs >> chip->page_shift) & chip->pagemask;
+	page_end = page + (chip->bbt_options & NAND_BBT_SCAN2NDPAGE ? 2 : 1);
 
-	do {
-		if (chip->options & NAND_BUSWIDTH_16) {
-			chip->cmdfunc(mtd, NAND_CMD_READOOB,
-					chip->badblockpos & 0xFE, page);
-			bad = cpu_to_le16(chip->read_word(mtd));
-			if (chip->badblockpos & 0x1)
-				bad >>= 8;
-			else
-				bad &= 0xFF;
-		} else {
-			chip->cmdfunc(mtd, NAND_CMD_READOOB, chip->badblockpos,
-					page);
-			bad = chip->read_byte(mtd);
-		}
+	for (; page < page_end; page++) {
+		res = chip->ecc.read_oob(mtd, chip, page);
+		if (res)
+			return res;
+
+		bad = chip->oob_poi[chip->badblockpos];
 
 		if (likely(chip->badblockbits == 8))
 			res = bad != 0xFF;
 		else
 			res = hweight8(bad) < chip->badblockbits;
-		ofs += mtd->writesize;
-		page = (int)(ofs >> chip->page_shift) & chip->pagemask;
-		i++;
-	} while (!res && i < 2 && (chip->bbt_options & NAND_BBT_SCAN2NDPAGE));
+		if (res)
+			return res;
+	}
 
-	return res;
+	return 0;
 }
 
 /**
-- 
cgit v1.2.3


From 357cc408a4009c0d118b684e4865f85694ebc68c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:02 +0900
Subject: mtd: nand: denali: remove unused CONFIG option and macros

All of these macros are not used at all.
CONFIG_MTD_NAND_DENALI_SCRATCH_REG_ADDR is not used for anything but
defining SCRATCH_REG_ADDR.  The config option should go away as well.

I am removing some register macros.  They are not used, and do not
exist in recent IP versions.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/Kconfig  | 11 ------
 drivers/mtd/nand/denali.c |  5 ---
 drivers/mtd/nand/denali.h | 99 -----------------------------------------------
 3 files changed, 115 deletions(-)

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index ae7ae77f295e..aa44fb0a1b18 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -59,17 +59,6 @@ config MTD_NAND_DENALI_DT
 	  Enable the driver for NAND flash on platforms using a Denali NAND
 	  controller as a DT device.
 
-config MTD_NAND_DENALI_SCRATCH_REG_ADDR
-        hex "Denali NAND size scratch register address"
-        default "0xFF108018"
-        depends on MTD_NAND_DENALI_PCI
-        help
-          Some platforms place the NAND chip size in a scratch register
-          because (some versions of) the driver aren't able to automatically
-          determine the size of certain chips. Set the address of the
-          scratch register here to enable this feature. On Intel Moorestown
-          boards, the scratch register is at 0xFF108018.
-
 config MTD_NAND_GPIO
 	tristate "GPIO assisted NAND Flash driver"
 	depends on GPIOLIB || COMPILE_TEST
diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 73b9d4e2dca0..f993e1378f94 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -91,11 +91,6 @@ static inline struct denali_nand_info *mtd_to_denali(struct mtd_info *mtd)
 #define DENALI_READ	0
 #define DENALI_WRITE	0x100
 
-/* types of device accesses. We can issue commands and get status */
-#define COMMAND_CYCLE	0
-#define ADDR_CYCLE	1
-#define STATUS_CYCLE	2
-
 /*
  * this is a helper macro that allows us to
  * format the bank into the proper bits for the controller
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index ea22191e8515..c4f3a683de93 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -257,26 +257,6 @@
 #define ERR_PAGE_ADDR(__bank)	(0x440 + ((__bank) * 0x50))
 #define ERR_BLOCK_ADDR(__bank)	(0x450 + ((__bank) * 0x50))
 
-#define DATA_INTR				0x550
-#define     DATA_INTR__WRITE_SPACE_AV			0x0001
-#define     DATA_INTR__READ_DATA_AV			0x0002
-
-#define DATA_INTR_EN				0x560
-#define     DATA_INTR_EN__WRITE_SPACE_AV		0x0001
-#define     DATA_INTR_EN__READ_DATA_AV			0x0002
-
-#define GPREG_0					0x570
-#define     GPREG_0__VALUE				0xffff
-
-#define GPREG_1					0x580
-#define     GPREG_1__VALUE				0xffff
-
-#define GPREG_2					0x590
-#define     GPREG_2__VALUE				0xffff
-
-#define GPREG_3					0x5a0
-#define     GPREG_3__VALUE				0xffff
-
 #define ECC_THRESHOLD				0x600
 #define     ECC_THRESHOLD__VALUE			0x03ff
 
@@ -331,69 +311,15 @@
 #define     CHNL_ACTIVE__CHANNEL2			0x0004
 #define     CHNL_ACTIVE__CHANNEL3			0x0008
 
-#define ACTIVE_SRC_ID				0x800
-#define     ACTIVE_SRC_ID__VALUE			0x00ff
-
-#define PTN_INTR					0x810
-#define     PTN_INTR__CONFIG_ERROR			0x0001
-#define     PTN_INTR__ACCESS_ERROR_BANK0		0x0002
-#define     PTN_INTR__ACCESS_ERROR_BANK1		0x0004
-#define     PTN_INTR__ACCESS_ERROR_BANK2		0x0008
-#define     PTN_INTR__ACCESS_ERROR_BANK3		0x0010
-#define     PTN_INTR__REG_ACCESS_ERROR			0x0020
-
-#define PTN_INTR_EN				0x820
-#define     PTN_INTR_EN__CONFIG_ERROR			0x0001
-#define     PTN_INTR_EN__ACCESS_ERROR_BANK0		0x0002
-#define     PTN_INTR_EN__ACCESS_ERROR_BANK1		0x0004
-#define     PTN_INTR_EN__ACCESS_ERROR_BANK2		0x0008
-#define     PTN_INTR_EN__ACCESS_ERROR_BANK3		0x0010
-#define     PTN_INTR_EN__REG_ACCESS_ERROR		0x0020
-
-#define PERM_SRC_ID(__bank)	(0x830 + ((__bank) * 0x40))
-#define     PERM_SRC_ID__SRCID				0x00ff
-#define     PERM_SRC_ID__DIRECT_ACCESS_ACTIVE		0x0800
-#define     PERM_SRC_ID__WRITE_ACTIVE			0x2000
-#define     PERM_SRC_ID__READ_ACTIVE			0x4000
-#define     PERM_SRC_ID__PARTITION_VALID		0x8000
-
-#define MIN_BLK_ADDR(__bank)	(0x840 + ((__bank) * 0x40))
-#define     MIN_BLK_ADDR__VALUE				0xffff
-
-#define MAX_BLK_ADDR(__bank)	(0x850 + ((__bank) * 0x40))
-#define     MAX_BLK_ADDR__VALUE				0xffff
-
-#define MIN_MAX_BANK(__bank)	(0x860 + ((__bank) * 0x40))
-#define     MIN_MAX_BANK__MIN_VALUE			0x0003
-#define     MIN_MAX_BANK__MAX_VALUE			0x000c
-
-
-/* ffsdefs.h */
-#define CLEAR 0                 /*use this to clear a field instead of "fail"*/
-#define SET   1                 /*use this to set a field instead of "pass"*/
 #define FAIL 1                  /*failed flag*/
 #define PASS 0                  /*success flag*/
-#define ERR -1                  /*error flag*/
-
-/* lld.h */
-#define GOOD_BLOCK 0
-#define DEFECTIVE_BLOCK 1
-#define READ_ERROR 2
 
 #define CLK_X  5
 #define CLK_MULTI 4
 
-/* KBV - Updated to LNW scratch register address */
-#define SCRATCH_REG_ADDR    CONFIG_MTD_NAND_DENALI_SCRATCH_REG_ADDR
-#define SCRATCH_REG_SIZE    64
-
-#define GLOB_HWCTL_DEFAULT_BLKS    2048
-
 #define SUPPORT_15BITECC        1
 #define SUPPORT_8BITECC         1
 
-#define CUSTOM_CONF_PARAMS      0
-
 #define ONFI_BLOOM_TIME         1
 #define MODE5_WORKAROUND        0
 
@@ -403,31 +329,6 @@
 #define MODE_10    0x08000000
 #define MODE_11    0x0C000000
 
-
-#define DATA_TRANSFER_MODE              0
-#define PROTECTION_PER_BLOCK            1
-#define LOAD_WAIT_COUNT                 2
-#define PROGRAM_WAIT_COUNT              3
-#define ERASE_WAIT_COUNT                4
-#define INT_MONITOR_CYCLE_COUNT         5
-#define READ_BUSY_PIN_ENABLED           6
-#define MULTIPLANE_OPERATION_SUPPORT    7
-#define PRE_FETCH_MODE                  8
-#define CE_DONT_CARE_SUPPORT            9
-#define COPYBACK_SUPPORT                10
-#define CACHE_WRITE_SUPPORT             11
-#define CACHE_READ_SUPPORT              12
-#define NUM_PAGES_IN_BLOCK              13
-#define ECC_ENABLE_SELECT               14
-#define WRITE_ENABLE_2_READ_ENABLE      15
-#define ADDRESS_2_DATA                  16
-#define READ_ENABLE_2_WRITE_ENABLE      17
-#define TWO_ROW_ADDRESS_CYCLES          18
-#define MULTIPLANE_ADDRESS_RESTRICT     19
-#define ACC_CLOCKS                      20
-#define READ_WRITE_ENABLE_LOW_COUNT     21
-#define READ_WRITE_ENABLE_HIGH_COUNT    22
-
 #define ECC_SECTOR_SIZE     512
 
 struct nand_buf {
-- 
cgit v1.2.3


From 6b2fc9d495aeaad4116827fc21433c85f82a134a Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:03 +0900
Subject: mtd: nand: denali: remove redundant define of BANK(x)

This macro is defined twice in denali.c (around line 98 and
line 651), so remove the second one.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index f993e1378f94..c9806e644a43 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -648,7 +648,6 @@ static irqreturn_t denali_isr(int irq, void *dev_id)
 	spin_unlock(&denali->irq_lock);
 	return result;
 }
-#define BANK(x) ((x) << 24)
 
 static uint32_t wait_for_irq(struct denali_nand_info *denali, uint32_t irq_mask)
 {
-- 
cgit v1.2.3


From 264a7cabb80e8e9f75cee5cecdf0ed45839c4566 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:04 +0900
Subject: mtd: nand: denali: remove more unused struct members

These members are not used at all.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index c4f3a683de93..6573ea59d4ff 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -350,7 +350,6 @@ struct denali_nand_info {
 	struct nand_buf buf;
 	struct device *dev;
 	int total_used_banks;
-	uint32_t block;  /* stored for future use */
 	uint16_t page;
 	void __iomem *flash_reg;  /* Mapped io reg base address */
 	void __iomem *flash_mem;  /* Mapped io reg base address */
@@ -359,7 +358,6 @@ struct denali_nand_info {
 	struct completion complete;
 	spinlock_t irq_lock;
 	uint32_t irq_status;
-	int irq_debug_array[32];
 	int irq;
 
 	uint32_t devnum;	/* represent how many nands connected */
-- 
cgit v1.2.3


From 60ca41f1d9ccf8b57f4ba05d8e8102658a36ef3b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:05 +0900
Subject: mtd: nand: denali: fix comment of denali_nand_info::flash_mem

The same comment "Mapped io reg base address" for flash_reg and
flash_mem probably due to the mistake of copy-paste work.
Of course, the latter is not the register base address.

Reword the comments using the terminology in the Denali User's Guide.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index 6573ea59d4ff..403a7c82d1c7 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -351,8 +351,8 @@ struct denali_nand_info {
 	struct device *dev;
 	int total_used_banks;
 	uint16_t page;
-	void __iomem *flash_reg;  /* Mapped io reg base address */
-	void __iomem *flash_mem;  /* Mapped io reg base address */
+	void __iomem *flash_reg;	/* Register Interface */
+	void __iomem *flash_mem;	/* Host Data/Command Interface */
 
 	/* elements used by ISR */
 	struct completion complete;
-- 
cgit v1.2.3


From 1aded58a27f253f399dcd5746417e684c92ccd7d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:06 +0900
Subject: mtd: nand: denali: consolidate INTR_STATUS__* and INTR_EN__* macros

The interrupts are enabled by INTR_EN register, then asserted
interrupts can be observed via INTR_STATUS register.

The bit fields are identical between INTR_EN and INTR_STATUS, so we
can merge the bit field macros.  Likewise for DATA_INTR.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 56 ++++++++++++++++++++-----------------------
 drivers/mtd/nand/denali.h | 61 ++++++++++++++---------------------------------
 2 files changed, 44 insertions(+), 73 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index c9806e644a43..2c59eb3c117e 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -45,16 +45,16 @@ MODULE_PARM_DESC(onfi_timing_mode,
  * We define a macro here that combines all interrupts this driver uses into
  * a single constant value, for convenience.
  */
-#define DENALI_IRQ_ALL	(INTR_STATUS__DMA_CMD_COMP | \
-			INTR_STATUS__ECC_TRANSACTION_DONE | \
-			INTR_STATUS__ECC_ERR | \
-			INTR_STATUS__PROGRAM_FAIL | \
-			INTR_STATUS__LOAD_COMP | \
-			INTR_STATUS__PROGRAM_COMP | \
-			INTR_STATUS__TIME_OUT | \
-			INTR_STATUS__ERASE_FAIL | \
-			INTR_STATUS__RST_COMP | \
-			INTR_STATUS__ERASE_COMP)
+#define DENALI_IRQ_ALL	(INTR__DMA_CMD_COMP | \
+			INTR__ECC_TRANSACTION_DONE | \
+			INTR__ECC_ERR | \
+			INTR__PROGRAM_FAIL | \
+			INTR__LOAD_COMP | \
+			INTR__PROGRAM_COMP | \
+			INTR__TIME_OUT | \
+			INTR__ERASE_FAIL | \
+			INTR__RST_COMP | \
+			INTR__ERASE_COMP)
 
 /*
  * indicates whether or not the internal value for the flash bank is
@@ -159,7 +159,7 @@ static void read_status(struct denali_nand_info *denali)
 static void reset_bank(struct denali_nand_info *denali)
 {
 	uint32_t irq_status;
-	uint32_t irq_mask = INTR_STATUS__RST_COMP | INTR_STATUS__TIME_OUT;
+	uint32_t irq_mask = INTR__RST_COMP | INTR__TIME_OUT;
 
 	clear_interrupts(denali);
 
@@ -167,7 +167,7 @@ static void reset_bank(struct denali_nand_info *denali)
 
 	irq_status = wait_for_irq(denali, irq_mask);
 
-	if (irq_status & INTR_STATUS__TIME_OUT)
+	if (irq_status & INTR__TIME_OUT)
 		dev_err(denali->dev, "reset bank failed.\n");
 }
 
@@ -177,22 +177,22 @@ static uint16_t denali_nand_reset(struct denali_nand_info *denali)
 	int i;
 
 	for (i = 0; i < denali->max_banks; i++)
-		iowrite32(INTR_STATUS__RST_COMP | INTR_STATUS__TIME_OUT,
+		iowrite32(INTR__RST_COMP | INTR__TIME_OUT,
 		denali->flash_reg + INTR_STATUS(i));
 
 	for (i = 0; i < denali->max_banks; i++) {
 		iowrite32(1 << i, denali->flash_reg + DEVICE_RESET);
 		while (!(ioread32(denali->flash_reg + INTR_STATUS(i)) &
-			(INTR_STATUS__RST_COMP | INTR_STATUS__TIME_OUT)))
+			(INTR__RST_COMP | INTR__TIME_OUT)))
 			cpu_relax();
 		if (ioread32(denali->flash_reg + INTR_STATUS(i)) &
-			INTR_STATUS__TIME_OUT)
+			INTR__TIME_OUT)
 			dev_dbg(denali->dev,
 			"NAND Reset operation timed out on bank %d\n", i);
 	}
 
 	for (i = 0; i < denali->max_banks; i++)
-		iowrite32(INTR_STATUS__RST_COMP | INTR_STATUS__TIME_OUT,
+		iowrite32(INTR__RST_COMP | INTR__TIME_OUT,
 			  denali->flash_reg + INTR_STATUS(i));
 
 	return PASS;
@@ -716,7 +716,7 @@ static int denali_send_pipeline_cmd(struct denali_nand_info *denali,
 	uint32_t addr, cmd, irq_status, irq_mask;
 
 	if (op == DENALI_READ)
-		irq_mask = INTR_STATUS__LOAD_COMP;
+		irq_mask = INTR__LOAD_COMP;
 	else if (op == DENALI_WRITE)
 		irq_mask = 0;
 	else
@@ -823,8 +823,7 @@ static int write_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 	uint32_t irq_status;
-	uint32_t irq_mask = INTR_STATUS__PROGRAM_COMP |
-						INTR_STATUS__PROGRAM_FAIL;
+	uint32_t irq_mask = INTR__PROGRAM_COMP | INTR__PROGRAM_FAIL;
 	int status = 0;
 
 	denali->page = page;
@@ -851,7 +850,7 @@ static int write_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint32_t irq_mask = INTR_STATUS__LOAD_COMP;
+	uint32_t irq_mask = INTR__LOAD_COMP;
 	uint32_t irq_status, addr, cmd;
 
 	denali->page = page;
@@ -912,7 +911,7 @@ static bool handle_ecc(struct denali_nand_info *denali, uint8_t *buf,
 	bool check_erased_page = false;
 	unsigned int bitflips = 0;
 
-	if (irq_status & INTR_STATUS__ECC_ERR) {
+	if (irq_status & INTR__ECC_ERR) {
 		/* read the ECC errors. we'll ignore them for now */
 		uint32_t err_address, err_correction_info, err_byte,
 			 err_sector, err_device, err_correction_value;
@@ -969,7 +968,7 @@ static bool handle_ecc(struct denali_nand_info *denali, uint8_t *buf,
 		 * for a while for this interrupt
 		 */
 		while (!(read_interrupt_status(denali) &
-				INTR_STATUS__ECC_TRANSACTION_DONE))
+				INTR__ECC_TRANSACTION_DONE))
 			cpu_relax();
 		clear_interrupts(denali);
 		denali_set_intr_modes(denali, true);
@@ -1020,8 +1019,7 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	dma_addr_t addr = denali->buf.dma_buf;
 	size_t size = mtd->writesize + mtd->oobsize;
 	uint32_t irq_status;
-	uint32_t irq_mask = INTR_STATUS__DMA_CMD_COMP |
-						INTR_STATUS__PROGRAM_FAIL;
+	uint32_t irq_mask = INTR__DMA_CMD_COMP | INTR__PROGRAM_FAIL;
 
 	/*
 	 * if it is a raw xfer, we want to disable ecc and send the spare area.
@@ -1119,8 +1117,7 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	size_t size = mtd->writesize + mtd->oobsize;
 
 	uint32_t irq_status;
-	uint32_t irq_mask = INTR_STATUS__ECC_TRANSACTION_DONE |
-			    INTR_STATUS__ECC_ERR;
+	uint32_t irq_mask = INTR__ECC_TRANSACTION_DONE | INTR__ECC_ERR;
 	bool check_erased_page = false;
 
 	if (page != denali->page) {
@@ -1168,7 +1165,7 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 	dma_addr_t addr = denali->buf.dma_buf;
 	size_t size = mtd->writesize + mtd->oobsize;
-	uint32_t irq_mask = INTR_STATUS__DMA_CMD_COMP;
+	uint32_t irq_mask = INTR__DMA_CMD_COMP;
 
 	if (page != denali->page) {
 		dev_err(denali->dev,
@@ -1241,10 +1238,9 @@ static int denali_erase(struct mtd_info *mtd, int page)
 	index_addr(denali, cmd, 0x1);
 
 	/* wait for erase to complete or failure to occur */
-	irq_status = wait_for_irq(denali, INTR_STATUS__ERASE_COMP |
-					INTR_STATUS__ERASE_FAIL);
+	irq_status = wait_for_irq(denali, INTR__ERASE_COMP | INTR__ERASE_FAIL);
 
-	return irq_status & INTR_STATUS__ERASE_FAIL ? NAND_STATUS_FAIL : PASS;
+	return irq_status & INTR__ERASE_FAIL ? NAND_STATUS_FAIL : PASS;
 }
 
 static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index 403a7c82d1c7..8df2285b678b 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -218,40 +218,22 @@
 
 #define INTR_STATUS(__bank)	(0x410 + ((__bank) * 0x50))
 #define INTR_EN(__bank)		(0x420 + ((__bank) * 0x50))
-
-#define     INTR_STATUS__ECC_TRANSACTION_DONE		0x0001
-#define     INTR_STATUS__ECC_ERR			0x0002
-#define     INTR_STATUS__DMA_CMD_COMP			0x0004
-#define     INTR_STATUS__TIME_OUT			0x0008
-#define     INTR_STATUS__PROGRAM_FAIL			0x0010
-#define     INTR_STATUS__ERASE_FAIL			0x0020
-#define     INTR_STATUS__LOAD_COMP			0x0040
-#define     INTR_STATUS__PROGRAM_COMP			0x0080
-#define     INTR_STATUS__ERASE_COMP			0x0100
-#define     INTR_STATUS__PIPE_CPYBCK_CMD_COMP		0x0200
-#define     INTR_STATUS__LOCKED_BLK			0x0400
-#define     INTR_STATUS__UNSUP_CMD			0x0800
-#define     INTR_STATUS__INT_ACT			0x1000
-#define     INTR_STATUS__RST_COMP			0x2000
-#define     INTR_STATUS__PIPE_CMD_ERR			0x4000
-#define     INTR_STATUS__PAGE_XFER_INC			0x8000
-
-#define     INTR_EN__ECC_TRANSACTION_DONE		0x0001
-#define     INTR_EN__ECC_ERR				0x0002
-#define     INTR_EN__DMA_CMD_COMP			0x0004
-#define     INTR_EN__TIME_OUT				0x0008
-#define     INTR_EN__PROGRAM_FAIL			0x0010
-#define     INTR_EN__ERASE_FAIL				0x0020
-#define     INTR_EN__LOAD_COMP				0x0040
-#define     INTR_EN__PROGRAM_COMP			0x0080
-#define     INTR_EN__ERASE_COMP				0x0100
-#define     INTR_EN__PIPE_CPYBCK_CMD_COMP		0x0200
-#define     INTR_EN__LOCKED_BLK				0x0400
-#define     INTR_EN__UNSUP_CMD				0x0800
-#define     INTR_EN__INT_ACT				0x1000
-#define     INTR_EN__RST_COMP				0x2000
-#define     INTR_EN__PIPE_CMD_ERR			0x4000
-#define     INTR_EN__PAGE_XFER_INC			0x8000
+#define     INTR__ECC_TRANSACTION_DONE			0x0001
+#define     INTR__ECC_ERR				0x0002
+#define     INTR__DMA_CMD_COMP				0x0004
+#define     INTR__TIME_OUT				0x0008
+#define     INTR__PROGRAM_FAIL				0x0010
+#define     INTR__ERASE_FAIL				0x0020
+#define     INTR__LOAD_COMP				0x0040
+#define     INTR__PROGRAM_COMP				0x0080
+#define     INTR__ERASE_COMP				0x0100
+#define     INTR__PIPE_CPYBCK_CMD_COMP			0x0200
+#define     INTR__LOCKED_BLK				0x0400
+#define     INTR__UNSUP_CMD				0x0800
+#define     INTR__INT_ACT				0x1000
+#define     INTR__RST_COMP				0x2000
+#define     INTR__PIPE_CMD_ERR				0x4000
+#define     INTR__PAGE_XFER_INC				0x8000
 
 #define PAGE_CNT(__bank)	(0x430 + ((__bank) * 0x50))
 #define ERR_PAGE_ADDR(__bank)	(0x440 + ((__bank) * 0x50))
@@ -284,20 +266,13 @@
 #define     IGNORE_ECC_DONE__FLAG			0x0001
 
 #define DMA_INTR				0x720
+#define DMA_INTR_EN				0x730
 #define     DMA_INTR__TARGET_ERROR			0x0001
 #define     DMA_INTR__DESC_COMP_CHANNEL0		0x0002
 #define     DMA_INTR__DESC_COMP_CHANNEL1		0x0004
 #define     DMA_INTR__DESC_COMP_CHANNEL2		0x0008
 #define     DMA_INTR__DESC_COMP_CHANNEL3		0x0010
-#define     DMA_INTR__MEMCOPY_DESC_COMP		0x0020
-
-#define DMA_INTR_EN				0x730
-#define     DMA_INTR_EN__TARGET_ERROR			0x0001
-#define     DMA_INTR_EN__DESC_COMP_CHANNEL0		0x0002
-#define     DMA_INTR_EN__DESC_COMP_CHANNEL1		0x0004
-#define     DMA_INTR_EN__DESC_COMP_CHANNEL2		0x0008
-#define     DMA_INTR_EN__DESC_COMP_CHANNEL3		0x0010
-#define     DMA_INTR_EN__MEMCOPY_DESC_COMP		0x0020
+#define     DMA_INTR__MEMCOPY_DESC_COMP			0x0020
 
 #define TARGET_ERR_ADDR_LO			0x740
 #define     TARGET_ERR_ADDR_LO__VALUE			0xffff
-- 
cgit v1.2.3


From be72a4aa8ec1c13a55708bd0285106a5020ff72f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:07 +0900
Subject: mtd: nand: denali: introduce capability flag

The Denali NAND controller IP has various customizable features.
SoC vendors can choose desired functions when a delivery RTL is
created.  It means there are several variants for this IP.  For
example, the Intel version is equipped with 32bit DMA, whereas the
IP for UniPhier SoC family with 64bit DMA.

This driver was originally written for some Intel platforms with
Intel specific things hard-coded.  What is worse, the revision
register of this IP does not work to distinguish such features.
We need to do something to make the driver available for other SoCs.

Let's introduce a caps member to the denali_nand_info structure to
switch on/off various features.  Also, add struct denali_dt_data to
store the capability associated with compatible string.

Boris suggested this approach in discussion [1] instead of a new DT
property for every feature.

[1] https://lkml.org/lkml/2016/3/29/142

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.h    |  1 +
 drivers/mtd/nand/denali_dt.c | 18 +++++++++---------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index 8df2285b678b..eed001dde3b4 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -338,6 +338,7 @@ struct denali_nand_info {
 	uint32_t devnum;	/* represent how many nands connected */
 	uint32_t bbtskipbytes;
 	uint32_t max_banks;
+	unsigned int caps;
 };
 
 extern int denali_init(struct denali_nand_info *denali);
diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
index 5607fcd3b8ed..293ddb8e5a32 100644
--- a/drivers/mtd/nand/denali_dt.c
+++ b/drivers/mtd/nand/denali_dt.c
@@ -29,6 +29,10 @@ struct denali_dt {
 	struct clk		*clk;
 };
 
+struct denali_dt_data {
+	unsigned int caps;
+};
+
 static const struct of_device_id denali_nand_dt_ids[] = {
 		{ .compatible = "denali,denali-nand-dt" },
 		{ /* sentinel */ }
@@ -42,23 +46,19 @@ static int denali_dt_probe(struct platform_device *ofdev)
 {
 	struct resource *denali_reg, *nand_data;
 	struct denali_dt *dt;
+	const struct denali_dt_data *data;
 	struct denali_nand_info *denali;
 	int ret;
-	const struct of_device_id *of_id;
-
-	of_id = of_match_device(denali_nand_dt_ids, &ofdev->dev);
-	if (of_id) {
-		ofdev->id_entry = of_id->data;
-	} else {
-		pr_err("Failed to find the right device id.\n");
-		return -ENOMEM;
-	}
 
 	dt = devm_kzalloc(&ofdev->dev, sizeof(*dt), GFP_KERNEL);
 	if (!dt)
 		return -ENOMEM;
 	denali = &dt->denali;
 
+	data = of_device_get_match_data(&ofdev->dev);
+	if (data)
+		denali->caps = data->caps;
+
 	denali->platform = DT;
 	denali->dev = &ofdev->dev;
 	denali->irq = platform_get_irq(ofdev, 0);
-- 
cgit v1.2.3


From e30b46909bdf3ac8699a47cfd64b44fabc29f253 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:08 +0900
Subject: mtd: nand: denali: use int where no reason to use fixed width
 variable

The page number is generally stored in an integer type variable.
The uint16_t does not have enough width.  I see no reason to use
uint32_t for other members, either.  Just use int.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index eed001dde3b4..7b2d7851ed7b 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -325,7 +325,7 @@ struct denali_nand_info {
 	struct nand_buf buf;
 	struct device *dev;
 	int total_used_banks;
-	uint16_t page;
+	int page;
 	void __iomem *flash_reg;	/* Register Interface */
 	void __iomem *flash_mem;	/* Host Data/Command Interface */
 
@@ -335,9 +335,9 @@ struct denali_nand_info {
 	uint32_t irq_status;
 	int irq;
 
-	uint32_t devnum;	/* represent how many nands connected */
-	uint32_t bbtskipbytes;
-	uint32_t max_banks;
+	int devnum;	/* represent how many nands connected */
+	int bbtskipbytes;
+	int max_banks;
 	unsigned int caps;
 };
 
-- 
cgit v1.2.3


From 3158fa0e739615769cc047d2428f30f4c3b6640e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 09:17:49 +0900
Subject: mtd: nand: do not check R/B# for CMD_READID in nand_command(_lp)

Read ID (0x90) command does not toggle the R/B# pin.  Without this
patch, NAND_CMD_READID falls into the default: label, then R/B# is
checked by chip->dev_ready().

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index afbb80f4375a..b7daedee23b5 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -668,6 +668,7 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
 	case NAND_CMD_ERASE2:
 	case NAND_CMD_SEQIN:
 	case NAND_CMD_STATUS:
+	case NAND_CMD_READID:
 		return;
 
 	case NAND_CMD_RESET:
@@ -786,6 +787,7 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 	case NAND_CMD_ERASE2:
 	case NAND_CMD_SEQIN:
 	case NAND_CMD_STATUS:
+	case NAND_CMD_READID:
 		return;
 
 	case NAND_CMD_RNDIN:
-- 
cgit v1.2.3


From c5d664aa5a4c4b257a54eb35045031630d105f49 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 09:17:50 +0900
Subject: mtd: nand: do not check R/B# for CMD_SET_FEATURES in
 nand_command(_lp)

Set Features (0xEF) command toggles the R/B# pin after 4 sub feature
parameters are written.

Currently, nand_command(_lp) calls chip->dev_ready immediately after
the address cycle because NAND_CMD_SET_FEATURES falls into default:
label.  No wait is needed at this point.

If you see nand_onfi_set_features(), R/B# is already cared by the
chip->waitfunc call.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index b7daedee23b5..c4c3386e4c0a 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -669,6 +669,7 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
 	case NAND_CMD_SEQIN:
 	case NAND_CMD_STATUS:
 	case NAND_CMD_READID:
+	case NAND_CMD_SET_FEATURES:
 		return;
 
 	case NAND_CMD_RESET:
@@ -788,6 +789,7 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 	case NAND_CMD_SEQIN:
 	case NAND_CMD_STATUS:
 	case NAND_CMD_READID:
+	case NAND_CMD_SET_FEATURES:
 		return;
 
 	case NAND_CMD_RNDIN:
-- 
cgit v1.2.3


From e75276638c1423d286e425fd29375e5736c7635c Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 24 Mar 2017 10:18:44 +0100
Subject: iommu/exynos: Don't open-code loop unrolling

IOMMU domain allocation is not performance critical operation, so remove
hand made optimisation of unrolled initialization loop and leave this to
the compiler.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/exynos-iommu.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index a7e0821c9967..b83df7196e76 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -741,16 +741,8 @@ static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type)
 		goto err_counter;
 
 	/* Workaround for System MMU v3.3 to prevent caching 1MiB mapping */
-	for (i = 0; i < NUM_LV1ENTRIES; i += 8) {
-		domain->pgtable[i + 0] = ZERO_LV2LINK;
-		domain->pgtable[i + 1] = ZERO_LV2LINK;
-		domain->pgtable[i + 2] = ZERO_LV2LINK;
-		domain->pgtable[i + 3] = ZERO_LV2LINK;
-		domain->pgtable[i + 4] = ZERO_LV2LINK;
-		domain->pgtable[i + 5] = ZERO_LV2LINK;
-		domain->pgtable[i + 6] = ZERO_LV2LINK;
-		domain->pgtable[i + 7] = ZERO_LV2LINK;
-	}
+	for (i = 0; i < NUM_LV1ENTRIES; i++)
+		domain->pgtable[i] = ZERO_LV2LINK;
 
 	handle = dma_map_single(dma_dev, domain->pgtable, LV1TABLE_SIZE,
 				DMA_TO_DEVICE);
-- 
cgit v1.2.3


From d5bf739dc7628a35b334cdb9058750388927760a Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 24 Mar 2017 10:19:01 +0100
Subject: iommu/exynos: Use smarter TLB flush method for v5 SYSMMU

SYSMMU v5 has dedicated registers to perform TLB flush range operation,
so use them instead of looping with FLUSH_ENTRY command.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/exynos-iommu.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index b83df7196e76..88b26d3e8c2c 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -171,6 +171,9 @@ static u32 lv2ent_offset(sysmmu_iova_t iova)
 #define REG_V5_PT_BASE_PFN	0x00C
 #define REG_V5_MMU_FLUSH_ALL	0x010
 #define REG_V5_MMU_FLUSH_ENTRY	0x014
+#define REG_V5_MMU_FLUSH_RANGE	0x018
+#define REG_V5_MMU_FLUSH_START	0x020
+#define REG_V5_MMU_FLUSH_END	0x024
 #define REG_V5_INT_STATUS	0x060
 #define REG_V5_INT_CLEAR	0x064
 #define REG_V5_FAULT_AR_VA	0x070
@@ -319,14 +322,23 @@ static void __sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data,
 {
 	unsigned int i;
 
-	for (i = 0; i < num_inv; i++) {
-		if (MMU_MAJ_VER(data->version) < 5)
+	if (MMU_MAJ_VER(data->version) < 5) {
+		for (i = 0; i < num_inv; i++) {
 			writel((iova & SPAGE_MASK) | 1,
 				     data->sfrbase + REG_MMU_FLUSH_ENTRY);
-		else
+			iova += SPAGE_SIZE;
+		}
+	} else {
+		if (num_inv == 1) {
 			writel((iova & SPAGE_MASK) | 1,
 				     data->sfrbase + REG_V5_MMU_FLUSH_ENTRY);
-		iova += SPAGE_SIZE;
+		} else {
+			writel((iova & SPAGE_MASK),
+				     data->sfrbase + REG_V5_MMU_FLUSH_START);
+			writel((iova & SPAGE_MASK) + (num_inv - 1) * SPAGE_SIZE,
+				     data->sfrbase + REG_V5_MMU_FLUSH_END);
+			writel(1, data->sfrbase + REG_V5_MMU_FLUSH_RANGE);
+		}
 	}
 }
 
-- 
cgit v1.2.3


From 1175f83cdb7a321b8b7b061d18846d58490b2654 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 22 Mar 2017 04:22:36 +0000
Subject: dmaengine: rcar-dmac: enable descriptor mode on 40bit

SYS-DMAC can use 40bit address transfer, and it supports Descriptor
Mode too. Current SYS-DMAC driver disables Descriptor Mode if it was
40bit address today. But it can use Descriptor Mode with 40bit if
transfer Source/Destination address are located in same 4GiB region
in the 40 bit address space.
This patch enables it if all condition was clear

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/sh/rcar-dmac.c | 52 ++++++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 48b22d5c8602..db41795fe42a 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -344,13 +344,19 @@ static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
 		rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
 
 	if (desc->hwdescs.use) {
-		struct rcar_dmac_xfer_chunk *chunk;
+		struct rcar_dmac_xfer_chunk *chunk =
+			list_first_entry(&desc->chunks,
+					 struct rcar_dmac_xfer_chunk, node);
 
 		dev_dbg(chan->chan.device->dev,
 			"chan%u: queue desc %p: %u@%pad\n",
 			chan->index, desc, desc->nchunks, &desc->hwdescs.dma);
 
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR,
+				     chunk->src_addr >> 32);
+		rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR,
+				     chunk->dst_addr >> 32);
 		rcar_dmac_chan_write(chan, RCAR_DMAFIXDPBASE,
 				     desc->hwdescs.dma >> 32);
 #endif
@@ -368,8 +374,6 @@ static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
 		 * should. Initialize it manually with the destination address
 		 * of the first chunk.
 		 */
-		chunk = list_first_entry(&desc->chunks,
-					 struct rcar_dmac_xfer_chunk, node);
 		rcar_dmac_chan_write(chan, RCAR_DMADAR,
 				     chunk->dst_addr & 0xffffffff);
 
@@ -855,8 +859,12 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
 	unsigned int nchunks = 0;
 	unsigned int max_chunk_size;
 	unsigned int full_size = 0;
-	bool highmem = false;
+	bool cross_boundary = false;
 	unsigned int i;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	u32 high_dev_addr;
+	u32 high_mem_addr;
+#endif
 
 	desc = rcar_dmac_desc_get(chan);
 	if (!desc)
@@ -882,6 +890,16 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
 
 		full_size += len;
 
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		if (i == 0) {
+			high_dev_addr = dev_addr >> 32;
+			high_mem_addr = mem_addr >> 32;
+		}
+
+		if ((dev_addr >> 32 != high_dev_addr) ||
+		    (mem_addr >> 32 != high_mem_addr))
+			cross_boundary = true;
+#endif
 		while (len) {
 			unsigned int size = min(len, max_chunk_size);
 
@@ -890,18 +908,14 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
 			 * Prevent individual transfers from crossing 4GB
 			 * boundaries.
 			 */
-			if (dev_addr >> 32 != (dev_addr + size - 1) >> 32)
+			if (dev_addr >> 32 != (dev_addr + size - 1) >> 32) {
 				size = ALIGN(dev_addr, 1ULL << 32) - dev_addr;
-			if (mem_addr >> 32 != (mem_addr + size - 1) >> 32)
+				cross_boundary = true;
+			}
+			if (mem_addr >> 32 != (mem_addr + size - 1) >> 32) {
 				size = ALIGN(mem_addr, 1ULL << 32) - mem_addr;
-
-			/*
-			 * Check if either of the source or destination address
-			 * can't be expressed in 32 bits. If so we can't use
-			 * hardware descriptor lists.
-			 */
-			if (dev_addr >> 32 || mem_addr >> 32)
-				highmem = true;
+				cross_boundary = true;
+			}
 #endif
 
 			chunk = rcar_dmac_xfer_chunk_get(chan);
@@ -943,13 +957,11 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
 	 * Use hardware descriptor lists if possible when more than one chunk
 	 * needs to be transferred (otherwise they don't make much sense).
 	 *
-	 * The highmem check currently covers the whole transfer. As an
-	 * optimization we could use descriptor lists for consecutive lowmem
-	 * chunks and direct manual mode for highmem chunks. Whether the
-	 * performance improvement would be significant enough compared to the
-	 * additional complexity remains to be investigated.
+	 * Source/Destination address should be located in same 4GiB region
+	 * in the 40bit address space when it uses Hardware descriptor,
+	 * and cross_boundary is checking it.
 	 */
-	desc->hwdescs.use = !highmem && nchunks > 1;
+	desc->hwdescs.use = !cross_boundary && nchunks > 1;
 	if (desc->hwdescs.use) {
 		if (rcar_dmac_fill_hwdesc(chan, desc) < 0)
 			desc->hwdescs.use = false;
-- 
cgit v1.2.3


From c3a4528788cf09d709217f21be4b0fc38ada17d0 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sat, 25 Mar 2017 19:04:02 -0400
Subject: dmaengine: qcom_hidma: disable/enable IRQs on pause/resume

Once the channels are stopped, disable interrupts to make sure no new
HW interaction can happen.

Similarly, re-enable the interrupts only if we know that channel is
operational again.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/qcom/hidma_ll.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c
index 6645bdf0d151..1530a661518d 100644
--- a/drivers/dma/qcom/hidma_ll.c
+++ b/drivers/dma/qcom/hidma_ll.c
@@ -499,6 +499,9 @@ int hidma_ll_enable(struct hidma_lldev *lldev)
 	lldev->trch_state = HIDMA_CH_ENABLED;
 	lldev->evch_state = HIDMA_CH_ENABLED;
 
+	/* enable irqs */
+	writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
 	return 0;
 }
 
@@ -596,6 +599,9 @@ int hidma_ll_disable(struct hidma_lldev *lldev)
 
 	lldev->trch_state = HIDMA_CH_SUSPENDED;
 	lldev->evch_state = HIDMA_CH_SUSPENDED;
+
+	/* disable interrupts */
+	writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
 	return 0;
 }
 
-- 
cgit v1.2.3


From dc7c733acb04307563da357af6118b12948c3f50 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sat, 25 Mar 2017 19:04:03 -0400
Subject: dmaengine: qcom_hidma: pause the channel on shutdown

We need to ensure that all DMAs and interrupts are cleared during
shutdown operation in order for kexec to start the next kernel clearly.

Otherwise, HW could be performing a DMA into random addresses in the
middle of second kernel start.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/qcom/hidma.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c
index 3c982c96b4b7..5072a7d306d4 100644
--- a/drivers/dma/qcom/hidma.c
+++ b/drivers/dma/qcom/hidma.c
@@ -865,6 +865,20 @@ bailout:
 	return rc;
 }
 
+static void hidma_shutdown(struct platform_device *pdev)
+{
+	struct hidma_dev *dmadev = platform_get_drvdata(pdev);
+
+	dev_info(dmadev->ddev.dev, "HI-DMA engine shutdown\n");
+
+	pm_runtime_get_sync(dmadev->ddev.dev);
+	if (hidma_ll_disable(dmadev->lldev))
+		dev_warn(dmadev->ddev.dev, "channel did not stop\n");
+	pm_runtime_mark_last_busy(dmadev->ddev.dev);
+	pm_runtime_put_autosuspend(dmadev->ddev.dev);
+
+}
+
 static int hidma_remove(struct platform_device *pdev)
 {
 	struct hidma_dev *dmadev = platform_get_drvdata(pdev);
@@ -908,6 +922,7 @@ MODULE_DEVICE_TABLE(of, hidma_match);
 static struct platform_driver hidma_driver = {
 	.probe = hidma_probe,
 	.remove = hidma_remove,
+	.shutdown = hidma_shutdown,
 	.driver = {
 		   .name = "hidma",
 		   .of_match_table = hidma_match,
-- 
cgit v1.2.3


From a08f2673dde1aaba22c79ed016d5901f1b042401 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 16 Mar 2017 16:18:44 +0200
Subject: dmaengine: cppi41: fix cppi41_dma_tx_status() logic

It makes sense to set residue when channel is in progress. Otherwise it
should be 0 since transfer is completed. Meanwhile this patch doesn't
prevent to set residue value anyway.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 675de6a27e39..a583c644a7bc 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -420,11 +420,9 @@ static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
 	struct cppi41_channel *c = to_cpp41_chan(chan);
 	enum dma_status ret;
 
-	/* lock */
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (txstate && ret == DMA_COMPLETE)
-		txstate->residue = c->residue;
-	/* unlock */
+
+	dma_set_residue(txstate, c->residue);
 
 	return ret;
 }
-- 
cgit v1.2.3


From 8e3ba95f41901e0da0bb3151ed696dca6f7ec511 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 16 Mar 2017 16:18:45 +0200
Subject: dmaengine: cppi41: use managed functions devm_*()

This makes the error handling much more simpler than open-coding
everything and in addition makes the probe function smaller an tidier.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 64 ++++++++++++++++++++--------------------------------
 1 file changed, 25 insertions(+), 39 deletions(-)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index a583c644a7bc..1c63e1cf8c59 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -739,21 +739,9 @@ static int cppi41_stop_chan(struct dma_chan *chan)
 	return 0;
 }
 
-static void cleanup_chans(struct cppi41_dd *cdd)
-{
-	while (!list_empty(&cdd->ddev.channels)) {
-		struct cppi41_channel *cchan;
-
-		cchan = list_first_entry(&cdd->ddev.channels,
-				struct cppi41_channel, chan.device_node);
-		list_del(&cchan->chan.device_node);
-		kfree(cchan);
-	}
-}
-
 static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
 {
-	struct cppi41_channel *cchan;
+	struct cppi41_channel *cchan, *chans;
 	int i;
 	u32 n_chans = cdd->n_chans;
 
@@ -763,10 +751,12 @@ static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
 	 */
 	n_chans *= 2;
 
+	chans = devm_kcalloc(dev, n_chans, sizeof(*chans), GFP_KERNEL);
+	if (!chans)
+		return -ENOMEM;
+
 	for (i = 0; i < n_chans; i++) {
-		cchan = kzalloc(sizeof(*cchan), GFP_KERNEL);
-		if (!cchan)
-			goto err;
+		cchan = &chans[i];
 
 		cchan->cdd = cdd;
 		if (i & 1) {
@@ -786,9 +776,6 @@ static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
 	cdd->first_td_desc = n_chans;
 
 	return 0;
-err:
-	cleanup_chans(cdd);
-	return -ENOMEM;
 }
 
 static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
@@ -1018,6 +1005,7 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	struct cppi41_dd *cdd;
 	struct device *dev = &pdev->dev;
 	const struct cppi_glue_infos *glue_info;
+	struct resource *mem;
 	int index;
 	int irq;
 	int ret;
@@ -1050,18 +1038,26 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	if (index < 0)
 		return index;
 
-	cdd->ctrl_mem = of_iomap(dev->of_node, index);
-	cdd->sched_mem = of_iomap(dev->of_node, index + 1);
-	cdd->qmgr_mem = of_iomap(dev->of_node, index + 2);
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	cdd->ctrl_mem = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(cdd->ctrl_mem))
+		return PTR_ERR(cdd->ctrl_mem);
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	cdd->sched_mem = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(cdd->sched_mem))
+		return PTR_ERR(cdd->sched_mem);
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	cdd->qmrg_mem = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(cdd->qmrg_mem))
+		return PTR_ERR(cdd->qmrg_mem);
+
 	spin_lock_init(&cdd->lock);
 	INIT_LIST_HEAD(&cdd->pending);
 
 	platform_set_drvdata(pdev, cdd);
 
-	if (!cdd->ctrl_mem || !cdd->sched_mem ||
-			!cdd->qmgr_mem)
-		return -ENXIO;
-
 	pm_runtime_enable(dev);
 	pm_runtime_set_autosuspend_delay(dev, 100);
 	pm_runtime_use_autosuspend(dev);
@@ -1091,18 +1087,18 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	irq = irq_of_parse_and_map(dev->of_node, 0);
 	if (!irq) {
 		ret = -EINVAL;
-		goto err_irq;
+		goto err_chans;
 	}
 
 	ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED,
 			dev_name(dev), cdd);
 	if (ret)
-		goto err_irq;
+		goto err_chans;
 	cdd->irq = irq;
 
 	ret = dma_async_device_register(&cdd->ddev);
 	if (ret)
-		goto err_dma_reg;
+		goto err_chans;
 
 	ret = of_dma_controller_register(dev->of_node,
 			cppi41_dma_xlate, &cpp41_dma_info);
@@ -1115,9 +1111,6 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	return 0;
 err_of:
 	dma_async_device_unregister(&cdd->ddev);
-err_dma_reg:
-err_irq:
-	cleanup_chans(cdd);
 err_chans:
 	deinit_cppi41(dev, cdd);
 err_init_cppi:
@@ -1126,9 +1119,6 @@ err_get_n_chans:
 err_get_sync:
 	pm_runtime_put_sync(dev);
 	pm_runtime_disable(dev);
-	iounmap(cdd->ctrl_mem);
-	iounmap(cdd->sched_mem);
-	iounmap(cdd->qmgr_mem);
 	return ret;
 }
 
@@ -1145,11 +1135,7 @@ static int cppi41_dma_remove(struct platform_device *pdev)
 	dma_async_device_unregister(&cdd->ddev);
 
 	devm_free_irq(&pdev->dev, cdd->irq, cdd);
-	cleanup_chans(cdd);
 	deinit_cppi41(&pdev->dev, cdd);
-	iounmap(cdd->ctrl_mem);
-	iounmap(cdd->sched_mem);
-	iounmap(cdd->qmgr_mem);
 	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-- 
cgit v1.2.3


From 7f3ff14b7eb1ffad132117f08a1973b48e653d43 Mon Sep 17 00:00:00 2001
From: Jiada Wang <jiada_wang@mentor.com>
Date: Thu, 16 Mar 2017 23:12:09 -0700
Subject: dmaengine: imx-sdma: add 1ms delay to ensure SDMA channel is stopped

sdma_disable_channel() cannot ensure dma is stopped to access
module's FIFOs. There is chance SDMA core is running and accessing
BD when disable of corresponding channel, this may cause sometimes
even after call of .sdma_disable_channel(), SDMA core still be
running and accessing module's FIFOs.

According to NXP R&D team a delay of one BD SDMA cost time (maximum
is 1ms) should be added after disable of the channel bit, to ensure
SDMA core has really been stopped after SDMA clients call
.device_terminate_all.

This patch introduces adds a new function sdma_disable_channel_with_delay()
which simply adds 1ms delay after call sdma_disable_channel(),
and set it as .device_terminate_all.

Signed-off-by: Jiada Wang <jiada_wang@mentor.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/imx-sdma.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 46359c4be22d..085993cb2ccc 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -937,6 +937,21 @@ static int sdma_disable_channel(struct dma_chan *chan)
 	return 0;
 }
 
+static int sdma_disable_channel_with_delay(struct dma_chan *chan)
+{
+	sdma_disable_channel(chan);
+
+	/*
+	 * According to NXP R&D team a delay of one BD SDMA cost time
+	 * (maximum is 1ms) should be added after disable of the channel
+	 * bit, to ensure SDMA core has really been stopped after SDMA
+	 * clients call .device_terminate_all.
+	 */
+	mdelay(1);
+
+	return 0;
+}
+
 static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac)
 {
 	struct sdma_engine *sdma = sdmac->sdma;
@@ -1828,7 +1843,7 @@ static int sdma_probe(struct platform_device *pdev)
 	sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg;
 	sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic;
 	sdma->dma_device.device_config = sdma_config;
-	sdma->dma_device.device_terminate_all = sdma_disable_channel;
+	sdma->dma_device.device_terminate_all = sdma_disable_channel_with_delay;
 	sdma->dma_device.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
 	sdma->dma_device.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
 	sdma->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
-- 
cgit v1.2.3


From d7fd724e01939f777781a86d078c3b7142fb9c03 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Mon, 27 Mar 2017 13:33:31 +0530
Subject: dmaengine: cppi: fix build error due to bad variable

Commit 8e3ba95f4190 ("dmaengine: cppi41: use managed functions devm_*()")
moved the code to devm_* but erranously changed a varible name, so fix it.

drivers/dma/cppi41.c:1052:5: error: 'struct cppi41_dd' has no member named 'qmrg_mem'
  cdd->qmrg_mem = devm_ioremap_resource(dev, mem);
     ^
drivers/dma/cppi41.c:1053:16: error: 'struct cppi41_dd' has no member named 'qmrg_mem'
  if (IS_ERR(cdd->qmrg_mem))
                ^
drivers/dma/cppi41.c:1054:21: error: 'struct cppi41_dd' has no member named 'qmrg_mem'
   return PTR_ERR(cdd->qmrg_mem);
                     ^

Fixes: 8e3ba95f4190 ("dmaengine: cppi41: use managed functions devm_*()")
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 1c63e1cf8c59..0be56c971786 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -1049,9 +1049,9 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 		return PTR_ERR(cdd->sched_mem);
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-	cdd->qmrg_mem = devm_ioremap_resource(dev, mem);
-	if (IS_ERR(cdd->qmrg_mem))
-		return PTR_ERR(cdd->qmrg_mem);
+	cdd->qmgr_mem = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(cdd->qmgr_mem))
+		return PTR_ERR(cdd->qmgr_mem);
 
 	spin_lock_init(&cdd->lock);
 	INIT_LIST_HEAD(&cdd->pending);
-- 
cgit v1.2.3


From 1394a7265f88e1fbd001048113f28fb090e5eff5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:17 +0900
Subject: mtd: nand: denali: use nand_chip to hold frequently accessed data

The denali_init() needs to setup a bunch of parameters of nand_chip.
Replace denali->nand.(member) with chip->(member) for shorter code.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 68 +++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 2c59eb3c117e..4b907a5bdb25 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1405,7 +1405,8 @@ static void denali_drv_init(struct denali_nand_info *denali)
 
 int denali_init(struct denali_nand_info *denali)
 {
-	struct mtd_info *mtd = nand_to_mtd(&denali->nand);
+	struct nand_chip *chip = &denali->nand;
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
 	if (denali->platform == INTEL_CE4100) {
@@ -1442,10 +1443,10 @@ int denali_init(struct denali_nand_info *denali)
 	mtd->name = "denali-nand";
 
 	/* register the driver with the NAND core subsystem */
-	denali->nand.select_chip = denali_select_chip;
-	denali->nand.cmdfunc = denali_cmdfunc;
-	denali->nand.read_byte = denali_read_byte;
-	denali->nand.waitfunc = denali_waitfunc;
+	chip->select_chip = denali_select_chip;
+	chip->cmdfunc = denali_cmdfunc;
+	chip->read_byte = denali_read_byte;
+	chip->waitfunc = denali_waitfunc;
 
 	/*
 	 * scan for NAND devices attached to the controller
@@ -1488,17 +1489,16 @@ int denali_init(struct denali_nand_info *denali)
 	 * the real pagesize and anything necessery
 	 */
 	denali->devnum = ioread32(denali->flash_reg + DEVICES_CONNECTED);
-	denali->nand.chipsize <<= denali->devnum - 1;
-	denali->nand.page_shift += denali->devnum - 1;
-	denali->nand.pagemask = (denali->nand.chipsize >>
-						denali->nand.page_shift) - 1;
-	denali->nand.bbt_erase_shift += denali->devnum - 1;
-	denali->nand.phys_erase_shift = denali->nand.bbt_erase_shift;
-	denali->nand.chip_shift += denali->devnum - 1;
+	chip->chipsize <<= denali->devnum - 1;
+	chip->page_shift += denali->devnum - 1;
+	chip->pagemask = (chip->chipsize >> chip->page_shift) - 1;
+	chip->bbt_erase_shift += denali->devnum - 1;
+	chip->phys_erase_shift = chip->bbt_erase_shift;
+	chip->chip_shift += denali->devnum - 1;
 	mtd->writesize <<= denali->devnum - 1;
 	mtd->oobsize <<= denali->devnum - 1;
 	mtd->erasesize <<= denali->devnum - 1;
-	mtd->size = denali->nand.numchips * denali->nand.chipsize;
+	mtd->size = chip->numchips * chip->chipsize;
 	denali->bbtskipbytes *= denali->devnum;
 
 	/*
@@ -1508,29 +1508,29 @@ int denali_init(struct denali_nand_info *denali)
 	 */
 
 	/* Bad block management */
-	denali->nand.bbt_td = &bbt_main_descr;
-	denali->nand.bbt_md = &bbt_mirror_descr;
+	chip->bbt_td = &bbt_main_descr;
+	chip->bbt_md = &bbt_mirror_descr;
 
 	/* skip the scan for now until we have OOB read and write support */
-	denali->nand.bbt_options |= NAND_BBT_USE_FLASH;
-	denali->nand.options |= NAND_SKIP_BBTSCAN;
-	denali->nand.ecc.mode = NAND_ECC_HW_SYNDROME;
+	chip->bbt_options |= NAND_BBT_USE_FLASH;
+	chip->options |= NAND_SKIP_BBTSCAN;
+	chip->ecc.mode = NAND_ECC_HW_SYNDROME;
 
 	/* no subpage writes on denali */
-	denali->nand.options |= NAND_NO_SUBPAGE_WRITE;
+	chip->options |= NAND_NO_SUBPAGE_WRITE;
 
 	/*
 	 * Denali Controller only support 15bit and 8bit ECC in MRST,
 	 * so just let controller do 15bit ECC for MLC and 8bit ECC for
 	 * SLC if possible.
 	 * */
-	if (!nand_is_slc(&denali->nand) &&
+	if (!nand_is_slc(chip) &&
 			(mtd->oobsize > (denali->bbtskipbytes +
 			ECC_15BITS * (mtd->writesize /
 			ECC_SECTOR_SIZE)))) {
 		/* if MLC OOB size is large enough, use 15bit ECC*/
-		denali->nand.ecc.strength = 15;
-		denali->nand.ecc.bytes = ECC_15BITS;
+		chip->ecc.strength = 15;
+		chip->ecc.bytes = ECC_15BITS;
 		iowrite32(15, denali->flash_reg + ECC_CORRECTION);
 	} else if (mtd->oobsize < (denali->bbtskipbytes +
 			ECC_8BITS * (mtd->writesize /
@@ -1538,24 +1538,24 @@ int denali_init(struct denali_nand_info *denali)
 		pr_err("Your NAND chip OOB is not large enough to contain 8bit ECC correction codes");
 		goto failed_req_irq;
 	} else {
-		denali->nand.ecc.strength = 8;
-		denali->nand.ecc.bytes = ECC_8BITS;
+		chip->ecc.strength = 8;
+		chip->ecc.bytes = ECC_8BITS;
 		iowrite32(8, denali->flash_reg + ECC_CORRECTION);
 	}
 
 	mtd_set_ooblayout(mtd, &denali_ooblayout_ops);
-	denali->nand.ecc.bytes *= denali->devnum;
-	denali->nand.ecc.strength *= denali->devnum;
+	chip->ecc.bytes *= denali->devnum;
+	chip->ecc.strength *= denali->devnum;
 
 	/* override the default read operations */
-	denali->nand.ecc.size = ECC_SECTOR_SIZE * denali->devnum;
-	denali->nand.ecc.read_page = denali_read_page;
-	denali->nand.ecc.read_page_raw = denali_read_page_raw;
-	denali->nand.ecc.write_page = denali_write_page;
-	denali->nand.ecc.write_page_raw = denali_write_page_raw;
-	denali->nand.ecc.read_oob = denali_read_oob;
-	denali->nand.ecc.write_oob = denali_write_oob;
-	denali->nand.erase = denali_erase;
+	chip->ecc.size = ECC_SECTOR_SIZE * denali->devnum;
+	chip->ecc.read_page = denali_read_page;
+	chip->ecc.read_page_raw = denali_read_page_raw;
+	chip->ecc.write_page = denali_write_page;
+	chip->ecc.write_page_raw = denali_write_page_raw;
+	chip->ecc.read_oob = denali_read_oob;
+	chip->ecc.write_oob = denali_write_oob;
+	chip->erase = denali_erase;
 
 	ret = nand_scan_tail(mtd);
 	if (ret)
-- 
cgit v1.2.3


From 63757d463ea683b469c1976032054d46cecdef09 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:18 +0900
Subject: mtd: nand: denali: call nand_set_flash_node() to set DT node

This will allow nand_dt_init() to parse DT properties in the NAND
controller device node.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 4b907a5bdb25..f120f143539b 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1441,6 +1441,7 @@ int denali_init(struct denali_nand_info *denali)
 	/* now that our ISR is registered, we can enable interrupts */
 	denali_set_intr_modes(denali, true);
 	mtd->name = "denali-nand";
+	nand_set_flash_node(chip, denali->dev->of_node);
 
 	/* register the driver with the NAND core subsystem */
 	chip->select_chip = denali_select_chip;
-- 
cgit v1.2.3


From 6da27b469317435b776114649439e32384165aa5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:20 +0900
Subject: mtd: nand: denali: move multi device fixup code to a helper function

Collect multi NAND fixups into a helper function instead of
scattering them in denali_init().

I am rewording the comment block to clearly explain what is called
"multi device".

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 54 +++++++++++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index f120f143539b..2b5edd39248b 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1403,6 +1403,36 @@ static void denali_drv_init(struct denali_nand_info *denali)
 	denali->irq_status = 0;
 }
 
+static void denali_multidev_fixup(struct denali_nand_info *denali)
+{
+	struct nand_chip *chip = &denali->nand;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	/*
+	 * Support for multi device:
+	 * When the IP configuration is x16 capable and two x8 chips are
+	 * connected in parallel, DEVICES_CONNECTED should be set to 2.
+	 * In this case, the core framework knows nothing about this fact,
+	 * so we should tell it the _logical_ pagesize and anything necessary.
+	 */
+	denali->devnum = ioread32(denali->flash_reg + DEVICES_CONNECTED);
+
+	mtd->size <<= denali->devnum - 1;
+	mtd->erasesize <<= denali->devnum - 1;
+	mtd->writesize <<= denali->devnum - 1;
+	mtd->oobsize <<= denali->devnum - 1;
+	chip->chipsize <<= denali->devnum - 1;
+	chip->page_shift += denali->devnum - 1;
+	chip->phys_erase_shift += denali->devnum - 1;
+	chip->bbt_erase_shift += denali->devnum - 1;
+	chip->chip_shift += denali->devnum - 1;
+	chip->pagemask <<= denali->devnum - 1;
+	chip->ecc.size *= denali->devnum;
+	chip->ecc.bytes *= denali->devnum;
+	chip->ecc.strength *= denali->devnum;
+	denali->bbtskipbytes *= denali->devnum;
+}
+
 int denali_init(struct denali_nand_info *denali)
 {
 	struct nand_chip *chip = &denali->nand;
@@ -1484,24 +1514,6 @@ int denali_init(struct denali_nand_info *denali)
 		goto failed_req_irq;
 	}
 
-	/*
-	 * support for multi nand
-	 * MTD known nothing about multi nand, so we should tell it
-	 * the real pagesize and anything necessery
-	 */
-	denali->devnum = ioread32(denali->flash_reg + DEVICES_CONNECTED);
-	chip->chipsize <<= denali->devnum - 1;
-	chip->page_shift += denali->devnum - 1;
-	chip->pagemask = (chip->chipsize >> chip->page_shift) - 1;
-	chip->bbt_erase_shift += denali->devnum - 1;
-	chip->phys_erase_shift = chip->bbt_erase_shift;
-	chip->chip_shift += denali->devnum - 1;
-	mtd->writesize <<= denali->devnum - 1;
-	mtd->oobsize <<= denali->devnum - 1;
-	mtd->erasesize <<= denali->devnum - 1;
-	mtd->size = chip->numchips * chip->chipsize;
-	denali->bbtskipbytes *= denali->devnum;
-
 	/*
 	 * second stage of the NAND scan
 	 * this stage requires information regarding ECC and
@@ -1545,11 +1557,9 @@ int denali_init(struct denali_nand_info *denali)
 	}
 
 	mtd_set_ooblayout(mtd, &denali_ooblayout_ops);
-	chip->ecc.bytes *= denali->devnum;
-	chip->ecc.strength *= denali->devnum;
 
 	/* override the default read operations */
-	chip->ecc.size = ECC_SECTOR_SIZE * denali->devnum;
+	chip->ecc.size = ECC_SECTOR_SIZE;
 	chip->ecc.read_page = denali_read_page;
 	chip->ecc.read_page_raw = denali_read_page_raw;
 	chip->ecc.write_page = denali_write_page;
@@ -1558,6 +1568,8 @@ int denali_init(struct denali_nand_info *denali)
 	chip->ecc.write_oob = denali_write_oob;
 	chip->erase = denali_erase;
 
+	denali_multidev_fixup(denali);
+
 	ret = nand_scan_tail(mtd);
 	if (ret)
 		goto failed_req_irq;
-- 
cgit v1.2.3


From 1ddfe82ed84a6bf09eef9067c2dc093c758eb4b4 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 9 Mar 2017 11:41:46 +0100
Subject: clk: meson: fix SET_PARM macro

parameter val is not enclosed in parenthesis which is buggy when given an
expression instead of a simple value

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/20170309104154.28295-2-jbrunet@baylibre.com
---
 drivers/clk/meson/clkc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/meson/clkc.h b/drivers/clk/meson/clkc.h
index 9bb70e7a7d6a..c6be77dd8694 100644
--- a/drivers/clk/meson/clkc.h
+++ b/drivers/clk/meson/clkc.h
@@ -25,7 +25,7 @@
 #define PARM_GET(width, shift, reg)					\
 	(((reg) & SETPMASK(width, shift)) >> (shift))
 #define PARM_SET(width, shift, reg, val)				\
-	(((reg) & CLRPMASK(width, shift)) | (val << (shift)))
+	(((reg) & CLRPMASK(width, shift)) | ((val) << (shift)))
 
 #define MESON_PARM_APPLICABLE(p)		(!!((p)->width))
 
-- 
cgit v1.2.3


From f7e3a826097711c63688ef3a9ca03c7dc93302d8 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 9 Mar 2017 11:41:47 +0100
Subject: clk: meson: add missing const qualifiers on gate arrays

Reported-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/20170309104154.28295-3-jbrunet@baylibre.com
---
 drivers/clk/meson/gxbb.c    | 2 +-
 drivers/clk/meson/meson8b.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index 1c1ec137a3cc..c063287bb0ed 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -816,7 +816,7 @@ static struct meson_clk_mpll *const gxbb_clk_mplls[] = {
 	&gxbb_mpll2,
 };
 
-static struct clk_gate *gxbb_clk_gates[] = {
+static struct clk_gate *const gxbb_clk_gates[] = {
 	&gxbb_clk81,
 	&gxbb_ddr,
 	&gxbb_dos,
diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c
index 888494d4fb8a..d1cc4d7cc8ff 100644
--- a/drivers/clk/meson/meson8b.c
+++ b/drivers/clk/meson/meson8b.c
@@ -501,7 +501,7 @@ static struct meson_clk_pll *const meson8b_clk_plls[] = {
 	&meson8b_sys_pll,
 };
 
-static struct clk_gate *meson8b_clk_gates[] = {
+static struct clk_gate *const meson8b_clk_gates[] = {
 	&meson8b_clk81,
 	&meson8b_ddr,
 	&meson8b_dos,
-- 
cgit v1.2.3


From e988aae54c1f307c23463e5e5433572e69a8d0da Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 9 Mar 2017 11:41:48 +0100
Subject: clk: meson8b: put dividers and muxes in tables

Until now, there was only 1 divider and 1 mux declared for the meson8b
platform. With the ongoing work on various system, including audio, this
is about to change. Use the same approach as gates for dividers and muxes,
putting them in tables to fix the register address at runtime.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/20170309104154.28295-4-jbrunet@baylibre.com
---
 drivers/clk/meson/meson8b.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c
index d1cc4d7cc8ff..2937443d4505 100644
--- a/drivers/clk/meson/meson8b.c
+++ b/drivers/clk/meson/meson8b.c
@@ -582,6 +582,14 @@ static struct clk_gate *const meson8b_clk_gates[] = {
 	&meson8b_ao_iface,
 };
 
+static struct clk_mux *const meson8b_clk_muxes[] = {
+	&meson8b_mpeg_clk_sel,
+};
+
+static struct clk_divider *const meson8b_clk_dividers[] = {
+	&meson8b_mpeg_clk_div,
+};
+
 static int meson8b_clkc_probe(struct platform_device *pdev)
 {
 	void __iomem *clk_base;
@@ -604,15 +612,21 @@ static int meson8b_clkc_probe(struct platform_device *pdev)
 	/* Populate the base address for CPU clk */
 	meson8b_cpu_clk.base = clk_base;
 
-	/* Populate the base address for the MPEG clks */
-	meson8b_mpeg_clk_sel.reg = clk_base + (u32)meson8b_mpeg_clk_sel.reg;
-	meson8b_mpeg_clk_div.reg = clk_base + (u32)meson8b_mpeg_clk_div.reg;
-
 	/* Populate base address for gates */
 	for (i = 0; i < ARRAY_SIZE(meson8b_clk_gates); i++)
 		meson8b_clk_gates[i]->reg = clk_base +
 			(u32)meson8b_clk_gates[i]->reg;
 
+	/* Populate base address for muxes */
+	for (i = 0; i < ARRAY_SIZE(meson8b_clk_muxes); i++)
+		meson8b_clk_muxes[i]->reg = clk_base +
+			(u32)meson8b_clk_muxes[i]->reg;
+
+	/* Populate base address for dividers */
+	for (i = 0; i < ARRAY_SIZE(meson8b_clk_dividers); i++)
+		meson8b_clk_dividers[i]->reg = clk_base +
+			(u32)meson8b_clk_dividers[i]->reg;
+
 	/*
 	 * register all clks
 	 * CLKID_UNUSED = 0, so skip it and start with CLKID_XTAL = 1
-- 
cgit v1.2.3


From b92332eea8a3406020411e61f84affb479fdc69e Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 9 Mar 2017 11:41:49 +0100
Subject: clk: gxbb: put dividers and muxes in tables

Until now, there was only 2 dividers and 2 muxes declared for the gxbb
platform. With the ongoing work on various subsystem, including audio,
this is about to change. Use the same approach as gates for dividers and
muxes, putting them in tables to fix the register address at runtime.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/20170309104154.28295-5-jbrunet@baylibre.com
---
 drivers/clk/meson/gxbb.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index c063287bb0ed..79e9313e6703 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -902,6 +902,16 @@ static struct clk_gate *const gxbb_clk_gates[] = {
 	&gxbb_sar_adc_clk,
 };
 
+static struct clk_mux *const gxbb_clk_muxes[] = {
+	&gxbb_mpeg_clk_sel,
+	&gxbb_sar_adc_clk_sel,
+};
+
+static struct clk_divider *const gxbb_clk_dividers[] = {
+	&gxbb_mpeg_clk_div,
+	&gxbb_sar_adc_clk_div,
+};
+
 static int gxbb_clkc_probe(struct platform_device *pdev)
 {
 	void __iomem *clk_base;
@@ -928,19 +938,21 @@ static int gxbb_clkc_probe(struct platform_device *pdev)
 	/* Populate the base address for CPU clk */
 	gxbb_cpu_clk.base = clk_base;
 
-	/* Populate the base address for the MPEG clks */
-	gxbb_mpeg_clk_sel.reg = clk_base + (u64)gxbb_mpeg_clk_sel.reg;
-	gxbb_mpeg_clk_div.reg = clk_base + (u64)gxbb_mpeg_clk_div.reg;
-
-	/* Populate the base address for the SAR ADC clks */
-	gxbb_sar_adc_clk_sel.reg = clk_base + (u64)gxbb_sar_adc_clk_sel.reg;
-	gxbb_sar_adc_clk_div.reg = clk_base + (u64)gxbb_sar_adc_clk_div.reg;
-
 	/* Populate base address for gates */
 	for (i = 0; i < ARRAY_SIZE(gxbb_clk_gates); i++)
 		gxbb_clk_gates[i]->reg = clk_base +
 			(u64)gxbb_clk_gates[i]->reg;
 
+	/* Populate base address for muxes */
+	for (i = 0; i < ARRAY_SIZE(gxbb_clk_muxes); i++)
+		gxbb_clk_muxes[i]->reg = clk_base +
+			(u64)gxbb_clk_muxes[i]->reg;
+
+	/* Populate base address for dividers */
+	for (i = 0; i < ARRAY_SIZE(gxbb_clk_dividers); i++)
+		gxbb_clk_dividers[i]->reg = clk_base +
+			(u64)gxbb_clk_dividers[i]->reg;
+
 	/*
 	 * register all clks
 	 */
-- 
cgit v1.2.3


From 007e6e5c5f01d379da91e0e30f83245022fd2579 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 9 Mar 2017 11:41:50 +0100
Subject: clk: meson: mpll: add rw operation

This patch adds new callbacks to the meson-mpll driver to control
and set the pll rate. For this, we also need to add the enable bit and
sdm enable bit. The corresponding parameters are added to mpll data
structure.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/20170309104154.28295-6-jbrunet@baylibre.com
---
 drivers/clk/meson/clk-mpll.c | 152 +++++++++++++++++++++++++++++++++++++++++--
 drivers/clk/meson/clkc.h     |   4 +-
 drivers/clk/meson/gxbb.c     |  30 +++++++++
 3 files changed, 180 insertions(+), 6 deletions(-)

diff --git a/drivers/clk/meson/clk-mpll.c b/drivers/clk/meson/clk-mpll.c
index 03af79005ddb..342b85d4e22a 100644
--- a/drivers/clk/meson/clk-mpll.c
+++ b/drivers/clk/meson/clk-mpll.c
@@ -64,16 +64,50 @@
 #include <linux/clk-provider.h>
 #include "clkc.h"
 
-#define SDM_MAX 16384
+#define SDM_DEN 16384
+#define SDM_MIN 1
+#define SDM_MAX 16383
+#define N2_MIN	4
+#define N2_MAX	127
 
 #define to_meson_clk_mpll(_hw) container_of(_hw, struct meson_clk_mpll, hw)
 
+static unsigned long rate_from_params(unsigned long parent_rate,
+				      unsigned long sdm,
+				      unsigned long n2)
+{
+	return (parent_rate * SDM_DEN) / ((SDM_DEN * n2) + sdm);
+}
+
+static void params_from_rate(unsigned long requested_rate,
+			     unsigned long parent_rate,
+			     unsigned long *sdm,
+			     unsigned long *n2)
+{
+	uint64_t div = parent_rate;
+	unsigned long rem = do_div(div, requested_rate);
+
+	if (div < N2_MIN) {
+		*n2 = N2_MIN;
+		*sdm = SDM_MIN;
+	} else if (div > N2_MAX) {
+		*n2 = N2_MAX;
+		*sdm = SDM_MAX;
+	} else {
+		*n2 = div;
+		*sdm = DIV_ROUND_UP(rem * SDM_DEN, requested_rate);
+		if (*sdm < SDM_MIN)
+			*sdm = SDM_MIN;
+		else if (*sdm > SDM_MAX)
+			*sdm = SDM_MAX;
+	}
+}
+
 static unsigned long mpll_recalc_rate(struct clk_hw *hw,
 		unsigned long parent_rate)
 {
 	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
 	struct parm *p;
-	unsigned long rate = 0;
 	unsigned long reg, sdm, n2;
 
 	p = &mpll->sdm;
@@ -84,11 +118,119 @@ static unsigned long mpll_recalc_rate(struct clk_hw *hw,
 	reg = readl(mpll->base + p->reg_off);
 	n2 = PARM_GET(p->width, p->shift, reg);
 
-	rate = (parent_rate * SDM_MAX) / ((SDM_MAX * n2) + sdm);
+	return rate_from_params(parent_rate, sdm, n2);
+}
+
+static long mpll_round_rate(struct clk_hw *hw,
+			    unsigned long rate,
+			    unsigned long *parent_rate)
+{
+	unsigned long sdm, n2;
+
+	params_from_rate(rate, *parent_rate, &sdm, &n2);
+	return rate_from_params(*parent_rate, sdm, n2);
+}
+
+static int mpll_set_rate(struct clk_hw *hw,
+			 unsigned long rate,
+			 unsigned long parent_rate)
+{
+	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
+	struct parm *p;
+	unsigned long reg, sdm, n2;
+	unsigned long flags = 0;
+
+	params_from_rate(rate, parent_rate, &sdm, &n2);
+
+	if (mpll->lock)
+		spin_lock_irqsave(mpll->lock, flags);
+	else
+		__acquire(mpll->lock);
+
+	p = &mpll->sdm;
+	reg = readl(mpll->base + p->reg_off);
+	reg = PARM_SET(p->width, p->shift, reg, sdm);
+	writel(reg, mpll->base + p->reg_off);
+
+	p = &mpll->sdm_en;
+	reg = readl(mpll->base + p->reg_off);
+	reg = PARM_SET(p->width, p->shift, reg, 1);
+	writel(reg, mpll->base + p->reg_off);
+
+	p = &mpll->n2;
+	reg = readl(mpll->base + p->reg_off);
+	reg = PARM_SET(p->width, p->shift, reg, n2);
+	writel(reg, mpll->base + p->reg_off);
+
+	if (mpll->lock)
+		spin_unlock_irqrestore(mpll->lock, flags);
+	else
+		__release(mpll->lock);
 
-	return rate;
+	return 0;
+}
+
+static void mpll_enable_core(struct clk_hw *hw, int enable)
+{
+	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
+	struct parm *p;
+	unsigned long reg;
+	unsigned long flags = 0;
+
+	if (mpll->lock)
+		spin_lock_irqsave(mpll->lock, flags);
+	else
+		__acquire(mpll->lock);
+
+	p = &mpll->en;
+	reg = readl(mpll->base + p->reg_off);
+	reg = PARM_SET(p->width, p->shift, reg, enable ? 1 : 0);
+	writel(reg, mpll->base + p->reg_off);
+
+	if (mpll->lock)
+		spin_unlock_irqrestore(mpll->lock, flags);
+	else
+		__release(mpll->lock);
+}
+
+
+static int mpll_enable(struct clk_hw *hw)
+{
+	mpll_enable_core(hw, 1);
+
+	return 0;
+}
+
+static void mpll_disable(struct clk_hw *hw)
+{
+	mpll_enable_core(hw, 0);
+}
+
+static int mpll_is_enabled(struct clk_hw *hw)
+{
+	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
+	struct parm *p;
+	unsigned long reg;
+	int en;
+
+	p = &mpll->en;
+	reg = readl(mpll->base + p->reg_off);
+	en = PARM_GET(p->width, p->shift, reg);
+
+	return en;
 }
 
 const struct clk_ops meson_clk_mpll_ro_ops = {
-	.recalc_rate = mpll_recalc_rate,
+	.recalc_rate	= mpll_recalc_rate,
+	.round_rate	= mpll_round_rate,
+	.is_enabled	= mpll_is_enabled,
+};
+
+const struct clk_ops meson_clk_mpll_ops = {
+	.recalc_rate	= mpll_recalc_rate,
+	.round_rate	= mpll_round_rate,
+	.set_rate	= mpll_set_rate,
+	.enable		= mpll_enable,
+	.disable	= mpll_disable,
+	.is_enabled	= mpll_is_enabled,
 };
diff --git a/drivers/clk/meson/clkc.h b/drivers/clk/meson/clkc.h
index c6be77dd8694..ad254675edd8 100644
--- a/drivers/clk/meson/clkc.h
+++ b/drivers/clk/meson/clkc.h
@@ -92,8 +92,9 @@ struct meson_clk_mpll {
 	struct clk_hw hw;
 	void __iomem *base;
 	struct parm sdm;
+	struct parm sdm_en;
 	struct parm n2;
-	/* FIXME ssen gate control? */
+	struct parm en;
 	spinlock_t *lock;
 };
 
@@ -116,5 +117,6 @@ extern const struct clk_ops meson_clk_pll_ro_ops;
 extern const struct clk_ops meson_clk_pll_ops;
 extern const struct clk_ops meson_clk_cpu_ops;
 extern const struct clk_ops meson_clk_mpll_ro_ops;
+extern const struct clk_ops meson_clk_mpll_ops;
 
 #endif /* __CLKC_H */
diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index 79e9313e6703..79fb8989f8dd 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -441,11 +441,21 @@ static struct meson_clk_mpll gxbb_mpll0 = {
 		.shift   = 0,
 		.width   = 14,
 	},
+	.sdm_en = {
+		.reg_off = HHI_MPLL_CNTL7,
+		.shift   = 15,
+		.width	 = 1,
+	},
 	.n2 = {
 		.reg_off = HHI_MPLL_CNTL7,
 		.shift   = 16,
 		.width   = 9,
 	},
+	.en = {
+		.reg_off = HHI_MPLL_CNTL7,
+		.shift   = 14,
+		.width	 = 1,
+	},
 	.lock = &clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "mpll0",
@@ -461,11 +471,21 @@ static struct meson_clk_mpll gxbb_mpll1 = {
 		.shift   = 0,
 		.width   = 14,
 	},
+	.sdm_en = {
+		.reg_off = HHI_MPLL_CNTL8,
+		.shift   = 15,
+		.width	 = 1,
+	},
 	.n2 = {
 		.reg_off = HHI_MPLL_CNTL8,
 		.shift   = 16,
 		.width   = 9,
 	},
+	.en = {
+		.reg_off = HHI_MPLL_CNTL8,
+		.shift   = 14,
+		.width	 = 1,
+	},
 	.lock = &clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "mpll1",
@@ -481,11 +501,21 @@ static struct meson_clk_mpll gxbb_mpll2 = {
 		.shift   = 0,
 		.width   = 14,
 	},
+	.sdm_en = {
+		.reg_off = HHI_MPLL_CNTL9,
+		.shift   = 15,
+		.width	 = 1,
+	},
 	.n2 = {
 		.reg_off = HHI_MPLL_CNTL9,
 		.shift   = 16,
 		.width   = 9,
 	},
+	.en = {
+		.reg_off = HHI_MPLL_CNTL9,
+		.shift   = 14,
+		.width	 = 1,
+	},
 	.lock = &clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "mpll2",
-- 
cgit v1.2.3


From 05b43aa2addfccef4db3319af4277df6b15ea293 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 9 Mar 2017 11:41:51 +0100
Subject: clk: meson: gxbb: mpll: use rw operation

Use read/write operations for the mpll clocks instead of the
read-only ones.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/20170309104154.28295-7-jbrunet@baylibre.com
---
 drivers/clk/meson/gxbb.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index 79fb8989f8dd..5059c7bbdbb3 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -459,7 +459,7 @@ static struct meson_clk_mpll gxbb_mpll0 = {
 	.lock = &clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "mpll0",
-		.ops = &meson_clk_mpll_ro_ops,
+		.ops = &meson_clk_mpll_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
@@ -489,7 +489,7 @@ static struct meson_clk_mpll gxbb_mpll1 = {
 	.lock = &clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "mpll1",
-		.ops = &meson_clk_mpll_ro_ops,
+		.ops = &meson_clk_mpll_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
@@ -519,7 +519,7 @@ static struct meson_clk_mpll gxbb_mpll2 = {
 	.lock = &clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "mpll2",
-		.ops = &meson_clk_mpll_ro_ops,
+		.ops = &meson_clk_mpll_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
-- 
cgit v1.2.3


From b778f7451a8d4fac386ffaa221f0b510737b9704 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 9 Mar 2017 11:41:52 +0100
Subject: clk: meson8b: add the mplls clocks 0, 1 and 2

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/20170309104154.28295-8-jbrunet@baylibre.com
---
 drivers/clk/meson/meson8b.c | 103 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/clk/meson/meson8b.h |  20 ++++++++-
 2 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c
index 2937443d4505..e9985503165c 100644
--- a/drivers/clk/meson/meson8b.c
+++ b/drivers/clk/meson/meson8b.c
@@ -245,6 +245,96 @@ static struct clk_fixed_factor meson8b_fclk_div7 = {
 	},
 };
 
+static struct meson_clk_mpll meson8b_mpll0 = {
+	.sdm = {
+		.reg_off = HHI_MPLL_CNTL7,
+		.shift   = 0,
+		.width   = 14,
+	},
+	.sdm_en = {
+		.reg_off = HHI_MPLL_CNTL7,
+		.shift   = 15,
+		.width   = 1,
+	},
+	.n2 = {
+		.reg_off = HHI_MPLL_CNTL7,
+		.shift   = 16,
+		.width   = 9,
+	},
+	.en = {
+		.reg_off = HHI_MPLL_CNTL7,
+		.shift   = 14,
+		.width   = 1,
+	},
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll0",
+		.ops = &meson_clk_mpll_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
+	},
+};
+
+static struct meson_clk_mpll meson8b_mpll1 = {
+	.sdm = {
+		.reg_off = HHI_MPLL_CNTL8,
+		.shift   = 0,
+		.width   = 14,
+	},
+	.sdm_en = {
+		.reg_off = HHI_MPLL_CNTL8,
+		.shift   = 15,
+		.width   = 1,
+	},
+	.n2 = {
+		.reg_off = HHI_MPLL_CNTL8,
+		.shift   = 16,
+		.width   = 9,
+	},
+	.en = {
+		.reg_off = HHI_MPLL_CNTL8,
+		.shift   = 14,
+		.width   = 1,
+	},
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll1",
+		.ops = &meson_clk_mpll_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
+	},
+};
+
+static struct meson_clk_mpll meson8b_mpll2 = {
+	.sdm = {
+		.reg_off = HHI_MPLL_CNTL9,
+		.shift   = 0,
+		.width   = 14,
+	},
+	.sdm_en = {
+		.reg_off = HHI_MPLL_CNTL9,
+		.shift   = 15,
+		.width   = 1,
+	},
+	.n2 = {
+		.reg_off = HHI_MPLL_CNTL9,
+		.shift   = 16,
+		.width   = 9,
+	},
+	.en = {
+		.reg_off = HHI_MPLL_CNTL9,
+		.shift   = 14,
+		.width   = 1,
+	},
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll2",
+		.ops = &meson_clk_mpll_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
+	},
+};
+
 /*
  * FIXME cpu clocks and the legacy composite clocks (e.g. clk81) are both PLL
  * post-dividers and should be modeled with their respective PLLs via the
@@ -491,6 +581,9 @@ static struct clk_hw_onecell_data meson8b_hw_onecell_data = {
 		[CLKID_AO_AHB_SRAM]	    = &meson8b_ao_ahb_sram.hw,
 		[CLKID_AO_AHB_BUS]	    = &meson8b_ao_ahb_bus.hw,
 		[CLKID_AO_IFACE]	    = &meson8b_ao_iface.hw,
+		[CLKID_MPLL0]		    = &meson8b_mpll0.hw,
+		[CLKID_MPLL1]		    = &meson8b_mpll1.hw,
+		[CLKID_MPLL2]		    = &meson8b_mpll2.hw,
 	},
 	.num = CLK_NR_CLKS,
 };
@@ -501,6 +594,12 @@ static struct meson_clk_pll *const meson8b_clk_plls[] = {
 	&meson8b_sys_pll,
 };
 
+static struct meson_clk_mpll *const meson8b_clk_mplls[] = {
+	&meson8b_mpll0,
+	&meson8b_mpll1,
+	&meson8b_mpll2,
+};
+
 static struct clk_gate *const meson8b_clk_gates[] = {
 	&meson8b_clk81,
 	&meson8b_ddr,
@@ -609,6 +708,10 @@ static int meson8b_clkc_probe(struct platform_device *pdev)
 	for (i = 0; i < ARRAY_SIZE(meson8b_clk_plls); i++)
 		meson8b_clk_plls[i]->base = clk_base;
 
+	/* Populate base address for MPLLs */
+	for (i = 0; i < ARRAY_SIZE(meson8b_clk_mplls); i++)
+		meson8b_clk_mplls[i]->base = clk_base;
+
 	/* Populate the base address for CPU clk */
 	meson8b_cpu_clk.base = clk_base;
 
diff --git a/drivers/clk/meson/meson8b.h b/drivers/clk/meson/meson8b.h
index 010e9582888d..3881defc8644 100644
--- a/drivers/clk/meson/meson8b.h
+++ b/drivers/clk/meson/meson8b.h
@@ -41,6 +41,21 @@
 #define HHI_SYS_PLL_CNTL		0x300 /* 0xc0 offset in data sheet */
 #define HHI_VID_PLL_CNTL		0x320 /* 0xc8 offset in data sheet */
 
+/*
+ * MPLL register offeset taken from the S905 datasheet. Vendor kernel source
+ * confirm these are the same for the S805.
+ */
+#define HHI_MPLL_CNTL			0x280 /* 0xa0 offset in data sheet */
+#define HHI_MPLL_CNTL2			0x284 /* 0xa1 offset in data sheet */
+#define HHI_MPLL_CNTL3			0x288 /* 0xa2 offset in data sheet */
+#define HHI_MPLL_CNTL4			0x28C /* 0xa3 offset in data sheet */
+#define HHI_MPLL_CNTL5			0x290 /* 0xa4 offset in data sheet */
+#define HHI_MPLL_CNTL6			0x294 /* 0xa5 offset in data sheet */
+#define HHI_MPLL_CNTL7			0x298 /* 0xa6 offset in data sheet */
+#define HHI_MPLL_CNTL8			0x29C /* 0xa7 offset in data sheet */
+#define HHI_MPLL_CNTL9			0x2A0 /* 0xa8 offset in data sheet */
+#define HHI_MPLL_CNTL10			0x2A4 /* 0xa9 offset in data sheet */
+
 /*
  * CLKID index values
  *
@@ -142,8 +157,11 @@
 #define CLKID_AO_AHB_SRAM	90
 #define CLKID_AO_AHB_BUS	91
 #define CLKID_AO_IFACE		92
+#define CLKID_MPLL0		93
+#define CLKID_MPLL1		94
+#define CLKID_MPLL2		95
 
-#define CLK_NR_CLKS		93
+#define CLK_NR_CLKS		96
 
 /* include the CLKIDs that have been made part of the stable DT binding */
 #include <dt-bindings/clock/meson8b-clkc.h>
-- 
cgit v1.2.3


From b68fb7871e15ae2a37294dda4ab86936b72450f5 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 9 Mar 2017 11:41:53 +0100
Subject: clk: meson: mpll: correct N2 maximum value

Gxbb datasheet says N2 maximum value is 127 but the register field is
9 bits wide, the maximum value should 511.

Test shows value greater than 127, all the way to 511, works well

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/20170309104154.28295-9-jbrunet@baylibre.com
---
 drivers/clk/meson/clk-mpll.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/meson/clk-mpll.c b/drivers/clk/meson/clk-mpll.c
index 342b85d4e22a..540dabe5adad 100644
--- a/drivers/clk/meson/clk-mpll.c
+++ b/drivers/clk/meson/clk-mpll.c
@@ -68,7 +68,7 @@
 #define SDM_MIN 1
 #define SDM_MAX 16383
 #define N2_MIN	4
-#define N2_MAX	127
+#define N2_MAX	511
 
 #define to_meson_clk_mpll(_hw) container_of(_hw, struct meson_clk_mpll, hw)
 
-- 
cgit v1.2.3


From e93c1640e0387ab55b98b4e04600050f2beceac1 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:21 +0900
Subject: mtd: nand: denali: simplify multi device fixup code

The available configuration of the IP bus width is x8 or x16, so the
possible value for denali->devnum is 1 or 2.

If the value is 1, there is nothing to do.  Fixup parameters only
when denali->devnum is 2.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 46 ++++++++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 2b5edd39248b..e1c45bf2656d 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1403,7 +1403,7 @@ static void denali_drv_init(struct denali_nand_info *denali)
 	denali->irq_status = 0;
 }
 
-static void denali_multidev_fixup(struct denali_nand_info *denali)
+static int denali_multidev_fixup(struct denali_nand_info *denali)
 {
 	struct nand_chip *chip = &denali->nand;
 	struct mtd_info *mtd = nand_to_mtd(chip);
@@ -1417,20 +1417,32 @@ static void denali_multidev_fixup(struct denali_nand_info *denali)
 	 */
 	denali->devnum = ioread32(denali->flash_reg + DEVICES_CONNECTED);
 
-	mtd->size <<= denali->devnum - 1;
-	mtd->erasesize <<= denali->devnum - 1;
-	mtd->writesize <<= denali->devnum - 1;
-	mtd->oobsize <<= denali->devnum - 1;
-	chip->chipsize <<= denali->devnum - 1;
-	chip->page_shift += denali->devnum - 1;
-	chip->phys_erase_shift += denali->devnum - 1;
-	chip->bbt_erase_shift += denali->devnum - 1;
-	chip->chip_shift += denali->devnum - 1;
-	chip->pagemask <<= denali->devnum - 1;
-	chip->ecc.size *= denali->devnum;
-	chip->ecc.bytes *= denali->devnum;
-	chip->ecc.strength *= denali->devnum;
-	denali->bbtskipbytes *= denali->devnum;
+	if (denali->devnum == 1)
+		return 0;
+
+	if (denali->devnum != 2) {
+		dev_err(denali->dev, "unsupported number of devices %d\n",
+			denali->devnum);
+		return -EINVAL;
+	}
+
+	/* 2 chips in parallel */
+	mtd->size <<= 1;
+	mtd->erasesize <<= 1;
+	mtd->writesize <<= 1;
+	mtd->oobsize <<= 1;
+	chip->chipsize <<= 1;
+	chip->page_shift += 1;
+	chip->phys_erase_shift += 1;
+	chip->bbt_erase_shift += 1;
+	chip->chip_shift += 1;
+	chip->pagemask <<= 1;
+	chip->ecc.size <<= 1;
+	chip->ecc.bytes <<= 1;
+	chip->ecc.strength <<= 1;
+	denali->bbtskipbytes <<= 1;
+
+	return 0;
 }
 
 int denali_init(struct denali_nand_info *denali)
@@ -1568,7 +1580,9 @@ int denali_init(struct denali_nand_info *denali)
 	chip->ecc.write_oob = denali_write_oob;
 	chip->erase = denali_erase;
 
-	denali_multidev_fixup(denali);
+	ret = denali_multidev_fixup(denali);
+	if (ret)
+		goto failed_req_irq;
 
 	ret = nand_scan_tail(mtd);
 	if (ret)
-- 
cgit v1.2.3


From cc5d8031f8c7b11c325a37118f9aa6133f0b28a0 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:22 +0900
Subject: mtd: nand: denali: set DEVICES_CONNECTED 1 if not set

Currently, the driver expects DEVICE_CONNECTED is automatically set
by the hardware, but this feature is disabled in some cases.
In such cases, it is the software's responsibility to set up the
DEVICES_CONNECTED register.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index e1c45bf2656d..b442a3ed9f0f 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1417,6 +1417,15 @@ static int denali_multidev_fixup(struct denali_nand_info *denali)
 	 */
 	denali->devnum = ioread32(denali->flash_reg + DEVICES_CONNECTED);
 
+	/*
+	 * On some SoCs, DEVICES_CONNECTED is not auto-detected.
+	 * For those, DEVICES_CONNECTED is left to 0.  Set 1 if it is the case.
+	 */
+	if (denali->devnum == 0) {
+		denali->devnum = 1;
+		iowrite32(1, denali->flash_reg + DEVICES_CONNECTED);
+	}
+
 	if (denali->devnum == 1)
 		return 0;
 
-- 
cgit v1.2.3


From 6652ef88c4a7036e8f5e900f47a4daf2a9ba30c8 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:23 +0900
Subject: mtd: nand: denali: remove meaningless writes to read-only registers

The write accesses to LOGICAL_PAGE_{DATA,SPARE}_SIZE have no effect
because the Denali User's Guide says these registers are read-only.

The hardware automatically multiplies the main/spare size by the
number of devices and update LOGICAL_PAGE_{DATA,SPARE}_SIZE.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index b442a3ed9f0f..1a2e0638affc 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -342,8 +342,6 @@ static void get_samsung_nand_para(struct denali_nand_info *denali,
 
 static void get_toshiba_nand_para(struct denali_nand_info *denali)
 {
-	uint32_t tmp;
-
 	/*
 	 * Workaround to fix a controller bug which reports a wrong
 	 * spare area size for some kind of Toshiba NAND device
@@ -351,10 +349,6 @@ static void get_toshiba_nand_para(struct denali_nand_info *denali)
 	if ((ioread32(denali->flash_reg + DEVICE_MAIN_AREA_SIZE) == 4096) &&
 		(ioread32(denali->flash_reg + DEVICE_SPARE_AREA_SIZE) == 64)) {
 		iowrite32(216, denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
-		tmp = ioread32(denali->flash_reg + DEVICES_CONNECTED) *
-			ioread32(denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
-		iowrite32(tmp,
-				denali->flash_reg + LOGICAL_PAGE_SPARE_SIZE);
 #if SUPPORT_15BITECC
 		iowrite32(15, denali->flash_reg + ECC_CORRECTION);
 #elif SUPPORT_8BITECC
@@ -366,22 +360,12 @@ static void get_toshiba_nand_para(struct denali_nand_info *denali)
 static void get_hynix_nand_para(struct denali_nand_info *denali,
 							uint8_t device_id)
 {
-	uint32_t main_size, spare_size;
-
 	switch (device_id) {
 	case 0xD5: /* Hynix H27UAG8T2A, H27UBG8U5A or H27UCG8VFA */
 	case 0xD7: /* Hynix H27UDG8VEM, H27UCG8UDM or H27UCG8V5A */
 		iowrite32(128, denali->flash_reg + PAGES_PER_BLOCK);
 		iowrite32(4096, denali->flash_reg + DEVICE_MAIN_AREA_SIZE);
 		iowrite32(224, denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
-		main_size = 4096 *
-			ioread32(denali->flash_reg + DEVICES_CONNECTED);
-		spare_size = 224 *
-			ioread32(denali->flash_reg + DEVICES_CONNECTED);
-		iowrite32(main_size,
-				denali->flash_reg + LOGICAL_PAGE_DATA_SIZE);
-		iowrite32(spare_size,
-				denali->flash_reg + LOGICAL_PAGE_SPARE_SIZE);
 		iowrite32(0, denali->flash_reg + DEVICE_WIDTH);
 #if SUPPORT_15BITECC
 		iowrite32(15, denali->flash_reg + ECC_CORRECTION);
-- 
cgit v1.2.3


From e713ddd87ccee801be1fd13f478407b1bde93c21 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 23 Mar 2017 05:07:24 +0900
Subject: mtd: nand: denali: remove unnecessary writes to ECC_CORRECTION

Because SUPPORT_15BITECC is defined, the following is dead code:

  #elif SUPPORT_8BITECC
          iowrite32(8, denali->flash_reg + ECC_CORRECTION);
  #endif

Such ifdefs are useless and unacceptable coding style.

These writes are not needed in the first place since ECC_CORRECTION
is set up by the nand_init() function.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 15 +--------------
 drivers/mtd/nand/denali.h |  3 ---
 2 files changed, 1 insertion(+), 17 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 1a2e0638affc..4ca75d3926ef 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -62,8 +62,6 @@ MODULE_PARM_DESC(onfi_timing_mode,
  */
 #define CHIP_SELECT_INVALID	-1
 
-#define SUPPORT_8BITECC		1
-
 /*
  * This macro divides two integers and rounds fractional values up
  * to the nearest integer value.
@@ -347,14 +345,8 @@ static void get_toshiba_nand_para(struct denali_nand_info *denali)
 	 * spare area size for some kind of Toshiba NAND device
 	 */
 	if ((ioread32(denali->flash_reg + DEVICE_MAIN_AREA_SIZE) == 4096) &&
-		(ioread32(denali->flash_reg + DEVICE_SPARE_AREA_SIZE) == 64)) {
+		(ioread32(denali->flash_reg + DEVICE_SPARE_AREA_SIZE) == 64))
 		iowrite32(216, denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
-#if SUPPORT_15BITECC
-		iowrite32(15, denali->flash_reg + ECC_CORRECTION);
-#elif SUPPORT_8BITECC
-		iowrite32(8, denali->flash_reg + ECC_CORRECTION);
-#endif
-	}
 }
 
 static void get_hynix_nand_para(struct denali_nand_info *denali,
@@ -367,11 +359,6 @@ static void get_hynix_nand_para(struct denali_nand_info *denali,
 		iowrite32(4096, denali->flash_reg + DEVICE_MAIN_AREA_SIZE);
 		iowrite32(224, denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
 		iowrite32(0, denali->flash_reg + DEVICE_WIDTH);
-#if SUPPORT_15BITECC
-		iowrite32(15, denali->flash_reg + ECC_CORRECTION);
-#elif SUPPORT_8BITECC
-		iowrite32(8, denali->flash_reg + ECC_CORRECTION);
-#endif
 		break;
 	default:
 		dev_warn(denali->dev,
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index 7b2d7851ed7b..483c0e988f33 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -292,9 +292,6 @@
 #define CLK_X  5
 #define CLK_MULTI 4
 
-#define SUPPORT_15BITECC        1
-#define SUPPORT_8BITECC         1
-
 #define ONFI_BLOOM_TIME         1
 #define MODE5_WORKAROUND        0
 
-- 
cgit v1.2.3


From e8bb4673596ea28fab287dbc417e8100d798cd40 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 27 Mar 2017 07:31:03 +0200
Subject: dmaengine: pl330: remove pdata based initialization

This driver is now used only on platforms which support device tree, so
it is safe to remove legacy platform data based initialization code.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
For plat-samsung:
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 arch/arm/plat-samsung/devs.c |  1 -
 drivers/dma/pl330.c          | 42 ++++++++----------------------------------
 include/linux/amba/pl330.h   | 35 -----------------------------------
 3 files changed, 8 insertions(+), 70 deletions(-)
 delete mode 100644 include/linux/amba/pl330.h

diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c
index 03fac123676d..dc269d9143bc 100644
--- a/arch/arm/plat-samsung/devs.c
+++ b/arch/arm/plat-samsung/devs.c
@@ -10,7 +10,6 @@
  * published by the Free Software Foundation.
 */
 
-#include <linux/amba/pl330.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/interrupt.h>
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index f37f4978dabb..8b0da7fa520d 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -22,7 +22,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/amba/bus.h>
-#include <linux/amba/pl330.h>
 #include <linux/scatterlist.h>
 #include <linux/of.h>
 #include <linux/of_dma.h>
@@ -2077,18 +2076,6 @@ static void pl330_tasklet(unsigned long data)
 	}
 }
 
-bool pl330_filter(struct dma_chan *chan, void *param)
-{
-	u8 *peri_id;
-
-	if (chan->device->dev->driver != &pl330_driver.drv)
-		return false;
-
-	peri_id = chan->private;
-	return *peri_id == (unsigned long)param;
-}
-EXPORT_SYMBOL(pl330_filter);
-
 static struct dma_chan *of_dma_pl330_xlate(struct of_phandle_args *dma_spec,
 						struct of_dma *ofdma)
 {
@@ -2833,7 +2820,6 @@ static SIMPLE_DEV_PM_OPS(pl330_pm, pl330_suspend, pl330_resume);
 static int
 pl330_probe(struct amba_device *adev, const struct amba_id *id)
 {
-	struct dma_pl330_platdata *pdat;
 	struct pl330_config *pcfg;
 	struct pl330_dmac *pl330;
 	struct dma_pl330_chan *pch, *_p;
@@ -2843,8 +2829,6 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	int num_chan;
 	struct device_node *np = adev->dev.of_node;
 
-	pdat = dev_get_platdata(&adev->dev);
-
 	ret = dma_set_mask_and_coherent(&adev->dev, DMA_BIT_MASK(32));
 	if (ret)
 		return ret;
@@ -2857,7 +2841,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	pd = &pl330->ddma;
 	pd->dev = &adev->dev;
 
-	pl330->mcbufsz = pdat ? pdat->mcbuf_sz : 0;
+	pl330->mcbufsz = 0;
 
 	/* get quirk */
 	for (i = 0; i < ARRAY_SIZE(of_quirks); i++)
@@ -2901,10 +2885,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	INIT_LIST_HEAD(&pd->channels);
 
 	/* Initialize channel parameters */
-	if (pdat)
-		num_chan = max_t(int, pdat->nr_valid_peri, pcfg->num_chan);
-	else
-		num_chan = max_t(int, pcfg->num_peri, pcfg->num_chan);
+	num_chan = max_t(int, pcfg->num_peri, pcfg->num_chan);
 
 	pl330->num_peripherals = num_chan;
 
@@ -2916,11 +2897,8 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 
 	for (i = 0; i < num_chan; i++) {
 		pch = &pl330->peripherals[i];
-		if (!adev->dev.of_node)
-			pch->chan.private = pdat ? &pdat->peri_id[i] : NULL;
-		else
-			pch->chan.private = adev->dev.of_node;
 
+		pch->chan.private = adev->dev.of_node;
 		INIT_LIST_HEAD(&pch->submitted_list);
 		INIT_LIST_HEAD(&pch->work_list);
 		INIT_LIST_HEAD(&pch->completed_list);
@@ -2933,15 +2911,11 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		list_add_tail(&pch->chan.device_node, &pd->channels);
 	}
 
-	if (pdat) {
-		pd->cap_mask = pdat->cap_mask;
-	} else {
-		dma_cap_set(DMA_MEMCPY, pd->cap_mask);
-		if (pcfg->num_peri) {
-			dma_cap_set(DMA_SLAVE, pd->cap_mask);
-			dma_cap_set(DMA_CYCLIC, pd->cap_mask);
-			dma_cap_set(DMA_PRIVATE, pd->cap_mask);
-		}
+	dma_cap_set(DMA_MEMCPY, pd->cap_mask);
+	if (pcfg->num_peri) {
+		dma_cap_set(DMA_SLAVE, pd->cap_mask);
+		dma_cap_set(DMA_CYCLIC, pd->cap_mask);
+		dma_cap_set(DMA_PRIVATE, pd->cap_mask);
 	}
 
 	pd->device_alloc_chan_resources = pl330_alloc_chan_resources;
diff --git a/include/linux/amba/pl330.h b/include/linux/amba/pl330.h
deleted file mode 100644
index fe93758e8403..000000000000
--- a/include/linux/amba/pl330.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* linux/include/linux/amba/pl330.h
- *
- * Copyright (C) 2010 Samsung Electronics Co. Ltd.
- *	Jaswinder Singh <jassi.brar@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef	__AMBA_PL330_H_
-#define	__AMBA_PL330_H_
-
-#include <linux/dmaengine.h>
-
-struct dma_pl330_platdata {
-	/*
-	 * Number of valid peripherals connected to DMAC.
-	 * This may be different from the value read from
-	 * CR0, as the PL330 implementation might have 'holes'
-	 * in the peri list or the peri could also be reached
-	 * from another DMAC which the platform prefers.
-	 */
-	u8 nr_valid_peri;
-	/* Array of valid peripherals */
-	u8 *peri_id;
-	/* Operational capabilities */
-	dma_cap_mask_t cap_mask;
-	/* Bytes to allocate for MC buffer */
-	unsigned mcbuf_sz;
-};
-
-extern bool pl330_filter(struct dma_chan *chan, void *param);
-#endif	/* __AMBA_PL330_H_ */
-- 
cgit v1.2.3


From 161b28aae1651aa7ad63ec14753aa8a751154340 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 28 Mar 2017 17:04:52 +0200
Subject: iommu/vt-d: Make sure IOMMUs are off when intel_iommu=off

When booting into a kexec kernel with intel_iommu=off, and
the previous kernel had intel_iommu=on, the IOMMU hardware
is still enabled and gets not disabled by the new kernel.

This causes the boot to fail because DMA is blocked by the
hardware. Disable the IOMMUs when we find it enabled in the
kexec kernel and boot with intel_iommu=off.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 238ad3447712..5f08ba13972b 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4730,6 +4730,15 @@ static int intel_iommu_cpu_dead(unsigned int cpu)
 	return 0;
 }
 
+static void intel_disable_iommus(void)
+{
+	struct intel_iommu *iommu = NULL;
+	struct dmar_drhd_unit *drhd;
+
+	for_each_iommu(iommu, drhd)
+		iommu_disable_translation(iommu);
+}
+
 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
 {
 	return container_of(dev, struct intel_iommu, iommu.dev);
@@ -4840,8 +4849,15 @@ int __init intel_iommu_init(void)
 		goto out_free_dmar;
 	}
 
-	if (no_iommu || dmar_disabled)
+	if (no_iommu || dmar_disabled) {
+		/*
+		 * Make sure the IOMMUs are switched off, even when we
+		 * boot into a kexec kernel and the previous kernel left
+		 * them enabled
+		 */
+		intel_disable_iommus();
 		goto out_free_dmar;
+	}
 
 	if (list_empty(&dmar_rmrr_units))
 		pr_info("No RMRR found\n");
-- 
cgit v1.2.3


From 675b11d94ce9baa5eb365a51b35d2793f77c8ab8 Mon Sep 17 00:00:00 2001
From: Simon Baatz <gmbnomis@gmail.com>
Date: Mon, 27 Mar 2017 20:02:07 +0200
Subject: mtd: nand: orion: fix clk handling

The clk handling in orion_nand.c had two problems:

- In the probe function, clk_put() was called for an enabled clock,
  which violates the API (see documentation for clk_put() in
  include/linux/clk.h)

- In the error path of the probe function, clk_put() could be called
  twice for the same clock.

In order to clean this up, use the managed function devm_clk_get() and
store the pointer to the clk in the driver data.

Fixes: baffab28b13120694fa3ebab08d3e99667a851d2 ('ARM: Orion: fix driver probe error handling with respect to clk')
Cc: <stable@vger.kernel.org> # v4.5+
Signed-off-by: Simon Baatz <gmbnomis@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/orion_nand.c | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c
index 4a91c5d000be..3acdc20485f1 100644
--- a/drivers/mtd/nand/orion_nand.c
+++ b/drivers/mtd/nand/orion_nand.c
@@ -23,6 +23,11 @@
 #include <asm/sizes.h>
 #include <linux/platform_data/mtd-orion_nand.h>
 
+struct orion_nand_info {
+	struct nand_chip chip;
+	struct clk *clk;
+};
+
 static void orion_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
 {
 	struct nand_chip *nc = mtd_to_nand(mtd);
@@ -75,20 +80,21 @@ static void orion_nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 
 static int __init orion_nand_probe(struct platform_device *pdev)
 {
+	struct orion_nand_info *info;
 	struct mtd_info *mtd;
 	struct nand_chip *nc;
 	struct orion_nand_data *board;
 	struct resource *res;
-	struct clk *clk;
 	void __iomem *io_base;
 	int ret = 0;
 	u32 val = 0;
 
-	nc = devm_kzalloc(&pdev->dev,
-			sizeof(struct nand_chip),
+	info = devm_kzalloc(&pdev->dev,
+			sizeof(struct orion_nand_info),
 			GFP_KERNEL);
-	if (!nc)
+	if (!info)
 		return -ENOMEM;
+	nc = &info->chip;
 	mtd = nand_to_mtd(nc);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -145,15 +151,13 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	if (board->dev_ready)
 		nc->dev_ready = board->dev_ready;
 
-	platform_set_drvdata(pdev, mtd);
+	platform_set_drvdata(pdev, info);
 
 	/* Not all platforms can gate the clock, so it is not
 	   an error if the clock does not exists. */
-	clk = clk_get(&pdev->dev, NULL);
-	if (!IS_ERR(clk)) {
-		clk_prepare_enable(clk);
-		clk_put(clk);
-	}
+	info->clk = devm_clk_get(&pdev->dev, NULL);
+	if (!IS_ERR(info->clk))
+		clk_prepare_enable(info->clk);
 
 	ret = nand_scan(mtd, 1);
 	if (ret)
@@ -169,26 +173,22 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	return 0;
 
 no_dev:
-	if (!IS_ERR(clk)) {
-		clk_disable_unprepare(clk);
-		clk_put(clk);
-	}
+	if (!IS_ERR(info->clk))
+		clk_disable_unprepare(info->clk);
 
 	return ret;
 }
 
 static int orion_nand_remove(struct platform_device *pdev)
 {
-	struct mtd_info *mtd = platform_get_drvdata(pdev);
-	struct clk *clk;
+	struct orion_nand_info *info = platform_get_drvdata(pdev);
+	struct nand_chip *chip = &info->chip;
+	struct mtd_info *mtd = nand_to_mtd(chip);
 
 	nand_release(mtd);
 
-	clk = clk_get(&pdev->dev, NULL);
-	if (!IS_ERR(clk)) {
-		clk_disable_unprepare(clk);
-		clk_put(clk);
-	}
+	if (!IS_ERR(info->clk))
+		clk_disable_unprepare(info->clk);
 
 	return 0;
 }
-- 
cgit v1.2.3


From ef980cf8b05bc862f4533fcdeae2911e6ff7027a Mon Sep 17 00:00:00 2001
From: Simon Baatz <gmbnomis@gmail.com>
Date: Mon, 27 Mar 2017 20:02:08 +0200
Subject: mtd: nand: orion: improve handling of optional clock

The clock gate used by orion_nand is not available on all platforms.
When getting this optional clock gate, the code masked all errors.
Let's be more precise here and actually only allow ENOENT.

EPROBE_DEFER is handled like any other error code since probe deferral
is not supported by drivers using module_platform_driver_probe().

Signed-off-by: Simon Baatz <gmbnomis@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/orion_nand.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c
index 3acdc20485f1..f8e463a97b9e 100644
--- a/drivers/mtd/nand/orion_nand.c
+++ b/drivers/mtd/nand/orion_nand.c
@@ -156,8 +156,17 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	/* Not all platforms can gate the clock, so it is not
 	   an error if the clock does not exists. */
 	info->clk = devm_clk_get(&pdev->dev, NULL);
-	if (!IS_ERR(info->clk))
-		clk_prepare_enable(info->clk);
+	if (IS_ERR(info->clk)) {
+		ret = PTR_ERR(info->clk);
+		if (ret == -ENOENT) {
+			info->clk = NULL;
+		} else {
+			dev_err(&pdev->dev, "failed to get clock!\n");
+			return ret;
+		}
+	}
+
+	clk_prepare_enable(info->clk);
 
 	ret = nand_scan(mtd, 1);
 	if (ret)
@@ -173,9 +182,7 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	return 0;
 
 no_dev:
-	if (!IS_ERR(info->clk))
-		clk_disable_unprepare(info->clk);
-
+	clk_disable_unprepare(info->clk);
 	return ret;
 }
 
@@ -187,8 +194,7 @@ static int orion_nand_remove(struct platform_device *pdev)
 
 	nand_release(mtd);
 
-	if (!IS_ERR(info->clk))
-		clk_disable_unprepare(info->clk);
+	clk_disable_unprepare(info->clk);
 
 	return 0;
 }
-- 
cgit v1.2.3


From e7d22fd2f634d570ab036336c7a6e5017244d0e0 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Thu, 9 Mar 2017 13:36:00 +0530
Subject: thermal: ti-soc-thermal: Fetch slope and offset from DT

Currently slope and offset values for calculating the hot spot
temperature of a thermal zone is being taken directly from driver
data. So fetch them from device tree.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index 0586bd0f2bab..2054a5c06e86 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -96,8 +96,8 @@ static inline int __ti_thermal_get_temp(void *devdata, int *temp)
 		return ret;
 
 	/* Default constants */
-	slope = s->slope;
-	constant = s->constant;
+	slope = thermal_zone_get_slope(data->ti_thermal);
+	constant = thermal_zone_get_offset(data->ti_thermal);
 
 	pcb_tz = data->pcb_tz;
 	/* In case pcb zone is available, use the extrapolation rule with it */
-- 
cgit v1.2.3


From 004f772871315091e671b0979047d9a88c1061fd Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Thu, 9 Mar 2017 13:36:01 +0530
Subject: thermal: ti-soc-thermal: Remove redundant constants

Now that slope and offset data are being passed from
device tree no need to populate in driver data.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/ti-soc-thermal/dra752-thermal-data.c | 10 ----------
 drivers/thermal/ti-soc-thermal/omap3-thermal-data.c  |  4 ----
 drivers/thermal/ti-soc-thermal/omap4-thermal-data.c  |  6 ------
 drivers/thermal/ti-soc-thermal/omap5-thermal-data.c  |  4 ----
 drivers/thermal/ti-soc-thermal/ti-bandgap.h          |  4 ----
 drivers/thermal/ti-soc-thermal/ti-thermal.h          | 16 ----------------
 6 files changed, 44 deletions(-)

diff --git a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c
index 118d7d847715..4167373327d9 100644
--- a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c
+++ b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c
@@ -410,8 +410,6 @@ const struct ti_bandgap_data dra752_data = {
 		.domain = "cpu",
 		.register_cooling = ti_thermal_register_cpu_cooling,
 		.unregister_cooling = ti_thermal_unregister_cpu_cooling,
-		.slope = DRA752_GRADIENT_SLOPE,
-		.constant = DRA752_GRADIENT_CONST,
 		.slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB,
 		.constant_pcb = DRA752_GRADIENT_CONST_W_PCB,
 		},
@@ -419,8 +417,6 @@ const struct ti_bandgap_data dra752_data = {
 		.registers = &dra752_gpu_temp_sensor_registers,
 		.ts_data = &dra752_gpu_temp_sensor_data,
 		.domain = "gpu",
-		.slope = DRA752_GRADIENT_SLOPE,
-		.constant = DRA752_GRADIENT_CONST,
 		.slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB,
 		.constant_pcb = DRA752_GRADIENT_CONST_W_PCB,
 		},
@@ -428,8 +424,6 @@ const struct ti_bandgap_data dra752_data = {
 		.registers = &dra752_core_temp_sensor_registers,
 		.ts_data = &dra752_core_temp_sensor_data,
 		.domain = "core",
-		.slope = DRA752_GRADIENT_SLOPE,
-		.constant = DRA752_GRADIENT_CONST,
 		.slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB,
 		.constant_pcb = DRA752_GRADIENT_CONST_W_PCB,
 		},
@@ -437,8 +431,6 @@ const struct ti_bandgap_data dra752_data = {
 		.registers = &dra752_dspeve_temp_sensor_registers,
 		.ts_data = &dra752_dspeve_temp_sensor_data,
 		.domain = "dspeve",
-		.slope = DRA752_GRADIENT_SLOPE,
-		.constant = DRA752_GRADIENT_CONST,
 		.slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB,
 		.constant_pcb = DRA752_GRADIENT_CONST_W_PCB,
 		},
@@ -446,8 +438,6 @@ const struct ti_bandgap_data dra752_data = {
 		.registers = &dra752_iva_temp_sensor_registers,
 		.ts_data = &dra752_iva_temp_sensor_data,
 		.domain = "iva",
-		.slope = DRA752_GRADIENT_SLOPE,
-		.constant = DRA752_GRADIENT_CONST,
 		.slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB,
 		.constant_pcb = DRA752_GRADIENT_CONST_W_PCB,
 		},
diff --git a/drivers/thermal/ti-soc-thermal/omap3-thermal-data.c b/drivers/thermal/ti-soc-thermal/omap3-thermal-data.c
index 3ee34340edab..c6d217913dd1 100644
--- a/drivers/thermal/ti-soc-thermal/omap3-thermal-data.c
+++ b/drivers/thermal/ti-soc-thermal/omap3-thermal-data.c
@@ -91,8 +91,6 @@ const struct ti_bandgap_data omap34xx_data = {
 		.registers = &omap34xx_mpu_temp_sensor_registers,
 		.ts_data = &omap34xx_mpu_temp_sensor_data,
 		.domain = "cpu",
-		.slope = 0,
-		.constant = 20000,
 		.slope_pcb = 0,
 		.constant_pcb = 20000,
 		.register_cooling = NULL,
@@ -164,8 +162,6 @@ const struct ti_bandgap_data omap36xx_data = {
 		.registers = &omap36xx_mpu_temp_sensor_registers,
 		.ts_data = &omap36xx_mpu_temp_sensor_data,
 		.domain = "cpu",
-		.slope = 0,
-		.constant = 20000,
 		.slope_pcb = 0,
 		.constant_pcb = 20000,
 		.register_cooling = NULL,
diff --git a/drivers/thermal/ti-soc-thermal/omap4-thermal-data.c b/drivers/thermal/ti-soc-thermal/omap4-thermal-data.c
index d255d33da9eb..fd1113360603 100644
--- a/drivers/thermal/ti-soc-thermal/omap4-thermal-data.c
+++ b/drivers/thermal/ti-soc-thermal/omap4-thermal-data.c
@@ -82,8 +82,6 @@ const struct ti_bandgap_data omap4430_data = {
 		.registers = &omap4430_mpu_temp_sensor_registers,
 		.ts_data = &omap4430_mpu_temp_sensor_data,
 		.domain = "cpu",
-		.slope = OMAP_GRADIENT_SLOPE_4430,
-		.constant = OMAP_GRADIENT_CONST_4430,
 		.slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_4430,
 		.constant_pcb = OMAP_GRADIENT_CONST_W_PCB_4430,
 		.register_cooling = ti_thermal_register_cpu_cooling,
@@ -222,8 +220,6 @@ const struct ti_bandgap_data omap4460_data = {
 		.registers = &omap4460_mpu_temp_sensor_registers,
 		.ts_data = &omap4460_mpu_temp_sensor_data,
 		.domain = "cpu",
-		.slope = OMAP_GRADIENT_SLOPE_4460,
-		.constant = OMAP_GRADIENT_CONST_4460,
 		.slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_4460,
 		.constant_pcb = OMAP_GRADIENT_CONST_W_PCB_4460,
 		.register_cooling = ti_thermal_register_cpu_cooling,
@@ -255,8 +251,6 @@ const struct ti_bandgap_data omap4470_data = {
 		.registers = &omap4460_mpu_temp_sensor_registers,
 		.ts_data = &omap4460_mpu_temp_sensor_data,
 		.domain = "cpu",
-		.slope = OMAP_GRADIENT_SLOPE_4470,
-		.constant = OMAP_GRADIENT_CONST_4470,
 		.slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_4470,
 		.constant_pcb = OMAP_GRADIENT_CONST_W_PCB_4470,
 		.register_cooling = ti_thermal_register_cpu_cooling,
diff --git a/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c b/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c
index 79ff70c446ba..cd9a304fb571 100644
--- a/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c
+++ b/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c
@@ -336,8 +336,6 @@ const struct ti_bandgap_data omap5430_data = {
 		.domain = "cpu",
 		.register_cooling = ti_thermal_register_cpu_cooling,
 		.unregister_cooling = ti_thermal_unregister_cpu_cooling,
-		.slope = OMAP_GRADIENT_SLOPE_5430_CPU,
-		.constant = OMAP_GRADIENT_CONST_5430_CPU,
 		.slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_5430_CPU,
 		.constant_pcb = OMAP_GRADIENT_CONST_W_PCB_5430_CPU,
 		},
@@ -345,8 +343,6 @@ const struct ti_bandgap_data omap5430_data = {
 		.registers = &omap5430_gpu_temp_sensor_registers,
 		.ts_data = &omap5430_gpu_temp_sensor_data,
 		.domain = "gpu",
-		.slope = OMAP_GRADIENT_SLOPE_5430_GPU,
-		.constant = OMAP_GRADIENT_CONST_5430_GPU,
 		.slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_5430_GPU,
 		.constant_pcb = OMAP_GRADIENT_CONST_W_PCB_5430_GPU,
 		},
diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.h b/drivers/thermal/ti-soc-thermal/ti-bandgap.h
index fe0adb898764..209c664c2823 100644
--- a/drivers/thermal/ti-soc-thermal/ti-bandgap.h
+++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.h
@@ -254,8 +254,6 @@ struct ti_bandgap {
  * @ts_data: pointer to struct with thresholds, limits of temperature sensor
  * @registers: pointer to the list of register offsets and bitfields
  * @domain: the name of the domain where the sensor is located
- * @slope: sensor gradient slope info for hotspot extrapolation equation
- * @constant: sensor gradient const info for hotspot extrapolation equation
  * @slope_pcb: sensor gradient slope info for hotspot extrapolation equation
  *             with no external influence
  * @constant_pcb: sensor gradient const info for hotspot extrapolation equation
@@ -274,8 +272,6 @@ struct ti_temp_sensor {
 	struct temp_sensor_registers	*registers;
 	char				*domain;
 	/* for hotspot extrapolation */
-	const int			slope;
-	const int			constant;
 	const int			slope_pcb;
 	const int			constant_pcb;
 	int (*register_cooling)(struct ti_bandgap *bgp, int id);
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal.h b/drivers/thermal/ti-soc-thermal/ti-thermal.h
index f8b7ffea6194..8e85ca973967 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal.h
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal.h
@@ -25,22 +25,6 @@
 
 #include "ti-bandgap.h"
 
-/* sensors gradient and offsets */
-#define OMAP_GRADIENT_SLOPE_4430				0
-#define OMAP_GRADIENT_CONST_4430				20000
-#define OMAP_GRADIENT_SLOPE_4460				348
-#define OMAP_GRADIENT_CONST_4460				-9301
-#define OMAP_GRADIENT_SLOPE_4470				308
-#define OMAP_GRADIENT_CONST_4470				-7896
-
-#define OMAP_GRADIENT_SLOPE_5430_CPU				65
-#define OMAP_GRADIENT_CONST_5430_CPU				-1791
-#define OMAP_GRADIENT_SLOPE_5430_GPU				117
-#define OMAP_GRADIENT_CONST_5430_GPU				-2992
-
-#define DRA752_GRADIENT_SLOPE					0
-#define DRA752_GRADIENT_CONST					2000
-
 /* PCB sensor calculation constants */
 #define OMAP_GRADIENT_SLOPE_W_PCB_4430				0
 #define OMAP_GRADIENT_CONST_W_PCB_4430				20000
-- 
cgit v1.2.3


From b263b473bf62dfd4aa10fc9068b2c78b1b20f202 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Thu, 9 Mar 2017 13:36:02 +0530
Subject: thermal: ti-soc-thermal: Remove redundant code

ti_thermal_expose_sensor always takes the
devm_thermal_zone_of_sensor_register call for registration
with the device tree nodes present for all the bandgap sensors
for omap3/4/5 and dra7 family. There are large chunks of unused
code. Removing all of them.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 154 +--------------------
 1 file changed, 3 insertions(+), 151 deletions(-)

diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index 2054a5c06e86..02790f69e26c 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -126,119 +126,6 @@ static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
 	return __ti_thermal_get_temp(data, temp);
 }
 
-/* Bind callback functions for thermal zone */
-static int ti_thermal_bind(struct thermal_zone_device *thermal,
-			   struct thermal_cooling_device *cdev)
-{
-	struct ti_thermal_data *data = thermal->devdata;
-	int id;
-
-	if (!data || IS_ERR(data))
-		return -ENODEV;
-
-	/* check if this is the cooling device we registered */
-	if (data->cool_dev != cdev)
-		return 0;
-
-	id = data->sensor_id;
-
-	/* Simple thing, two trips, one passive another critical */
-	return thermal_zone_bind_cooling_device(thermal, 0, cdev,
-	/* bind with min and max states defined by cpu_cooling */
-						THERMAL_NO_LIMIT,
-						THERMAL_NO_LIMIT,
-						THERMAL_WEIGHT_DEFAULT);
-}
-
-/* Unbind callback functions for thermal zone */
-static int ti_thermal_unbind(struct thermal_zone_device *thermal,
-			     struct thermal_cooling_device *cdev)
-{
-	struct ti_thermal_data *data = thermal->devdata;
-
-	if (!data || IS_ERR(data))
-		return -ENODEV;
-
-	/* check if this is the cooling device we registered */
-	if (data->cool_dev != cdev)
-		return 0;
-
-	/* Simple thing, two trips, one passive another critical */
-	return thermal_zone_unbind_cooling_device(thermal, 0, cdev);
-}
-
-/* Get mode callback functions for thermal zone */
-static int ti_thermal_get_mode(struct thermal_zone_device *thermal,
-			       enum thermal_device_mode *mode)
-{
-	struct ti_thermal_data *data = thermal->devdata;
-
-	if (data)
-		*mode = data->mode;
-
-	return 0;
-}
-
-/* Set mode callback functions for thermal zone */
-static int ti_thermal_set_mode(struct thermal_zone_device *thermal,
-			       enum thermal_device_mode mode)
-{
-	struct ti_thermal_data *data = thermal->devdata;
-	struct ti_bandgap *bgp;
-
-	bgp = data->bgp;
-
-	if (!data->ti_thermal) {
-		dev_notice(&thermal->device, "thermal zone not registered\n");
-		return 0;
-	}
-
-	mutex_lock(&data->ti_thermal->lock);
-
-	if (mode == THERMAL_DEVICE_ENABLED)
-		data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE;
-	else
-		data->ti_thermal->polling_delay = 0;
-
-	mutex_unlock(&data->ti_thermal->lock);
-
-	data->mode = mode;
-	ti_bandgap_write_update_interval(bgp, data->sensor_id,
-					data->ti_thermal->polling_delay);
-	thermal_zone_device_update(data->ti_thermal, THERMAL_EVENT_UNSPECIFIED);
-	dev_dbg(&thermal->device, "thermal polling set for duration=%d msec\n",
-		data->ti_thermal->polling_delay);
-
-	return 0;
-}
-
-/* Get trip type callback functions for thermal zone */
-static int ti_thermal_get_trip_type(struct thermal_zone_device *thermal,
-				    int trip, enum thermal_trip_type *type)
-{
-	if (!ti_thermal_is_valid_trip(trip))
-		return -EINVAL;
-
-	if (trip + 1 == OMAP_TRIP_NUMBER)
-		*type = THERMAL_TRIP_CRITICAL;
-	else
-		*type = THERMAL_TRIP_PASSIVE;
-
-	return 0;
-}
-
-/* Get trip temperature callback functions for thermal zone */
-static int ti_thermal_get_trip_temp(struct thermal_zone_device *thermal,
-				    int trip, int *temp)
-{
-	if (!ti_thermal_is_valid_trip(trip))
-		return -EINVAL;
-
-	*temp = ti_thermal_get_trip_value(trip);
-
-	return 0;
-}
-
 static int __ti_thermal_get_trend(void *p, int trip, enum thermal_trend *trend)
 {
 	struct ti_thermal_data *data = p;
@@ -262,38 +149,11 @@ static int __ti_thermal_get_trend(void *p, int trip, enum thermal_trend *trend)
 	return 0;
 }
 
-/* Get the temperature trend callback functions for thermal zone */
-static int ti_thermal_get_trend(struct thermal_zone_device *thermal,
-				int trip, enum thermal_trend *trend)
-{
-	return __ti_thermal_get_trend(thermal->devdata, trip, trend);
-}
-
-/* Get critical temperature callback functions for thermal zone */
-static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal,
-				    int *temp)
-{
-	/* shutdown zone */
-	return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp);
-}
-
 static const struct thermal_zone_of_device_ops ti_of_thermal_ops = {
 	.get_temp = __ti_thermal_get_temp,
 	.get_trend = __ti_thermal_get_trend,
 };
 
-static struct thermal_zone_device_ops ti_thermal_ops = {
-	.get_temp = ti_thermal_get_temp,
-	.get_trend = ti_thermal_get_trend,
-	.bind = ti_thermal_bind,
-	.unbind = ti_thermal_unbind,
-	.get_mode = ti_thermal_get_mode,
-	.set_mode = ti_thermal_set_mode,
-	.get_trip_type = ti_thermal_get_trip_type,
-	.get_trip_temp = ti_thermal_get_trip_temp,
-	.get_crit_temp = ti_thermal_get_crit_temp,
-};
-
 static struct ti_thermal_data
 *ti_thermal_build_data(struct ti_bandgap *bgp, int id)
 {
@@ -331,18 +191,10 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id,
 	data->ti_thermal = devm_thermal_zone_of_sensor_register(bgp->dev, id,
 					data, &ti_of_thermal_ops);
 	if (IS_ERR(data->ti_thermal)) {
-		/* Create thermal zone */
-		data->ti_thermal = thermal_zone_device_register(domain,
-				OMAP_TRIP_NUMBER, 0, data, &ti_thermal_ops,
-				NULL, FAST_TEMP_MONITORING_RATE,
-				FAST_TEMP_MONITORING_RATE);
-		if (IS_ERR(data->ti_thermal)) {
-			dev_err(bgp->dev, "thermal zone device is NULL\n");
-			return PTR_ERR(data->ti_thermal);
-		}
-		data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE;
-		data->our_zone = true;
+		dev_err(bgp->dev, "thermal zone device is NULL\n");
+		return PTR_ERR(data->ti_thermal);
 	}
+
 	ti_bandgap_set_sensor_data(bgp, id, data);
 	ti_bandgap_write_update_interval(bgp, data->sensor_id,
 					data->ti_thermal->polling_delay);
-- 
cgit v1.2.3


From 48d0341e41870bcfc42206d38e00a6b1c2fea929 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 29 Sep 2016 14:47:58 +0200
Subject: clk: renesas: cpg-mssr: Add support for fixing up clock tables

The same SoC may have different clocks and/or module clock parents,
depending on SoC revision.  One option is to use different sets of clock
tables for each SoC revision.  However, if the differences are small, it
is much more space-efficient to have a single set of clock tables, and
fix those up at runtime instead.

Hence provide three helpers:
  - Two helpers to NULLify core and module clocks that do not exist on
    some revisions (NULLified clocks are skipped during the registration
    phase),
  - One helper to reparent module clocks that have different clock
    parents.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/clk/renesas/renesas-cpg-mssr.c | 50 ++++++++++++++++++++++++++++++++++
 drivers/clk/renesas/renesas-cpg-mssr.h | 22 +++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c
index eadcbd43ff88..99eeec6f24ec 100644
--- a/drivers/clk/renesas/renesas-cpg-mssr.c
+++ b/drivers/clk/renesas/renesas-cpg-mssr.c
@@ -265,6 +265,11 @@ static void __init cpg_mssr_register_core_clk(const struct cpg_core_clk *core,
 	WARN_DEBUG(id >= priv->num_core_clks);
 	WARN_DEBUG(PTR_ERR(priv->clks[id]) != -ENOENT);
 
+	if (!core->name) {
+		/* Skip NULLified clock */
+		return;
+	}
+
 	switch (core->type) {
 	case CLK_TYPE_IN:
 		clk = of_clk_get_by_name(priv->dev->of_node, core->name);
@@ -335,6 +340,11 @@ static void __init cpg_mssr_register_mod_clk(const struct mssr_mod_clk *mod,
 	WARN_DEBUG(mod->parent >= priv->num_core_clks + priv->num_mod_clks);
 	WARN_DEBUG(PTR_ERR(priv->clks[id]) != -ENOENT);
 
+	if (!mod->name) {
+		/* Skip NULLified clock */
+		return;
+	}
+
 	parent = priv->clks[mod->parent];
 	if (IS_ERR(parent)) {
 		clk = parent;
@@ -734,5 +744,45 @@ static int __init cpg_mssr_init(void)
 
 subsys_initcall(cpg_mssr_init);
 
+void __init cpg_core_nullify_range(struct cpg_core_clk *core_clks,
+				   unsigned int num_core_clks,
+				   unsigned int first_clk,
+				   unsigned int last_clk)
+{
+	unsigned int i;
+
+	for (i = 0; i < num_core_clks; i++)
+		if (core_clks[i].id >= first_clk &&
+		    core_clks[i].id <= last_clk)
+			core_clks[i].name = NULL;
+}
+
+void __init mssr_mod_nullify(struct mssr_mod_clk *mod_clks,
+			     unsigned int num_mod_clks,
+			     const unsigned int *clks, unsigned int n)
+{
+	unsigned int i, j;
+
+	for (i = 0, j = 0; i < num_mod_clks && j < n; i++)
+		if (mod_clks[i].id == clks[j]) {
+			mod_clks[i].name = NULL;
+			j++;
+		}
+}
+
+void __init mssr_mod_reparent(struct mssr_mod_clk *mod_clks,
+			      unsigned int num_mod_clks,
+			      const struct mssr_mod_reparent *clks,
+			      unsigned int n)
+{
+	unsigned int i, j;
+
+	for (i = 0, j = 0; i < num_mod_clks && j < n; i++)
+		if (mod_clks[i].id == clks[j].clk) {
+			mod_clks[i].parent = clks[j].parent;
+			j++;
+		}
+}
+
 MODULE_DESCRIPTION("Renesas CPG/MSSR Driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/renesas/renesas-cpg-mssr.h b/drivers/clk/renesas/renesas-cpg-mssr.h
index 4bb7a80c6469..148f4f0aa2a4 100644
--- a/drivers/clk/renesas/renesas-cpg-mssr.h
+++ b/drivers/clk/renesas/renesas-cpg-mssr.h
@@ -134,4 +134,26 @@ extern const struct cpg_mssr_info r8a7743_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a7745_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a7795_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a7796_cpg_mssr_info;
+
+
+    /*
+     * Helpers for fixing up clock tables depending on SoC revision
+     */
+
+struct mssr_mod_reparent {
+	unsigned int clk, parent;
+};
+
+
+extern void cpg_core_nullify_range(struct cpg_core_clk *core_clks,
+				   unsigned int num_core_clks,
+				   unsigned int first_clk,
+				   unsigned int last_clk);
+extern void mssr_mod_nullify(struct mssr_mod_clk *mod_clks,
+			     unsigned int num_mod_clks,
+			     const unsigned int *clks, unsigned int n);
+extern void mssr_mod_reparent(struct mssr_mod_clk *mod_clks,
+			      unsigned int num_mod_clks,
+			      const struct mssr_mod_reparent *clks,
+			      unsigned int n);
 #endif
-- 
cgit v1.2.3


From 89f1b1c614253d7ea57543f769d93fced99d4d05 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 29 Sep 2016 13:06:15 +0200
Subject: clk: renesas: Add r8a7795 ES2.0 CPG Core Clock Definitions

Add all R-Car H3 ES2.0 Clock Pulse Generator Core Clock Outputs, as
listed in Table 8.2a ("List of Clocks [R-Car H3]") of the R-Car Gen3
Hardware User's Manual rev. 0.53E.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 include/dt-bindings/clock/r8a7795-cpg-mssr.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/dt-bindings/clock/r8a7795-cpg-mssr.h b/include/dt-bindings/clock/r8a7795-cpg-mssr.h
index e864aae0a256..f047eaf261f3 100644
--- a/include/dt-bindings/clock/r8a7795-cpg-mssr.h
+++ b/include/dt-bindings/clock/r8a7795-cpg-mssr.h
@@ -60,4 +60,11 @@
 #define R8A7795_CLK_R			45
 #define R8A7795_CLK_OSC			46
 
+/* r8a7795 ES2.0 CPG Core Clocks */
+#define R8A7795_CLK_S0D2		47
+#define R8A7795_CLK_S0D3		48
+#define R8A7795_CLK_S0D6		49
+#define R8A7795_CLK_S0D8		50
+#define R8A7795_CLK_S0D12		51
+
 #endif /* __DT_BINDINGS_CLOCK_R8A7795_CPG_MSSR_H__ */
-- 
cgit v1.2.3


From 5573d194128b47334e3edb2db87cb471449d445a Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 29 Sep 2016 14:36:11 +0200
Subject: clk: renesas: r8a7795: Add support for R-Car H3 ES2.0

The Clock Pulse Generator / Module Standby and Software Reset module in
R-Car H3 ES2.0 differs from ES1.x in the following areas:
  - More core clocks (S0D2, S0D3, S0D6, S0D8, S0D12),
  - Different parent clocks for AUDMAC, EtherAVB, FCP, FDP, IMR,
    SYS-DMAC, VIN, VSPB, VSPI,
  - Removal of modules CSI21, FCPCI, FCPF2, FCPVD3, FCPVI2, FDP1-2,
    USB3-IF1, VSPD3, VSPI2,
  - Addition of modules EHCI3, HS-USB-IF3, USB-DMAC3-0, USB-DMAC3-1.

The goal is twofold:
  1. Support both the ES1.x and ES2.0 SoC revisions in a single binary
     for now,
  2. Make it clear which code supports ES1.x, so it can easily be
     identified and removed later, when production SoCs are deemed
     ubiquitous.

This is achieved by:
  - Updating the clock tables for the latest revision (ES2.0), but not
    removing clocks that only exist on earlier revisions (ES1.x),
  - Detecting the SoC revision at runtime using the new soc_device_match()
    API, and fixing up the clocks tables to match the actual SoC
    revision, by:
      - NULLifying core and module clocks of modules that do not exist,
      - Reparenting module clocks that have a different parent on ES1.x.

Based on R-Car Gen3 Hardware User's Manual rev. 0.53E.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/clk/renesas/r8a7795-cpg-mssr.c | 201 +++++++++++++++++++++++++--------
 1 file changed, 151 insertions(+), 50 deletions(-)

diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c
index 4699f416e275..eaa98b488f01 100644
--- a/drivers/clk/renesas/r8a7795-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/soc/renesas/rcar-rst.h>
+#include <linux/sys_soc.h>
 
 #include <dt-bindings/clock/r8a7795-cpg-mssr.h>
 
@@ -24,7 +25,7 @@
 
 enum clk_ids {
 	/* Core Clock Outputs exported to DT */
-	LAST_DT_CORE_CLK = R8A7795_CLK_OSC,
+	LAST_DT_CORE_CLK = R8A7795_CLK_S0D12,
 
 	/* External Input Clocks */
 	CLK_EXTAL,
@@ -51,7 +52,7 @@ enum clk_ids {
 	MOD_CLK_BASE
 };
 
-static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
+static struct cpg_core_clk r8a7795_core_clks[] __initdata = {
 	/* External Clock Inputs */
 	DEF_INPUT("extal",      CLK_EXTAL),
 	DEF_INPUT("extalr",     CLK_EXTALR),
@@ -78,7 +79,12 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
 	DEF_FIXED("zt",         R8A7795_CLK_ZT,    CLK_PLL1_DIV2,  4, 1),
 	DEF_FIXED("zx",         R8A7795_CLK_ZX,    CLK_PLL1_DIV2,  2, 1),
 	DEF_FIXED("s0d1",       R8A7795_CLK_S0D1,  CLK_S0,         1, 1),
+	DEF_FIXED("s0d2",       R8A7795_CLK_S0D2,  CLK_S0,         2, 1),
+	DEF_FIXED("s0d3",       R8A7795_CLK_S0D3,  CLK_S0,         3, 1),
 	DEF_FIXED("s0d4",       R8A7795_CLK_S0D4,  CLK_S0,         4, 1),
+	DEF_FIXED("s0d6",       R8A7795_CLK_S0D6,  CLK_S0,         6, 1),
+	DEF_FIXED("s0d8",       R8A7795_CLK_S0D8,  CLK_S0,         8, 1),
+	DEF_FIXED("s0d12",      R8A7795_CLK_S0D12, CLK_S0,        12, 1),
 	DEF_FIXED("s1d1",       R8A7795_CLK_S1D1,  CLK_S1,         1, 1),
 	DEF_FIXED("s1d2",       R8A7795_CLK_S1D2,  CLK_S1,         2, 1),
 	DEF_FIXED("s1d4",       R8A7795_CLK_S1D4,  CLK_S1,         4, 1),
@@ -108,10 +114,10 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
 	DEF_BASE("r",           R8A7795_CLK_R,     CLK_TYPE_GEN3_R, CLK_RINT),
 };
 
-static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
-	DEF_MOD("fdp1-2",		 117,	R8A7795_CLK_S2D1),
-	DEF_MOD("fdp1-1",		 118,	R8A7795_CLK_S2D1),
-	DEF_MOD("fdp1-0",		 119,	R8A7795_CLK_S2D1),
+static struct mssr_mod_clk r8a7795_mod_clks[] __initdata = {
+	DEF_MOD("fdp1-2",		 117,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("fdp1-1",		 118,	R8A7795_CLK_S0D1),
+	DEF_MOD("fdp1-0",		 119,	R8A7795_CLK_S0D1),
 	DEF_MOD("scif5",		 202,	R8A7795_CLK_S3D4),
 	DEF_MOD("scif4",		 203,	R8A7795_CLK_S3D4),
 	DEF_MOD("scif3",		 204,	R8A7795_CLK_S3D4),
@@ -121,9 +127,9 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("msiof2",		 209,	R8A7795_CLK_MSO),
 	DEF_MOD("msiof1",		 210,	R8A7795_CLK_MSO),
 	DEF_MOD("msiof0",		 211,	R8A7795_CLK_MSO),
-	DEF_MOD("sys-dmac2",		 217,	R8A7795_CLK_S3D1),
-	DEF_MOD("sys-dmac1",		 218,	R8A7795_CLK_S3D1),
-	DEF_MOD("sys-dmac0",		 219,	R8A7795_CLK_S3D1),
+	DEF_MOD("sys-dmac2",		 217,	R8A7795_CLK_S0D3),
+	DEF_MOD("sys-dmac1",		 218,	R8A7795_CLK_S0D3),
+	DEF_MOD("sys-dmac0",		 219,	R8A7795_CLK_S0D3),
 	DEF_MOD("cmt3",			 300,	R8A7795_CLK_R),
 	DEF_MOD("cmt2",			 301,	R8A7795_CLK_R),
 	DEF_MOD("cmt1",			 302,	R8A7795_CLK_R),
@@ -135,15 +141,15 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("sdif0",		 314,	R8A7795_CLK_SD0),
 	DEF_MOD("pcie1",		 318,	R8A7795_CLK_S3D1),
 	DEF_MOD("pcie0",		 319,	R8A7795_CLK_S3D1),
-	DEF_MOD("usb3-if1",		 327,	R8A7795_CLK_S3D1),
+	DEF_MOD("usb3-if1",		 327,	R8A7795_CLK_S3D1), /* ES1.x */
 	DEF_MOD("usb3-if0",		 328,	R8A7795_CLK_S3D1),
 	DEF_MOD("usb-dmac0",		 330,	R8A7795_CLK_S3D1),
 	DEF_MOD("usb-dmac1",		 331,	R8A7795_CLK_S3D1),
 	DEF_MOD("rwdt",			 402,	R8A7795_CLK_R),
 	DEF_MOD("intc-ex",		 407,	R8A7795_CLK_CP),
 	DEF_MOD("intc-ap",		 408,	R8A7795_CLK_S3D1),
-	DEF_MOD("audmac1",		 501,	R8A7795_CLK_S3D1),
-	DEF_MOD("audmac0",		 502,	R8A7795_CLK_S3D1),
+	DEF_MOD("audmac1",		 501,	R8A7795_CLK_S0D3),
+	DEF_MOD("audmac0",		 502,	R8A7795_CLK_S0D3),
 	DEF_MOD("drif7",		 508,	R8A7795_CLK_S3D2),
 	DEF_MOD("drif6",		 509,	R8A7795_CLK_S3D2),
 	DEF_MOD("drif5",		 510,	R8A7795_CLK_S3D2),
@@ -159,35 +165,35 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("hscif0",		 520,	R8A7795_CLK_S3D1),
 	DEF_MOD("thermal",		 522,	R8A7795_CLK_CP),
 	DEF_MOD("pwm",			 523,	R8A7795_CLK_S3D4),
-	DEF_MOD("fcpvd3",		 600,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvd2",		 601,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvd1",		 602,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvd0",		 603,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvb1",		 606,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvb0",		 607,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvi2",		 609,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvi1",		 610,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpvi0",		 611,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpf2",		 613,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpf1",		 614,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpf0",		 615,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpci1",		 616,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpci0",		 617,	R8A7795_CLK_S2D1),
-	DEF_MOD("fcpcs",		 619,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspd3",		 620,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspd2",		 621,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspd1",		 622,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspd0",		 623,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspbc",		 624,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspbd",		 626,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspi2",		 629,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspi1",		 630,	R8A7795_CLK_S2D1),
-	DEF_MOD("vspi0",		 631,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvd3",		 600,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("fcpvd2",		 601,	R8A7795_CLK_S0D2),
+	DEF_MOD("fcpvd1",		 602,	R8A7795_CLK_S0D2),
+	DEF_MOD("fcpvd0",		 603,	R8A7795_CLK_S0D2),
+	DEF_MOD("fcpvb1",		 606,	R8A7795_CLK_S0D1),
+	DEF_MOD("fcpvb0",		 607,	R8A7795_CLK_S0D1),
+	DEF_MOD("fcpvi2",		 609,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("fcpvi1",		 610,	R8A7795_CLK_S0D1),
+	DEF_MOD("fcpvi0",		 611,	R8A7795_CLK_S0D1),
+	DEF_MOD("fcpf2",		 613,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("fcpf1",		 614,	R8A7795_CLK_S0D1),
+	DEF_MOD("fcpf0",		 615,	R8A7795_CLK_S0D1),
+	DEF_MOD("fcpci1",		 616,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("fcpci0",		 617,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("fcpcs",		 619,	R8A7795_CLK_S0D1),
+	DEF_MOD("vspd3",		 620,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("vspd2",		 621,	R8A7795_CLK_S0D2),
+	DEF_MOD("vspd1",		 622,	R8A7795_CLK_S0D2),
+	DEF_MOD("vspd0",		 623,	R8A7795_CLK_S0D2),
+	DEF_MOD("vspbc",		 624,	R8A7795_CLK_S0D1),
+	DEF_MOD("vspbd",		 626,	R8A7795_CLK_S0D1),
+	DEF_MOD("vspi2",		 629,	R8A7795_CLK_S2D1), /* ES1.x */
+	DEF_MOD("vspi1",		 630,	R8A7795_CLK_S0D1),
+	DEF_MOD("vspi0",		 631,	R8A7795_CLK_S0D1),
 	DEF_MOD("ehci2",		 701,	R8A7795_CLK_S3D4),
 	DEF_MOD("ehci1",		 702,	R8A7795_CLK_S3D4),
 	DEF_MOD("ehci0",		 703,	R8A7795_CLK_S3D4),
 	DEF_MOD("hsusb",		 704,	R8A7795_CLK_S3D4),
-	DEF_MOD("csi21",		 713,	R8A7795_CLK_CSI0),
+	DEF_MOD("csi21",		 713,	R8A7795_CLK_CSI0), /* ES1.x */
 	DEF_MOD("csi20",		 714,	R8A7795_CLK_CSI0),
 	DEF_MOD("csi41",		 715,	R8A7795_CLK_CSI0),
 	DEF_MOD("csi40",		 716,	R8A7795_CLK_CSI0),
@@ -198,20 +204,20 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("lvds",			 727,	R8A7795_CLK_S0D4),
 	DEF_MOD("hdmi1",		 728,	R8A7795_CLK_HDMI),
 	DEF_MOD("hdmi0",		 729,	R8A7795_CLK_HDMI),
-	DEF_MOD("vin7",			 804,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin6",			 805,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin5",			 806,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin4",			 807,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin3",			 808,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin2",			 809,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin1",			 810,	R8A7795_CLK_S2D1),
-	DEF_MOD("vin0",			 811,	R8A7795_CLK_S2D1),
-	DEF_MOD("etheravb",		 812,	R8A7795_CLK_S3D2),
+	DEF_MOD("vin7",			 804,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin6",			 805,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin5",			 806,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin4",			 807,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin3",			 808,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin2",			 809,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin1",			 810,	R8A7795_CLK_S0D2),
+	DEF_MOD("vin0",			 811,	R8A7795_CLK_S0D2),
+	DEF_MOD("etheravb",		 812,	R8A7795_CLK_S0D6),
 	DEF_MOD("sata0",		 815,	R8A7795_CLK_S3D2),
-	DEF_MOD("imr3",			 820,	R8A7795_CLK_S2D1),
-	DEF_MOD("imr2",			 821,	R8A7795_CLK_S2D1),
-	DEF_MOD("imr1",			 822,	R8A7795_CLK_S2D1),
-	DEF_MOD("imr0",			 823,	R8A7795_CLK_S2D1),
+	DEF_MOD("imr3",			 820,	R8A7795_CLK_S0D2),
+	DEF_MOD("imr2",			 821,	R8A7795_CLK_S0D2),
+	DEF_MOD("imr1",			 822,	R8A7795_CLK_S0D2),
+	DEF_MOD("imr0",			 823,	R8A7795_CLK_S0D2),
 	DEF_MOD("gpio7",		 905,	R8A7795_CLK_CP),
 	DEF_MOD("gpio6",		 906,	R8A7795_CLK_CP),
 	DEF_MOD("gpio5",		 907,	R8A7795_CLK_CP),
@@ -314,6 +320,82 @@ static const struct rcar_gen3_cpg_pll_config cpg_pll_configs[16] __initconst = {
 	{ 2,		192,		192,	},
 };
 
+static const struct soc_device_attribute r8a7795es1[] __initconst = {
+	{ .soc_id = "r8a7795", .revision = "ES1.*" },
+	{ /* sentinel */ }
+};
+
+
+	/*
+	 * Fixups for R-Car H3 ES1.x
+	 */
+
+static const unsigned int r8a7795es1_mod_nullify[] __initconst = {
+	MOD_CLK_ID(326),			/* USB-DMAC3-0 */
+	MOD_CLK_ID(329),			/* USB-DMAC3-1 */
+	MOD_CLK_ID(700),			/* EHCI/OHCI3 */
+	MOD_CLK_ID(705),			/* HS-USB-IF3 */
+
+};
+
+static const struct mssr_mod_reparent r8a7795es1_mod_reparent[] __initconst = {
+	{ MOD_CLK_ID(118), R8A7795_CLK_S2D1 },	/* FDP1-1 */
+	{ MOD_CLK_ID(119), R8A7795_CLK_S2D1 },	/* FDP1-0 */
+	{ MOD_CLK_ID(217), R8A7795_CLK_S3D1 },	/* SYS-DMAC2 */
+	{ MOD_CLK_ID(218), R8A7795_CLK_S3D1 },	/* SYS-DMAC1 */
+	{ MOD_CLK_ID(219), R8A7795_CLK_S3D1 },	/* SYS-DMAC0 */
+	{ MOD_CLK_ID(501), R8A7795_CLK_S3D1 },	/* AUDMAC1 */
+	{ MOD_CLK_ID(502), R8A7795_CLK_S3D1 },	/* AUDMAC0 */
+	{ MOD_CLK_ID(601), R8A7795_CLK_S2D1 },	/* FCPVD2 */
+	{ MOD_CLK_ID(602), R8A7795_CLK_S2D1 },	/* FCPVD1 */
+	{ MOD_CLK_ID(603), R8A7795_CLK_S2D1 },	/* FCPVD0 */
+	{ MOD_CLK_ID(606), R8A7795_CLK_S2D1 },	/* FCPVB1 */
+	{ MOD_CLK_ID(607), R8A7795_CLK_S2D1 },	/* FCPVB0 */
+	{ MOD_CLK_ID(610), R8A7795_CLK_S2D1 },	/* FCPVI1 */
+	{ MOD_CLK_ID(611), R8A7795_CLK_S2D1 },	/* FCPVI0 */
+	{ MOD_CLK_ID(614), R8A7795_CLK_S2D1 },	/* FCPF1 */
+	{ MOD_CLK_ID(615), R8A7795_CLK_S2D1 },	/* FCPF0 */
+	{ MOD_CLK_ID(619), R8A7795_CLK_S2D1 },	/* FCPCS */
+	{ MOD_CLK_ID(621), R8A7795_CLK_S2D1 },	/* VSPD2 */
+	{ MOD_CLK_ID(622), R8A7795_CLK_S2D1 },	/* VSPD1 */
+	{ MOD_CLK_ID(623), R8A7795_CLK_S2D1 },	/* VSPD0 */
+	{ MOD_CLK_ID(624), R8A7795_CLK_S2D1 },	/* VSPBC */
+	{ MOD_CLK_ID(626), R8A7795_CLK_S2D1 },	/* VSPBD */
+	{ MOD_CLK_ID(630), R8A7795_CLK_S2D1 },	/* VSPI1 */
+	{ MOD_CLK_ID(631), R8A7795_CLK_S2D1 },	/* VSPI0 */
+	{ MOD_CLK_ID(804), R8A7795_CLK_S2D1 },	/* VIN7 */
+	{ MOD_CLK_ID(805), R8A7795_CLK_S2D1 },	/* VIN6 */
+	{ MOD_CLK_ID(806), R8A7795_CLK_S2D1 },	/* VIN5 */
+	{ MOD_CLK_ID(807), R8A7795_CLK_S2D1 },	/* VIN4 */
+	{ MOD_CLK_ID(808), R8A7795_CLK_S2D1 },	/* VIN3 */
+	{ MOD_CLK_ID(809), R8A7795_CLK_S2D1 },	/* VIN2 */
+	{ MOD_CLK_ID(810), R8A7795_CLK_S2D1 },	/* VIN1 */
+	{ MOD_CLK_ID(811), R8A7795_CLK_S2D1 },	/* VIN0 */
+	{ MOD_CLK_ID(812), R8A7795_CLK_S3D2 },	/* EAVB-IF */
+	{ MOD_CLK_ID(820), R8A7795_CLK_S2D1 },	/* IMR3 */
+	{ MOD_CLK_ID(821), R8A7795_CLK_S2D1 },	/* IMR2 */
+	{ MOD_CLK_ID(822), R8A7795_CLK_S2D1 },	/* IMR1 */
+	{ MOD_CLK_ID(823), R8A7795_CLK_S2D1 },	/* IMR0 */
+};
+
+
+	/*
+	 * Fixups for R-Car H3 ES2.x
+	 */
+
+static const unsigned int r8a7795es2_mod_nullify[] __initconst = {
+	MOD_CLK_ID(117),			/* FDP1-2 */
+	MOD_CLK_ID(327),			/* USB3-IF1 */
+	MOD_CLK_ID(600),			/* FCPVD3 */
+	MOD_CLK_ID(609),			/* FCPVI2 */
+	MOD_CLK_ID(613),			/* FCPF2 */
+	MOD_CLK_ID(616),			/* FCPCI1 */
+	MOD_CLK_ID(617),			/* FCPCI0 */
+	MOD_CLK_ID(620),			/* VSPD3 */
+	MOD_CLK_ID(629),			/* VSPI2 */
+	MOD_CLK_ID(713),			/* CSI21 */
+};
+
 static int __init r8a7795_cpg_mssr_init(struct device *dev)
 {
 	const struct rcar_gen3_cpg_pll_config *cpg_pll_config;
@@ -330,6 +412,25 @@ static int __init r8a7795_cpg_mssr_init(struct device *dev)
 		return -EINVAL;
 	}
 
+	if (soc_device_match(r8a7795es1)) {
+		cpg_core_nullify_range(r8a7795_core_clks,
+				       ARRAY_SIZE(r8a7795_core_clks),
+				       R8A7795_CLK_S0D2, R8A7795_CLK_S0D12);
+		mssr_mod_nullify(r8a7795_mod_clks,
+				 ARRAY_SIZE(r8a7795_mod_clks),
+				 r8a7795es1_mod_nullify,
+				 ARRAY_SIZE(r8a7795es1_mod_nullify));
+		mssr_mod_reparent(r8a7795_mod_clks,
+				  ARRAY_SIZE(r8a7795_mod_clks),
+				  r8a7795es1_mod_reparent,
+				  ARRAY_SIZE(r8a7795es1_mod_reparent));
+	} else {
+		mssr_mod_nullify(r8a7795_mod_clks,
+				 ARRAY_SIZE(r8a7795_mod_clks),
+				 r8a7795es2_mod_nullify,
+				 ARRAY_SIZE(r8a7795es2_mod_nullify));
+	}
+
 	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR, cpg_mode);
 }
 
-- 
cgit v1.2.3


From bb1953067c05be30a605ee1d5b05a2677735bb37 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 10 Mar 2017 12:13:37 +0100
Subject: clk: renesas: rcar-gen3-cpg: Add support for RCLK on R-Car H3 ES2.0

Starting with R-Car H3 ES2.0, the parent of RCLK is selected using MD28.

Add support for that, but retain the old behavior for R-Car H3 ES1.x and
M3-W ES1.0 using a quirk.

Inspired by a patch by Takeshi Kihara in the BSP.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Takeshi Kihara <takeshi.kihara.df@renesas.com>
---
 drivers/clk/renesas/rcar-gen3-cpg.c | 38 ++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/clk/renesas/rcar-gen3-cpg.c b/drivers/clk/renesas/rcar-gen3-cpg.c
index e5247e3dc897..3dee900522b7 100644
--- a/drivers/clk/renesas/rcar-gen3-cpg.c
+++ b/drivers/clk/renesas/rcar-gen3-cpg.c
@@ -252,11 +252,20 @@ static u32 cpg_mode __initdata;
 static u32 cpg_quirks __initdata;
 
 #define PLL_ERRATA	BIT(0)		/* Missing PLL0/2/4 post-divider */
+#define RCKCR_CKSEL	BIT(1)		/* Manual RCLK parent selection */
 
 static const struct soc_device_attribute cpg_quirks_match[] __initconst = {
 	{
 		.soc_id = "r8a7795", .revision = "ES1.0",
-		.data = (void *)PLL_ERRATA,
+		.data = (void *)(PLL_ERRATA | RCKCR_CKSEL),
+	},
+	{
+		.soc_id = "r8a7795", .revision = "ES1.*",
+		.data = (void *)RCKCR_CKSEL,
+	},
+	{
+		.soc_id = "r8a7796", .revision = "ES1.0",
+		.data = (void *)RCKCR_CKSEL,
 	},
 	{ /* sentinel */ }
 };
@@ -330,18 +339,25 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 		return cpg_sd_clk_register(core, base, __clk_get_name(parent));
 
 	case CLK_TYPE_GEN3_R:
-		/*
-		 * RINT is default.
-		 * Only if EXTALR is populated, we switch to it.
-		 */
-		value = readl(base + CPG_RCKCR) & 0x3f;
-
-		if (clk_get_rate(clks[cpg_clk_extalr])) {
-			parent = clks[cpg_clk_extalr];
-			value |= BIT(15);
+		if (cpg_quirks & RCKCR_CKSEL) {
+			/*
+			 * RINT is default.
+			 * Only if EXTALR is populated, we switch to it.
+			 */
+			value = readl(base + CPG_RCKCR) & 0x3f;
+
+			if (clk_get_rate(clks[cpg_clk_extalr])) {
+				parent = clks[cpg_clk_extalr];
+				value |= BIT(15);
+			}
+
+			writel(value, base + CPG_RCKCR);
+			break;
 		}
 
-		writel(value, base + CPG_RCKCR);
+		/* Select parent clock of RCLK by MD28 */
+		if (cpg_mode & BIT(28))
+			parent = clks[cpg_clk_extalr];
 		break;
 
 	default:
-- 
cgit v1.2.3


From 78aefd2d5911c4e0b5dc0b0578b3b8c7673be1d2 Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 29 Mar 2017 20:43:50 +0200
Subject: thermal: rcar_gen3_thermal: add delay in .thermal_init on r8a7796
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .thermal_init needs to be delayed a short amount of time to allow
for the TEMP register to contain something useful. If it's not delayed
these warnings are common during boot:

thermal thermal_zone0: failed to read out thermal zone (-5)
thermal thermal_zone1: failed to read out thermal zone (-5)
thermal thermal_zone2: failed to read out thermal zone (-5)

The warnings are triggered by the first call to .get_temp() while the
TEMP register contains 0 and rcar_gen3_thermal_get_temp() returns -EIO
since a TEMP value of 0 will result in a temperature reading which is
out of specifications.

This should have been done in the initial commit which adds the driver
as the same issue was found and corrected for r8a7795.

Fixes: 564e73d283af9d4c ("thermal: rcar_gen3_thermal: Add R-Car Gen3 thermal driver")
Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/rcar_gen3_thermal.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index d33c845244b1..ec477d47d0ba 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -222,6 +222,8 @@ static void r8a7796_thermal_init(struct rcar_gen3_thermal_tsc *tsc)
 	reg_val = rcar_gen3_thermal_read(tsc, REG_GEN3_THCTR);
 	reg_val |= THCTR_THSST;
 	rcar_gen3_thermal_write(tsc, REG_GEN3_THCTR, reg_val);
+
+	usleep_range(1000, 2000);
 }
 
 static const struct rcar_gen3_thermal_data r8a7795_data = {
-- 
cgit v1.2.3


From 100cfbcf2580b0605f50af32fefd9c8d1d8357fb Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 29 Mar 2017 20:43:51 +0200
Subject: thermal: rcar_gen3_thermal: remove unneeded mutex
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is no point in protecting a register read with a lock. This is
most likely a leftover from when the driver was reworked before being
submitted for upstream.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/rcar_gen3_thermal.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index ec477d47d0ba..cb5c362c0000 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -20,7 +20,6 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
@@ -72,7 +71,6 @@ struct rcar_gen3_thermal_tsc {
 	void __iomem *base;
 	struct thermal_zone_device *zone;
 	struct equation_coefs coef;
-	struct mutex lock;
 };
 
 struct rcar_gen3_thermal_priv {
@@ -163,16 +161,12 @@ static int rcar_gen3_thermal_get_temp(void *devdata, int *temp)
 	u32 reg;
 
 	/* Read register and convert to mili Celsius */
-	mutex_lock(&tsc->lock);
-
 	reg = rcar_gen3_thermal_read(tsc, REG_GEN3_TEMP) & CTEMP_MASK;
 
 	val1 = FIXPT_DIV(FIXPT_INT(reg) - tsc->coef.b1, tsc->coef.a1);
 	val2 = FIXPT_DIV(FIXPT_INT(reg) - tsc->coef.b2, tsc->coef.a2);
 	mcelsius = FIXPT_TO_MCELSIUS((val1 + val2) / 2);
 
-	mutex_unlock(&tsc->lock);
-
 	/* Make sure we are inside specifications */
 	if ((mcelsius < MCELSIUS(-40)) || (mcelsius > MCELSIUS(125)))
 		return -EIO;
@@ -299,7 +293,6 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 		}
 
 		priv->tscs[i] = tsc;
-		mutex_init(&tsc->lock);
 
 		match_data->thermal_init(tsc);
 		rcar_gen3_thermal_calc_coefs(&tsc->coef, ptat, thcode[i]);
-- 
cgit v1.2.3


From d51546c0db975a4750161d17eef62dfcf9eedc90 Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 29 Mar 2017 20:43:52 +0200
Subject: thermal: rcar_gen3_thermal: check that TSC exists before memory
 allocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the check for a TSC resource before allocating memory for a new
TSC. If no TSC is found there is little point in allocating memory for
it.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/rcar_gen3_thermal.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index cb5c362c0000..9b6bc03dd142 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -276,16 +276,16 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 	for (i = 0; i < TSC_MAX_NUM; i++) {
 		struct rcar_gen3_thermal_tsc *tsc;
 
+		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
+		if (!res)
+			break;
+
 		tsc = devm_kzalloc(dev, sizeof(*tsc), GFP_KERNEL);
 		if (!tsc) {
 			ret = -ENOMEM;
 			goto error_unregister;
 		}
 
-		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
-		if (!res)
-			break;
-
 		tsc->base = devm_ioremap_resource(dev, res);
 		if (IS_ERR(tsc->base)) {
 			ret = PTR_ERR(tsc->base);
-- 
cgit v1.2.3


From 97dad1f1d2b3f2a2a77551849357b7ac38b0b6ff Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 29 Mar 2017 20:43:53 +0200
Subject: thermal: rcar_gen3_thermal: record and check number of TSCs found
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Record how many TSCs are found in struct rcar_gen3_thermal_priv, this is
needed to be able to add hardware interrupts for trip points later. Also
add a check to make sure at least one TSC is found.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/rcar_gen3_thermal.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index 9b6bc03dd142..4a08b35533dc 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -75,6 +75,7 @@ struct rcar_gen3_thermal_tsc {
 
 struct rcar_gen3_thermal_priv {
 	struct rcar_gen3_thermal_tsc *tscs[TSC_MAX_NUM];
+	unsigned int num_tscs;
 };
 
 struct rcar_gen3_thermal_data {
@@ -307,6 +308,13 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 		tsc->zone = zone;
 	}
 
+	priv->num_tscs = i;
+
+	if (!priv->num_tscs) {
+		ret = -ENODEV;
+		goto error_unregister;
+	}
+
 	return 0;
 
 error_unregister:
-- 
cgit v1.2.3


From 7d4b269776ec67c1b7d83c6c727a2771e5f39d12 Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 29 Mar 2017 20:43:54 +0200
Subject: thermal: rcar_gen3_thermal: enable hardware interrupts for trip
 points
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable hardware trip points by implementing the set_trips callback. The
thermal core will take care of setting the initial trip point window and
to update it once the driver reports a TSC has moved outside it.

The interrupt structure for this device is a bit odd. There is not a
dedicated IRQ for each TSC, instead the interrupts are shared between
all TSCs. IRQn is fired if the temp monitored in IRQTEMPn is reached in
any of the TSCs, example IRQ3 is fired if temperature in IRQTEMP3 is
reached in either TSC0, TSC1 or TSC2.

For this reason the usage of interrupts in this driver is an all-on or
all-off design. When an interrupt happens all TSCs are checked and all
thermal zones are updated. This could be refined to be more fine grained
but the thermal core takes care of only updating the thermal zones that
have left their trip point window.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/rcar_gen3_thermal.c | 132 +++++++++++++++++++++++++++++++++++-
 1 file changed, 131 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index 4a08b35533dc..d37c7d8f8fcd 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -23,8 +23,11 @@
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/spinlock.h>
 #include <linux/thermal.h>
 
+#include "thermal_core.h"
+
 /* Register offsets */
 #define REG_GEN3_IRQSTR		0x04
 #define REG_GEN3_IRQMSK		0x08
@@ -40,6 +43,14 @@
 #define REG_GEN3_THCODE2	0x54
 #define REG_GEN3_THCODE3	0x58
 
+/* IRQ{STR,MSK,EN} bits */
+#define IRQ_TEMP1		BIT(0)
+#define IRQ_TEMP2		BIT(1)
+#define IRQ_TEMP3		BIT(2)
+#define IRQ_TEMPD1		BIT(3)
+#define IRQ_TEMPD2		BIT(4)
+#define IRQ_TEMPD3		BIT(5)
+
 /* CTSR bits */
 #define CTSR_PONM	BIT(8)
 #define CTSR_AOUT	BIT(7)
@@ -76,6 +87,7 @@ struct rcar_gen3_thermal_tsc {
 struct rcar_gen3_thermal_priv {
 	struct rcar_gen3_thermal_tsc *tscs[TSC_MAX_NUM];
 	unsigned int num_tscs;
+	spinlock_t lock; /* Protect interrupts on and off */
 };
 
 struct rcar_gen3_thermal_data {
@@ -113,6 +125,7 @@ static inline void rcar_gen3_thermal_write(struct rcar_gen3_thermal_tsc *tsc,
 
 #define FIXPT_SHIFT 7
 #define FIXPT_INT(_x) ((_x) << FIXPT_SHIFT)
+#define INT_FIXPT(_x) ((_x) >> FIXPT_SHIFT)
 #define FIXPT_DIV(_a, _b) DIV_ROUND_CLOSEST(((_a) << FIXPT_SHIFT), (_b))
 #define FIXPT_TO_MCELSIUS(_x) ((_x) * 1000 >> FIXPT_SHIFT)
 
@@ -178,10 +191,87 @@ static int rcar_gen3_thermal_get_temp(void *devdata, int *temp)
 	return 0;
 }
 
+static int rcar_gen3_thermal_mcelsius_to_temp(struct rcar_gen3_thermal_tsc *tsc,
+					      int mcelsius)
+{
+	int celsius, val1, val2;
+
+	celsius = DIV_ROUND_CLOSEST(mcelsius, 1000);
+	val1 = celsius * tsc->coef.a1 + tsc->coef.b1;
+	val2 = celsius * tsc->coef.a2 + tsc->coef.b2;
+
+	return INT_FIXPT((val1 + val2) / 2);
+}
+
+static int rcar_gen3_thermal_set_trips(void *devdata, int low, int high)
+{
+	struct rcar_gen3_thermal_tsc *tsc = devdata;
+
+	low = clamp_val(low, -40000, 125000);
+	high = clamp_val(high, -40000, 125000);
+
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQTEMP1,
+				rcar_gen3_thermal_mcelsius_to_temp(tsc, low));
+
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQTEMP2,
+				rcar_gen3_thermal_mcelsius_to_temp(tsc, high));
+
+	return 0;
+}
+
 static struct thermal_zone_of_device_ops rcar_gen3_tz_of_ops = {
 	.get_temp	= rcar_gen3_thermal_get_temp,
+	.set_trips	= rcar_gen3_thermal_set_trips,
 };
 
+static void rcar_thermal_irq_set(struct rcar_gen3_thermal_priv *priv, bool on)
+{
+	unsigned int i;
+	u32 val = on ? IRQ_TEMPD1 | IRQ_TEMP2 : 0;
+
+	for (i = 0; i < priv->num_tscs; i++)
+		rcar_gen3_thermal_write(priv->tscs[i], REG_GEN3_IRQMSK, val);
+}
+
+static irqreturn_t rcar_gen3_thermal_irq(int irq, void *data)
+{
+	struct rcar_gen3_thermal_priv *priv = data;
+	u32 status;
+	int i, ret = IRQ_HANDLED;
+
+	spin_lock(&priv->lock);
+	for (i = 0; i < priv->num_tscs; i++) {
+		status = rcar_gen3_thermal_read(priv->tscs[i], REG_GEN3_IRQSTR);
+		rcar_gen3_thermal_write(priv->tscs[i], REG_GEN3_IRQSTR, 0);
+		if (status)
+			ret = IRQ_WAKE_THREAD;
+	}
+
+	if (ret == IRQ_WAKE_THREAD)
+		rcar_thermal_irq_set(priv, false);
+
+	spin_unlock(&priv->lock);
+
+	return ret;
+}
+
+static irqreturn_t rcar_gen3_thermal_irq_thread(int irq, void *data)
+{
+	struct rcar_gen3_thermal_priv *priv = data;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < priv->num_tscs; i++)
+		thermal_zone_device_update(priv->tscs[i]->zone,
+					   THERMAL_EVENT_UNSPECIFIED);
+
+	spin_lock_irqsave(&priv->lock, flags);
+	rcar_thermal_irq_set(priv, true);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return IRQ_HANDLED;
+}
+
 static void r8a7795_thermal_init(struct rcar_gen3_thermal_tsc *tsc)
 {
 	rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR,  CTSR_THBGR);
@@ -190,7 +280,11 @@ static void r8a7795_thermal_init(struct rcar_gen3_thermal_tsc *tsc)
 	usleep_range(1000, 2000);
 
 	rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR, CTSR_PONM);
+
 	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQCTL, 0x3F);
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, 0);
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQEN, IRQ_TEMPD1 | IRQ_TEMP2);
+
 	rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR,
 				CTSR_PONM | CTSR_AOUT | CTSR_THBGR | CTSR_VMEN);
 
@@ -214,6 +308,9 @@ static void r8a7796_thermal_init(struct rcar_gen3_thermal_tsc *tsc)
 	usleep_range(1000, 2000);
 
 	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQCTL, 0x3F);
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, 0);
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQEN, IRQ_TEMPD1 | IRQ_TEMP2);
+
 	reg_val = rcar_gen3_thermal_read(tsc, REG_GEN3_THCTR);
 	reg_val |= THCTR_THSST;
 	rcar_gen3_thermal_write(tsc, REG_GEN3_THCTR, reg_val);
@@ -252,7 +349,8 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct resource *res;
 	struct thermal_zone_device *zone;
-	int ret, i;
+	int ret, irq, i;
+	char *irqname;
 	const struct rcar_gen3_thermal_data *match_data =
 		of_device_get_match_data(dev);
 
@@ -269,8 +367,32 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 	if (!priv)
 		return -ENOMEM;
 
+	spin_lock_init(&priv->lock);
+
 	platform_set_drvdata(pdev, priv);
 
+	/*
+	 * Request 2 (of the 3 possible) IRQs, the driver only needs to
+	 * to trigger on the low and high trip points of the current
+	 * temp window at this point.
+	 */
+	for (i = 0; i < 2; i++) {
+		irq = platform_get_irq(pdev, i);
+		if (irq < 0)
+			return irq;
+
+		irqname = devm_kasprintf(dev, GFP_KERNEL, "%s:ch%d",
+					 dev_name(dev), i);
+		if (!irqname)
+			return -ENOMEM;
+
+		ret = devm_request_threaded_irq(dev, irq, rcar_gen3_thermal_irq,
+						rcar_gen3_thermal_irq_thread,
+						IRQF_SHARED, irqname, priv);
+		if (ret)
+			return ret;
+	}
+
 	pm_runtime_enable(dev);
 	pm_runtime_get_sync(dev);
 
@@ -306,6 +428,12 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 			goto error_unregister;
 		}
 		tsc->zone = zone;
+
+		ret = of_thermal_get_ntrips(tsc->zone);
+		if (ret < 0)
+			goto error_unregister;
+
+		dev_info(dev, "TSC%d: Loaded %d trip points\n", i, ret);
 	}
 
 	priv->num_tscs = i;
@@ -315,6 +443,8 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 		goto error_unregister;
 	}
 
+	rcar_thermal_irq_set(priv, true);
+
 	return 0;
 
 error_unregister:
-- 
cgit v1.2.3


From cc4d072b66298716484f5c78d782c64509f4b6d9 Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 29 Mar 2017 20:43:55 +0200
Subject: thermal: rcar_gen3_thermal: store device match data in private
 structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The device match data needs to be accessible outside the probe function,
store it in the private data structure.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/rcar_gen3_thermal.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index d37c7d8f8fcd..f259a995c66c 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -88,6 +88,7 @@ struct rcar_gen3_thermal_priv {
 	struct rcar_gen3_thermal_tsc *tscs[TSC_MAX_NUM];
 	unsigned int num_tscs;
 	spinlock_t lock; /* Protect interrupts on and off */
+	const struct rcar_gen3_thermal_data *data;
 };
 
 struct rcar_gen3_thermal_data {
@@ -351,8 +352,6 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 	struct thermal_zone_device *zone;
 	int ret, irq, i;
 	char *irqname;
-	const struct rcar_gen3_thermal_data *match_data =
-		of_device_get_match_data(dev);
 
 	/* default values if FUSEs are missing */
 	/* TODO: Read values from hardware on supported platforms */
@@ -367,6 +366,8 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 	if (!priv)
 		return -ENOMEM;
 
+	priv->data = of_device_get_match_data(dev);
+
 	spin_lock_init(&priv->lock);
 
 	platform_set_drvdata(pdev, priv);
@@ -417,7 +418,7 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 
 		priv->tscs[i] = tsc;
 
-		match_data->thermal_init(tsc);
+		priv->data->thermal_init(tsc);
 		rcar_gen3_thermal_calc_coefs(&tsc->coef, ptat, thcode[i]);
 
 		zone = devm_thermal_zone_of_sensor_register(dev, i, tsc,
-- 
cgit v1.2.3


From 75f78d6d9eb793d141affaa5a76f20ce1d6ae5c9 Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 29 Mar 2017 20:43:56 +0200
Subject: thermal: rcar_gen3_thermal: add suspend and resume support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To restore operation it's easiest to reinitialise all TSCs. In order to
do this the current trip window needs to be stored in the TSC structure
so that it can be restored upon resume.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/rcar_gen3_thermal.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index f259a995c66c..37fcefd06d9f 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -82,6 +82,8 @@ struct rcar_gen3_thermal_tsc {
 	void __iomem *base;
 	struct thermal_zone_device *zone;
 	struct equation_coefs coef;
+	int low;
+	int high;
 };
 
 struct rcar_gen3_thermal_priv {
@@ -217,6 +219,9 @@ static int rcar_gen3_thermal_set_trips(void *devdata, int low, int high)
 	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQTEMP2,
 				rcar_gen3_thermal_mcelsius_to_temp(tsc, high));
 
+	tsc->low = low;
+	tsc->high = high;
+
 	return 0;
 }
 
@@ -454,9 +459,39 @@ error_unregister:
 	return ret;
 }
 
+static int __maybe_unused rcar_gen3_thermal_suspend(struct device *dev)
+{
+	struct rcar_gen3_thermal_priv *priv = dev_get_drvdata(dev);
+
+	rcar_thermal_irq_set(priv, false);
+
+	return 0;
+}
+
+static int __maybe_unused rcar_gen3_thermal_resume(struct device *dev)
+{
+	struct rcar_gen3_thermal_priv *priv = dev_get_drvdata(dev);
+	unsigned int i;
+
+	for (i = 0; i < priv->num_tscs; i++) {
+		struct rcar_gen3_thermal_tsc *tsc = priv->tscs[i];
+
+		priv->data->thermal_init(tsc);
+		rcar_gen3_thermal_set_trips(tsc, tsc->low, tsc->high);
+	}
+
+	rcar_thermal_irq_set(priv, true);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(rcar_gen3_thermal_pm_ops, rcar_gen3_thermal_suspend,
+			 rcar_gen3_thermal_resume);
+
 static struct platform_driver rcar_gen3_thermal_driver = {
 	.driver	= {
 		.name	= "rcar_gen3_thermal",
+		.pm = &rcar_gen3_thermal_pm_ops,
 		.of_match_table = rcar_gen3_thermal_dt_ids,
 	},
 	.probe		= rcar_gen3_thermal_probe,
-- 
cgit v1.2.3


From 1e2ac9821de6a85d3e8358f238436708d1d46869 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Fri, 31 Mar 2017 20:03:03 +0000
Subject: dt-bindings: Add thermal zone to bcm2835-thermal example

Add a thermal zone in order to make the example complete.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 .../bindings/thermal/brcm,bcm2835-thermal.txt      | 32 +++++++++++++++++++---
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt b/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt
index 474531d2b2c5..da8c5b73ad10 100644
--- a/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt
@@ -3,15 +3,39 @@ Binding for Thermal Sensor driver for BCM2835 SoCs.
 Required parameters:
 -------------------
 
-compatible: 	should be one of: "brcm,bcm2835-thermal",
-		"brcm,bcm2836-thermal" or "brcm,bcm2837-thermal"
-reg:		Address range of the thermal registers.
-clocks: 	Phandle of the clock used by the thermal sensor.
+compatible: 		should be one of: "brcm,bcm2835-thermal",
+			"brcm,bcm2836-thermal" or "brcm,bcm2837-thermal"
+reg:			Address range of the thermal registers.
+clocks: 		Phandle of the clock used by the thermal sensor.
+#thermal-sensor-cells:	should be 0 (see thermal.txt)
 
 Example:
 
+thermal-zones {
+	cpu_thermal: cpu-thermal {
+		polling-delay-passive = <0>;
+		polling-delay = <1000>;
+
+		thermal-sensors = <&thermal>;
+
+		trips {
+			cpu-crit {
+				temperature	= <80000>;
+				hysteresis	= <0>;
+				type		= "critical";
+			};
+		};
+
+		coefficients = <(-538)	407000>;
+
+		cooling-maps {
+		};
+	};
+};
+
 thermal: thermal@7e212000 {
 	compatible = "brcm,bcm2835-thermal";
 	reg = <0x7e212000 0x8>;
 	clocks = <&clocks BCM2835_CLOCK_TSENS>;
+	#thermal-sensor-cells = <0>;
 };
-- 
cgit v1.2.3


From bcb7dd9ef206f7d646ed8dac6fe7772083714253 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Fri, 31 Mar 2017 20:03:06 +0000
Subject: thermal: bcm2835: add thermal driver for bcm2835 SoC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add basic thermal driver for bcm2835 SoC.

This driver currently make sure that tsense HW block is set up
correctly.

Tested-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Martin Sperl <kernel@martin.sperl.org>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Acked-by: Eric Anholt <eric@anholt.net>
Acked-by: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/Kconfig           |   8 +
 drivers/thermal/Makefile          |   1 +
 drivers/thermal/bcm2835_thermal.c | 314 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 323 insertions(+)
 create mode 100644 drivers/thermal/bcm2835_thermal.c

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 776b34396144..3bd24063375e 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -453,4 +453,12 @@ config ZX2967_THERMAL
 	  the primitive temperature sensor embedded in zx2967 SoCs.
 	  This sensor generates the real time die temperature.
 
+config BCM2835_THERMAL
+	tristate "Thermal sensors on bcm2835 SoC"
+	depends on ARCH_BCM2835 || COMPILE_TEST
+	depends on HAS_IOMEM
+	depends on THERMAL_OF
+	help
+	  Support for thermal sensors on Broadcom bcm2835 SoCs.
+
 endif
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 7adae2029355..f23cde05dac6 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -58,3 +58,4 @@ obj-$(CONFIG_HISI_THERMAL)     += hisi_thermal.o
 obj-$(CONFIG_MTK_THERMAL)	+= mtk_thermal.o
 obj-$(CONFIG_GENERIC_ADC_THERMAL)	+= thermal-generic-adc.o
 obj-$(CONFIG_ZX2967_THERMAL)	+= zx2967_thermal.o
+obj-$(CONFIG_BCM2835_THERMAL)	+= bcm2835_thermal.o
diff --git a/drivers/thermal/bcm2835_thermal.c b/drivers/thermal/bcm2835_thermal.c
new file mode 100644
index 000000000000..0ecf80890c84
--- /dev/null
+++ b/drivers/thermal/bcm2835_thermal.c
@@ -0,0 +1,314 @@
+/*
+ * Driver for Broadcom BCM2835 SoC temperature sensor
+ *
+ * Copyright (C) 2016 Martin Sperl
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#define BCM2835_TS_TSENSCTL			0x00
+#define BCM2835_TS_TSENSSTAT			0x04
+
+#define BCM2835_TS_TSENSCTL_PRWDW		BIT(0)
+#define BCM2835_TS_TSENSCTL_RSTB		BIT(1)
+
+/*
+ * bandgap reference voltage in 6 mV increments
+ * 000b = 1178 mV, 001b = 1184 mV, ... 111b = 1220 mV
+ */
+#define BCM2835_TS_TSENSCTL_CTRL_BITS		3
+#define BCM2835_TS_TSENSCTL_CTRL_SHIFT		2
+#define BCM2835_TS_TSENSCTL_CTRL_MASK		    \
+	GENMASK(BCM2835_TS_TSENSCTL_CTRL_BITS +     \
+		BCM2835_TS_TSENSCTL_CTRL_SHIFT - 1, \
+		BCM2835_TS_TSENSCTL_CTRL_SHIFT)
+#define BCM2835_TS_TSENSCTL_CTRL_DEFAULT	1
+#define BCM2835_TS_TSENSCTL_EN_INT		BIT(5)
+#define BCM2835_TS_TSENSCTL_DIRECT		BIT(6)
+#define BCM2835_TS_TSENSCTL_CLR_INT		BIT(7)
+#define BCM2835_TS_TSENSCTL_THOLD_SHIFT		8
+#define BCM2835_TS_TSENSCTL_THOLD_BITS		10
+#define BCM2835_TS_TSENSCTL_THOLD_MASK		     \
+	GENMASK(BCM2835_TS_TSENSCTL_THOLD_BITS +     \
+		BCM2835_TS_TSENSCTL_THOLD_SHIFT - 1, \
+		BCM2835_TS_TSENSCTL_THOLD_SHIFT)
+/*
+ * time how long the block to be asserted in reset
+ * which based on a clock counter (TSENS clock assumed)
+ */
+#define BCM2835_TS_TSENSCTL_RSTDELAY_SHIFT	18
+#define BCM2835_TS_TSENSCTL_RSTDELAY_BITS	8
+#define BCM2835_TS_TSENSCTL_REGULEN		BIT(26)
+
+#define BCM2835_TS_TSENSSTAT_DATA_BITS		10
+#define BCM2835_TS_TSENSSTAT_DATA_SHIFT		0
+#define BCM2835_TS_TSENSSTAT_DATA_MASK		     \
+	GENMASK(BCM2835_TS_TSENSSTAT_DATA_BITS +     \
+		BCM2835_TS_TSENSSTAT_DATA_SHIFT - 1, \
+		BCM2835_TS_TSENSSTAT_DATA_SHIFT)
+#define BCM2835_TS_TSENSSTAT_VALID		BIT(10)
+#define BCM2835_TS_TSENSSTAT_INTERRUPT		BIT(11)
+
+struct bcm2835_thermal_data {
+	struct thermal_zone_device *tz;
+	void __iomem *regs;
+	struct clk *clk;
+	struct dentry *debugfsdir;
+};
+
+static int bcm2835_thermal_adc2temp(u32 adc, int offset, int slope)
+{
+	return offset + slope * adc;
+}
+
+static int bcm2835_thermal_temp2adc(int temp, int offset, int slope)
+{
+	temp -= offset;
+	temp /= slope;
+
+	if (temp < 0)
+		temp = 0;
+	if (temp >= BIT(BCM2835_TS_TSENSSTAT_DATA_BITS))
+		temp = BIT(BCM2835_TS_TSENSSTAT_DATA_BITS) - 1;
+
+	return temp;
+}
+
+static int bcm2835_thermal_get_temp(void *d, int *temp)
+{
+	struct bcm2835_thermal_data *data = d;
+	u32 val = readl(data->regs + BCM2835_TS_TSENSSTAT);
+
+	if (!(val & BCM2835_TS_TSENSSTAT_VALID))
+		return -EIO;
+
+	val &= BCM2835_TS_TSENSSTAT_DATA_MASK;
+
+	*temp = bcm2835_thermal_adc2temp(
+		val,
+		thermal_zone_get_offset(data->tz),
+		thermal_zone_get_slope(data->tz));
+
+	return 0;
+}
+
+static const struct debugfs_reg32 bcm2835_thermal_regs[] = {
+	{
+		.name = "ctl",
+		.offset = 0
+	},
+	{
+		.name = "stat",
+		.offset = 4
+	}
+};
+
+static void bcm2835_thermal_debugfs(struct platform_device *pdev)
+{
+	struct thermal_zone_device *tz = platform_get_drvdata(pdev);
+	struct bcm2835_thermal_data *data = tz->devdata;
+	struct debugfs_regset32 *regset;
+
+	data->debugfsdir = debugfs_create_dir("bcm2835_thermal", NULL);
+	if (!data->debugfsdir)
+		return;
+
+	regset = devm_kzalloc(&pdev->dev, sizeof(*regset), GFP_KERNEL);
+	if (!regset)
+		return;
+
+	regset->regs = bcm2835_thermal_regs;
+	regset->nregs = ARRAY_SIZE(bcm2835_thermal_regs);
+	regset->base = data->regs;
+
+	debugfs_create_regset32("regset", 0444, data->debugfsdir, regset);
+}
+
+static struct thermal_zone_of_device_ops bcm2835_thermal_ops = {
+	.get_temp = bcm2835_thermal_get_temp,
+};
+
+/*
+ * Note: as per Raspberry Foundation FAQ
+ * (https://www.raspberrypi.org/help/faqs/#performanceOperatingTemperature)
+ * the recommended temperature range for the SoC -40C to +85C
+ * so the trip limit is set to 80C.
+ * this applies to all the BCM283X SoC
+ */
+
+static const struct of_device_id bcm2835_thermal_of_match_table[] = {
+	{
+		.compatible = "brcm,bcm2835-thermal",
+	},
+	{
+		.compatible = "brcm,bcm2836-thermal",
+	},
+	{
+		.compatible = "brcm,bcm2837-thermal",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, bcm2835_thermal_of_match_table);
+
+static int bcm2835_thermal_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	struct thermal_zone_device *tz;
+	struct bcm2835_thermal_data *data;
+	struct resource *res;
+	int err = 0;
+	u32 val;
+	unsigned long rate;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	match = of_match_device(bcm2835_thermal_of_match_table,
+				&pdev->dev);
+	if (!match)
+		return -EINVAL;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	data->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(data->regs)) {
+		err = PTR_ERR(data->regs);
+		dev_err(&pdev->dev, "Could not get registers: %d\n", err);
+		return err;
+	}
+
+	data->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(data->clk)) {
+		err = PTR_ERR(data->clk);
+		if (err != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "Could not get clk: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(data->clk);
+	if (err)
+		return err;
+
+	rate = clk_get_rate(data->clk);
+	if ((rate < 1920000) || (rate > 5000000))
+		dev_warn(&pdev->dev,
+			 "Clock %pCn running at %pCr Hz is outside of the recommended range: 1.92 to 5MHz\n",
+			 data->clk, data->clk);
+
+	/* register of thermal sensor and get info from DT */
+	tz = thermal_zone_of_sensor_register(&pdev->dev, 0, data,
+					     &bcm2835_thermal_ops);
+	if (IS_ERR(tz)) {
+		err = PTR_ERR(tz);
+		dev_err(&pdev->dev,
+			"Failed to register the thermal device: %d\n",
+			err);
+		goto err_clk;
+	}
+
+	/*
+	 * right now the FW does set up the HW-block, so we are not
+	 * touching the configuration registers.
+	 * But if the HW is not enabled, then set it up
+	 * using "sane" values used by the firmware right now.
+	 */
+	val = readl(data->regs + BCM2835_TS_TSENSCTL);
+	if (!(val & BCM2835_TS_TSENSCTL_RSTB)) {
+		int trip_temp, offset, slope;
+
+		slope = thermal_zone_get_slope(tz);
+		offset = thermal_zone_get_offset(tz);
+		/*
+		 * For now we deal only with critical, otherwise
+		 * would need to iterate
+		 */
+		err = tz->ops->get_trip_temp(tz, 0, &trip_temp);
+		if (err < 0) {
+			err = PTR_ERR(tz);
+			dev_err(&pdev->dev,
+				"Not able to read trip_temp: %d\n",
+				err);
+			goto err_tz;
+		}
+
+		/* set bandgap reference voltage and enable voltage regulator */
+		val = (BCM2835_TS_TSENSCTL_CTRL_DEFAULT <<
+		       BCM2835_TS_TSENSCTL_CTRL_SHIFT) |
+		      BCM2835_TS_TSENSCTL_REGULEN;
+
+		/* use the recommended reset duration */
+		val |= (0xFE << BCM2835_TS_TSENSCTL_RSTDELAY_SHIFT);
+
+		/*  trip_adc value from info */
+		val |= bcm2835_thermal_temp2adc(trip_temp,
+						offset,
+						slope)
+			<< BCM2835_TS_TSENSCTL_THOLD_SHIFT;
+
+		/* write the value back to the register as 2 steps */
+		writel(val, data->regs + BCM2835_TS_TSENSCTL);
+		val |= BCM2835_TS_TSENSCTL_RSTB;
+		writel(val, data->regs + BCM2835_TS_TSENSCTL);
+	}
+
+	data->tz = tz;
+
+	platform_set_drvdata(pdev, tz);
+
+	bcm2835_thermal_debugfs(pdev);
+
+	return 0;
+err_tz:
+	thermal_zone_of_sensor_unregister(&pdev->dev, tz);
+err_clk:
+	clk_disable_unprepare(data->clk);
+
+	return err;
+}
+
+static int bcm2835_thermal_remove(struct platform_device *pdev)
+{
+	struct thermal_zone_device *tz = platform_get_drvdata(pdev);
+	struct bcm2835_thermal_data *data = tz->devdata;
+
+	debugfs_remove_recursive(data->debugfsdir);
+	thermal_zone_of_sensor_unregister(&pdev->dev, tz);
+	clk_disable_unprepare(data->clk);
+
+	return 0;
+}
+
+static struct platform_driver bcm2835_thermal_driver = {
+	.probe = bcm2835_thermal_probe,
+	.remove = bcm2835_thermal_remove,
+	.driver = {
+		.name = "bcm2835_thermal",
+		.of_match_table = bcm2835_thermal_of_match_table,
+	},
+};
+module_platform_driver(bcm2835_thermal_driver);
+
+MODULE_AUTHOR("Martin Sperl");
+MODULE_DESCRIPTION("Thermal driver for bcm2835 chip");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 842fe519f68b4d17ba53c66d69f22a72b1ad08cf Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 31 Mar 2017 15:46:05 +0100
Subject: iommu/dma: Convert to address-based allocation

In preparation for some IOVA allocation improvements, clean up all the
explicit struct iova usage such that all our mapping, unmapping and
cleanup paths deal exclusively with addresses rather than implementation
details. In the process, a few of the things we're touching get renamed
for the sake of internal consistency.

Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dma-iommu.c | 119 ++++++++++++++++++++++++++--------------------
 1 file changed, 67 insertions(+), 52 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 85652110c8ff..8e0b684da1ba 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -365,12 +365,12 @@ int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
 	}
 }
 
-static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
-		dma_addr_t dma_limit, struct device *dev)
+static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
+		size_t size, dma_addr_t dma_limit, struct device *dev)
 {
 	struct iova_domain *iovad = cookie_iovad(domain);
 	unsigned long shift = iova_shift(iovad);
-	unsigned long length = iova_align(iovad, size) >> shift;
+	unsigned long iova_len = size >> shift;
 	struct iova *iova = NULL;
 
 	if (domain->geometry.force_aperture)
@@ -378,35 +378,42 @@ static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
 
 	/* Try to get PCI devices a SAC address */
 	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
-		iova = alloc_iova(iovad, length, DMA_BIT_MASK(32) >> shift,
+		iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift,
 				  true);
 	/*
 	 * Enforce size-alignment to be safe - there could perhaps be an
 	 * attribute to control this per-device, or at least per-domain...
 	 */
 	if (!iova)
-		iova = alloc_iova(iovad, length, dma_limit >> shift, true);
+		iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true);
 
-	return iova;
+	return (dma_addr_t)iova->pfn_lo << shift;
 }
 
-/* The IOVA allocator knows what we mapped, so just unmap whatever that was */
-static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr)
+static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
+		dma_addr_t iova, size_t size)
 {
-	struct iova_domain *iovad = cookie_iovad(domain);
-	unsigned long shift = iova_shift(iovad);
-	unsigned long pfn = dma_addr >> shift;
-	struct iova *iova = find_iova(iovad, pfn);
-	size_t size;
+	struct iova_domain *iovad = &cookie->iovad;
+	struct iova *iova_rbnode;
 
-	if (WARN_ON(!iova))
+	iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
+	if (WARN_ON(!iova_rbnode))
 		return;
 
-	size = iova_size(iova) << shift;
-	size -= iommu_unmap(domain, pfn << shift, size);
-	/* ...and if we can't, then something is horribly, horribly wrong */
-	WARN_ON(size > 0);
-	__free_iova(iovad, iova);
+	__free_iova(iovad, iova_rbnode);
+}
+
+static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
+		size_t size)
+{
+	struct iova_domain *iovad = cookie_iovad(domain);
+	size_t iova_off = iova_offset(iovad, dma_addr);
+
+	dma_addr -= iova_off;
+	size = iova_align(iovad, size + iova_off);
+
+	WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
+	iommu_dma_free_iova(domain->iova_cookie, dma_addr, size);
 }
 
 static void __iommu_dma_free_pages(struct page **pages, int count)
@@ -488,7 +495,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count,
 void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
 		dma_addr_t *handle)
 {
-	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle);
+	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size);
 	__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
 	*handle = DMA_ERROR_CODE;
 }
@@ -516,11 +523,11 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
 		void (*flush_page)(struct device *, const void *, phys_addr_t))
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-	struct iova_domain *iovad = cookie_iovad(domain);
-	struct iova *iova;
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
 	struct page **pages;
 	struct sg_table sgt;
-	dma_addr_t dma_addr;
+	dma_addr_t iova;
 	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
 
 	*handle = DMA_ERROR_CODE;
@@ -540,11 +547,11 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
 	if (!pages)
 		return NULL;
 
-	iova = __alloc_iova(domain, size, dev->coherent_dma_mask, dev);
+	size = iova_align(iovad, size);
+	iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
 	if (!iova)
 		goto out_free_pages;
 
-	size = iova_align(iovad, size);
 	if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
 		goto out_free_iova;
 
@@ -560,19 +567,18 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
 		sg_miter_stop(&miter);
 	}
 
-	dma_addr = iova_dma_addr(iovad, iova);
-	if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot)
+	if (iommu_map_sg(domain, iova, sgt.sgl, sgt.orig_nents, prot)
 			< size)
 		goto out_free_sg;
 
-	*handle = dma_addr;
+	*handle = iova;
 	sg_free_table(&sgt);
 	return pages;
 
 out_free_sg:
 	sg_free_table(&sgt);
 out_free_iova:
-	__free_iova(iovad, iova);
+	iommu_dma_free_iova(cookie, iova, size);
 out_free_pages:
 	__iommu_dma_free_pages(pages, count);
 	return NULL;
@@ -606,22 +612,22 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
 		size_t size, int prot)
 {
-	dma_addr_t dma_addr;
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-	struct iova_domain *iovad = cookie_iovad(domain);
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
 	size_t iova_off = iova_offset(iovad, phys);
-	size_t len = iova_align(iovad, size + iova_off);
-	struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev), dev);
+	dma_addr_t iova;
 
+	size = iova_align(iovad, size + iova_off);
+	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
 	if (!iova)
 		return DMA_ERROR_CODE;
 
-	dma_addr = iova_dma_addr(iovad, iova);
-	if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) {
-		__free_iova(iovad, iova);
+	if (iommu_map(domain, iova, phys - iova_off, size, prot)) {
+		iommu_dma_free_iova(cookie, iova, size);
 		return DMA_ERROR_CODE;
 	}
-	return dma_addr + iova_off;
+	return iova + iova_off;
 }
 
 dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
@@ -633,7 +639,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
 void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle);
+	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
 }
 
 /*
@@ -722,10 +728,10 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		int nents, int prot)
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-	struct iova_domain *iovad = cookie_iovad(domain);
-	struct iova *iova;
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
 	struct scatterlist *s, *prev = NULL;
-	dma_addr_t dma_addr;
+	dma_addr_t iova;
 	size_t iova_len = 0;
 	unsigned long mask = dma_get_seg_boundary(dev);
 	int i;
@@ -769,7 +775,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		prev = s;
 	}
 
-	iova = __alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
+	iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
 	if (!iova)
 		goto out_restore_sg;
 
@@ -777,14 +783,13 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	 * We'll leave any physical concatenation to the IOMMU driver's
 	 * implementation - it knows better than we do.
 	 */
-	dma_addr = iova_dma_addr(iovad, iova);
-	if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len)
+	if (iommu_map_sg(domain, iova, sg, nents, prot) < iova_len)
 		goto out_free_iova;
 
-	return __finalise_sg(dev, sg, nents, dma_addr);
+	return __finalise_sg(dev, sg, nents, iova);
 
 out_free_iova:
-	__free_iova(iovad, iova);
+	iommu_dma_free_iova(cookie, iova, iova_len);
 out_restore_sg:
 	__invalidate_sg(sg, nents);
 	return 0;
@@ -793,11 +798,21 @@ out_restore_sg:
 void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 		enum dma_data_direction dir, unsigned long attrs)
 {
+	dma_addr_t start, end;
+	struct scatterlist *tmp;
+	int i;
 	/*
 	 * The scatterlist segments are mapped into a single
 	 * contiguous IOVA allocation, so this is incredibly easy.
 	 */
-	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg));
+	start = sg_dma_address(sg);
+	for_each_sg(sg_next(sg), tmp, nents - 1, i) {
+		if (sg_dma_len(tmp) == 0)
+			break;
+		sg = tmp;
+	}
+	end = sg_dma_address(sg) + sg_dma_len(sg);
+	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start);
 }
 
 dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
@@ -810,7 +825,7 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
 void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle);
+	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
 }
 
 int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
@@ -824,7 +839,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iommu_dma_msi_page *msi_page;
 	struct iova_domain *iovad = cookie_iovad(domain);
-	struct iova *iova;
+	dma_addr_t iova;
 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
 	size_t size = cookie_msi_granule(cookie);
 
@@ -839,10 +854,10 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 
 	msi_page->phys = msi_addr;
 	if (iovad) {
-		iova = __alloc_iova(domain, size, dma_get_mask(dev), dev);
+		iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
 		if (!iova)
 			goto out_free_page;
-		msi_page->iova = iova_dma_addr(iovad, iova);
+		msi_page->iova = iova;
 	} else {
 		msi_page->iova = cookie->msi_iova;
 		cookie->msi_iova += size;
@@ -857,7 +872,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 
 out_free_iova:
 	if (iovad)
-		__free_iova(iovad, iova);
+		iommu_dma_free_iova(cookie, iova, size);
 	else
 		cookie->msi_iova -= size;
 out_free_page:
-- 
cgit v1.2.3


From a44e6657585b15eeebf5681bfcc7ce0b002429c2 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 31 Mar 2017 15:46:06 +0100
Subject: iommu/dma: Clean up MSI IOVA allocation

Now that allocation is suitably abstracted, our private alloc/free
helpers can drive the trivial MSI cookie allocator directly as well,
which lets us clean up its exposed guts from iommu_dma_map_msi_msg() and
simplify things quite a bit.

Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dma-iommu.c | 58 ++++++++++++++++++++---------------------------
 1 file changed, 25 insertions(+), 33 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 8e0b684da1ba..1b94beb43036 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -61,15 +61,6 @@ static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
 	return PAGE_SIZE;
 }
 
-static inline struct iova_domain *cookie_iovad(struct iommu_domain *domain)
-{
-	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-
-	if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
-		return &cookie->iovad;
-	return NULL;
-}
-
 static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
 {
 	struct iommu_dma_cookie *cookie;
@@ -368,11 +359,19 @@ int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
 static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 		size_t size, dma_addr_t dma_limit, struct device *dev)
 {
-	struct iova_domain *iovad = cookie_iovad(domain);
-	unsigned long shift = iova_shift(iovad);
-	unsigned long iova_len = size >> shift;
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
+	unsigned long shift, iova_len;
 	struct iova *iova = NULL;
 
+	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
+		cookie->msi_iova += size;
+		return cookie->msi_iova - size;
+	}
+
+	shift = iova_shift(iovad);
+	iova_len = size >> shift;
+
 	if (domain->geometry.force_aperture)
 		dma_limit = min(dma_limit, domain->geometry.aperture_end);
 
@@ -396,6 +395,12 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 	struct iova_domain *iovad = &cookie->iovad;
 	struct iova *iova_rbnode;
 
+	/* The MSI case is only ever cleaning up its most recent allocation */
+	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
+		cookie->msi_iova -= size;
+		return;
+	}
+
 	iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
 	if (WARN_ON(!iova_rbnode))
 		return;
@@ -406,14 +411,15 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
 		size_t size)
 {
-	struct iova_domain *iovad = cookie_iovad(domain);
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
 	size_t iova_off = iova_offset(iovad, dma_addr);
 
 	dma_addr -= iova_off;
 	size = iova_align(iovad, size + iova_off);
 
 	WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
-	iommu_dma_free_iova(domain->iova_cookie, dma_addr, size);
+	iommu_dma_free_iova(cookie, dma_addr, size);
 }
 
 static void __iommu_dma_free_pages(struct page **pages, int count)
@@ -838,7 +844,6 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 {
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iommu_dma_msi_page *msi_page;
-	struct iova_domain *iovad = cookie_iovad(domain);
 	dma_addr_t iova;
 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
 	size_t size = cookie_msi_granule(cookie);
@@ -852,29 +857,16 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 	if (!msi_page)
 		return NULL;
 
-	msi_page->phys = msi_addr;
-	if (iovad) {
-		iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
-		if (!iova)
-			goto out_free_page;
-		msi_page->iova = iova;
-	} else {
-		msi_page->iova = cookie->msi_iova;
-		cookie->msi_iova += size;
-	}
-
-	if (iommu_map(domain, msi_page->iova, msi_addr, size, prot))
-		goto out_free_iova;
+	iova = __iommu_dma_map(dev, msi_addr, size, prot);
+	if (iommu_dma_mapping_error(dev, iova))
+		goto out_free_page;
 
 	INIT_LIST_HEAD(&msi_page->list);
+	msi_page->phys = msi_addr;
+	msi_page->iova = iova;
 	list_add(&msi_page->list, &cookie->msi_page_list);
 	return msi_page;
 
-out_free_iova:
-	if (iovad)
-		iommu_dma_free_iova(cookie, iova, size);
-	else
-		cookie->msi_iova -= size;
 out_free_page:
 	kfree(msi_page);
 	return NULL;
-- 
cgit v1.2.3


From bb65a64c7285e7105c1a6c8a33b37770343a4e96 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 31 Mar 2017 15:46:07 +0100
Subject: iommu/dma: Plumb in the per-CPU IOVA caches

With IOVA allocation suitably tidied up, we are finally free to opt in
to the per-CPU caching mechanism. The caching alone can provide a modest
improvement over walking the rbtree for weedier systems (iperf3 shows
~10% more ethernet throughput on an ARM Juno r1 constrained to a single
650MHz Cortex-A53), but the real gain will be in sidestepping the rbtree
lock contention which larger ARM-based systems with lots of parallel I/O
are starting to feel the pain of.

Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dma-iommu.c | 37 +++++++++++++++++--------------------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 1b94beb43036..8348f366ddd1 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -361,8 +361,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 {
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
-	unsigned long shift, iova_len;
-	struct iova *iova = NULL;
+	unsigned long shift, iova_len, iova = 0;
 
 	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
 		cookie->msi_iova += size;
@@ -371,41 +370,39 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 
 	shift = iova_shift(iovad);
 	iova_len = size >> shift;
+	/*
+	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
+	 * will come back to bite us badly, so we have to waste a bit of space
+	 * rounding up anything cacheable to make sure that can't happen. The
+	 * order of the unadjusted size will still match upon freeing.
+	 */
+	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
+		iova_len = roundup_pow_of_two(iova_len);
 
 	if (domain->geometry.force_aperture)
 		dma_limit = min(dma_limit, domain->geometry.aperture_end);
 
 	/* Try to get PCI devices a SAC address */
 	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
-		iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift,
-				  true);
-	/*
-	 * Enforce size-alignment to be safe - there could perhaps be an
-	 * attribute to control this per-device, or at least per-domain...
-	 */
+		iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);
+
 	if (!iova)
-		iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true);
+		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);
 
-	return (dma_addr_t)iova->pfn_lo << shift;
+	return (dma_addr_t)iova << shift;
 }
 
 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 		dma_addr_t iova, size_t size)
 {
 	struct iova_domain *iovad = &cookie->iovad;
-	struct iova *iova_rbnode;
+	unsigned long shift = iova_shift(iovad);
 
 	/* The MSI case is only ever cleaning up its most recent allocation */
-	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
+	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 		cookie->msi_iova -= size;
-		return;
-	}
-
-	iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
-	if (WARN_ON(!iova_rbnode))
-		return;
-
-	__free_iova(iovad, iova_rbnode);
+	else
+		free_iova_fast(iovad, iova >> shift, size >> shift);
 }
 
 static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
-- 
cgit v1.2.3


From c9d9f2394c6a953585874a1a6cb2ecea853fdcf2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 31 Mar 2017 16:26:03 +0200
Subject: iommu/rockchip: Make use of 'struct iommu_device'

Register hardware IOMMUs seperatly with the iommu-core code
and add a sysfs representation of the iommu topology.

Tested-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/rockchip-iommu.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 9afcbf79f0b0..36d089025f4c 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -90,6 +90,7 @@ struct rk_iommu {
 	void __iomem **bases;
 	int num_mmu;
 	int irq;
+	struct iommu_device iommu;
 	struct list_head node; /* entry in rk_iommu_domain.iommus */
 	struct iommu_domain *domain; /* domain to which iommu is attached */
 };
@@ -1032,6 +1033,7 @@ static int rk_iommu_group_set_iommudata(struct iommu_group *group,
 static int rk_iommu_add_device(struct device *dev)
 {
 	struct iommu_group *group;
+	struct rk_iommu *iommu;
 	int ret;
 
 	if (!rk_iommu_is_dev_iommu_master(dev))
@@ -1054,6 +1056,10 @@ static int rk_iommu_add_device(struct device *dev)
 	if (ret)
 		goto err_remove_device;
 
+	iommu = rk_iommu_from_dev(dev);
+	if (iommu)
+		iommu_device_link(&iommu->iommu, dev);
+
 	iommu_group_put(group);
 
 	return 0;
@@ -1067,9 +1073,15 @@ err_put_group:
 
 static void rk_iommu_remove_device(struct device *dev)
 {
+	struct rk_iommu *iommu;
+
 	if (!rk_iommu_is_dev_iommu_master(dev))
 		return;
 
+	iommu = rk_iommu_from_dev(dev);
+	if (iommu)
+		iommu_device_unlink(&iommu->iommu, dev);
+
 	iommu_group_remove_device(dev);
 }
 
@@ -1117,7 +1129,7 @@ static int rk_iommu_probe(struct platform_device *pdev)
 	struct rk_iommu *iommu;
 	struct resource *res;
 	int num_res = pdev->num_resources;
-	int i;
+	int err, i;
 
 	iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
@@ -1150,11 +1162,25 @@ static int rk_iommu_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	return 0;
+	err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
+	if (err)
+		return err;
+
+	iommu_device_set_ops(&iommu->iommu, &rk_iommu_ops);
+	err = iommu_device_register(&iommu->iommu);
+
+	return err;
 }
 
 static int rk_iommu_remove(struct platform_device *pdev)
 {
+	struct rk_iommu *iommu = platform_get_drvdata(pdev);
+
+	if (iommu) {
+		iommu_device_sysfs_remove(&iommu->iommu);
+		iommu_device_unregister(&iommu->iommu);
+	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 6f66ea099fc2f31d31d6cd39f3b13b23bdeb6196 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 31 Mar 2017 15:12:31 +0200
Subject: iommu/mediatek: Teach MTK-IOMMUv1 about 'struct iommu_device'

Make use of the iommu_device_register() interface.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu_v1.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 19e010083408..a27ef570c328 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -431,9 +431,10 @@ err_release_mapping:
 
 static int mtk_iommu_add_device(struct device *dev)
 {
-	struct iommu_group *group;
 	struct of_phandle_args iommu_spec;
 	struct of_phandle_iterator it;
+	struct mtk_iommu_data *data;
+	struct iommu_group *group;
 	int err;
 
 	of_for_each_phandle(&it, err, dev->of_node, "iommus",
@@ -450,6 +451,9 @@ static int mtk_iommu_add_device(struct device *dev)
 	if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
 		return -ENODEV; /* Not a iommu client device */
 
+	data = dev->iommu_fwspec->iommu_priv;
+	iommu_device_link(&data->iommu, dev);
+
 	group = iommu_group_get_for_dev(dev);
 	if (IS_ERR(group))
 		return PTR_ERR(group);
@@ -460,9 +464,14 @@ static int mtk_iommu_add_device(struct device *dev)
 
 static void mtk_iommu_remove_device(struct device *dev)
 {
+	struct mtk_iommu_data *data;
+
 	if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
 		return;
 
+	data = dev->iommu_fwspec->iommu_priv;
+	iommu_device_unlink(&data->iommu, dev);
+
 	iommu_group_remove_device(dev);
 	iommu_fwspec_free(dev);
 }
@@ -627,6 +636,17 @@ static int mtk_iommu_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL,
+				     dev_name(&pdev->dev));
+	if (ret)
+		return ret;
+
+	iommu_device_set_ops(&data->iommu, &mtk_iommu_ops);
+
+	ret = iommu_device_register(&data->iommu);
+	if (ret)
+		return ret;
+
 	if (!iommu_present(&platform_bus_type))
 		bus_set_iommu(&platform_bus_type,  &mtk_iommu_ops);
 
@@ -637,6 +657,9 @@ static int mtk_iommu_remove(struct platform_device *pdev)
 {
 	struct mtk_iommu_data *data = platform_get_drvdata(pdev);
 
+	iommu_device_sysfs_remove(&data->iommu);
+	iommu_device_unregister(&data->iommu);
+
 	if (iommu_present(&platform_bus_type))
 		bus_set_iommu(&platform_bus_type, NULL);
 
-- 
cgit v1.2.3


From dab5aec64bf5907f65926675807e4ebe83b3b10e Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Wed, 29 Mar 2017 10:30:29 -0700
Subject: rtc: sh: add support for rza series

This same RTC is used in RZ/A series MPUs, therefore with some slight
changes, this driver can be reused. Additionally, since ARM architectures
require Device Tree configurations, device tree support has been added.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/Kconfig  |  4 ++--
 drivers/rtc/rtc-sh.c | 33 ++++++++++++++++++++++++++++-----
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 050bec749fae..8d3b95728326 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1303,10 +1303,10 @@ config RTC_DRV_SA1100
 
 config RTC_DRV_SH
 	tristate "SuperH On-Chip RTC"
-	depends on SUPERH && HAVE_CLK
+	depends on SUPERH || ARCH_RENESAS
 	help
 	  Say Y here to enable support for the on-chip RTC found in
-	  most SuperH processors.
+	  most SuperH processors. This RTC is also found in RZ/A SoCs.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called rtc-sh.
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index c626e43a9cbb..00b396e96cbe 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -27,7 +27,15 @@
 #include <linux/log2.h>
 #include <linux/clk.h>
 #include <linux/slab.h>
+#ifdef CONFIG_SUPERH
 #include <asm/rtc.h>
+#else
+/* Default values for RZ/A RTC */
+#define rtc_reg_size		sizeof(u16)
+#define RTC_BIT_INVERTED        0	/* no chip bugs */
+#define RTC_CAP_4_DIGIT_YEAR    (1 << 0)
+#define RTC_DEF_CAPABILITIES    RTC_CAP_4_DIGIT_YEAR
+#endif
 
 #define DRV_NAME	"sh-rtc"
 
@@ -570,6 +578,8 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
 	rtc->alarm_irq = platform_get_irq(pdev, 2);
 
 	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+	if (!res)
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (unlikely(res == NULL)) {
 		dev_err(&pdev->dev, "No IO resource\n");
 		return -ENOENT;
@@ -587,12 +597,15 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
 	if (unlikely(!rtc->regbase))
 		return -EINVAL;
 
-	clk_id = pdev->id;
-	/* With a single device, the clock id is still "rtc0" */
-	if (clk_id < 0)
-		clk_id = 0;
+	if (!pdev->dev.of_node) {
+		clk_id = pdev->id;
+		/* With a single device, the clock id is still "rtc0" */
+		if (clk_id < 0)
+			clk_id = 0;
 
-	snprintf(clk_name, sizeof(clk_name), "rtc%d", clk_id);
+		snprintf(clk_name, sizeof(clk_name), "rtc%d", clk_id);
+	} else
+		snprintf(clk_name, sizeof(clk_name), "fck");
 
 	rtc->clk = devm_clk_get(&pdev->dev, clk_name);
 	if (IS_ERR(rtc->clk)) {
@@ -608,6 +621,8 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
 	clk_enable(rtc->clk);
 
 	rtc->capabilities = RTC_DEF_CAPABILITIES;
+
+#ifdef CONFIG_SUPERH
 	if (dev_get_platdata(&pdev->dev)) {
 		struct sh_rtc_platform_info *pinfo =
 			dev_get_platdata(&pdev->dev);
@@ -618,6 +633,7 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
 		 */
 		rtc->capabilities |= pinfo->capabilities;
 	}
+#endif
 
 	if (rtc->carry_irq <= 0) {
 		/* register shared periodic/carry/alarm irq */
@@ -738,10 +754,17 @@ static int sh_rtc_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(sh_rtc_pm_ops, sh_rtc_suspend, sh_rtc_resume);
 
+static const struct of_device_id sh_rtc_of_match[] = {
+	{ .compatible = "renesas,sh-rtc", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sh_rtc_of_match);
+
 static struct platform_driver sh_rtc_platform_driver = {
 	.driver		= {
 		.name	= DRV_NAME,
 		.pm	= &sh_rtc_pm_ops,
+		.of_match_table = sh_rtc_of_match,
 	},
 	.remove		= __exit_p(sh_rtc_remove),
 };
-- 
cgit v1.2.3


From 04767b9fc2315c90a1e4bfdee883c0cbc122f30f Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Wed, 29 Mar 2017 10:30:30 -0700
Subject: dt-bindings: rtc: document the rtc-sh bindings

Add the binding documentation for rtc-sh which is an RTC for
SuperH and RZ/A SoCs.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 Documentation/devicetree/bindings/rtc/rtc-sh.txt | 28 ++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/rtc/rtc-sh.txt

diff --git a/Documentation/devicetree/bindings/rtc/rtc-sh.txt b/Documentation/devicetree/bindings/rtc/rtc-sh.txt
new file mode 100644
index 000000000000..7676c7d28874
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/rtc-sh.txt
@@ -0,0 +1,28 @@
+* Real Time Clock for Renesas SH and ARM SoCs
+
+Required properties:
+- compatible: Should be "renesas,r7s72100-rtc" and "renesas,sh-rtc" as a
+  fallback.
+- reg: physical base address and length of memory mapped region.
+- interrupts: 3 interrupts for alarm, period, and carry.
+- interrupt-names: The interrupts should be labeled as "alarm", "period", and
+  "carry".
+- clocks: The functional clock source for the RTC controller must be listed
+  first (if exists). Additionally, potential clock counting sources are to be
+  listed.
+- clock-names: The functional clock must be labeled as "fck". Other clocks
+  may be named in accordance to the SoC hardware manuals.
+
+
+Example:
+rtc: rtc@fcff1000 {
+	compatible = "renesas,r7s72100-rtc", "renesas,sh-rtc";
+	reg = <0xfcff1000 0x2e>;
+	interrupts = <GIC_SPI 276 IRQ_TYPE_EDGE_RISING
+		      GIC_SPI 277 IRQ_TYPE_EDGE_RISING
+		      GIC_SPI 278 IRQ_TYPE_EDGE_RISING>;
+	interrupt-names = "alarm", "period", "carry";
+	clocks = <&mstp6_clks R7S72100_CLK_RTC>, <&rtc_x1_clk>,
+		 <&rtc_x3_clk>, <&extal_clk>;
+	clock-names = "fck", "rtc_x1", "rtc_x3", "extal";
+};
-- 
cgit v1.2.3


From a63b6186f932e367133b271714d56fd8abaada7a Mon Sep 17 00:00:00 2001
From: Alex Frid <afrid@nvidia.com>
Date: Wed, 22 Mar 2017 15:38:19 +0200
Subject: clk: tegra: Propagate clk_out_x rate to parent

Given that externx can only be used as a parent for clk_out_x, it makes
sense to propagate requests to make clk_out_x easier to handle.

Signed-off-by: Alex Frid <afrid@nvidia.com>
Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra-pmc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/tegra/clk-tegra-pmc.c b/drivers/clk/tegra/clk-tegra-pmc.c
index 91377abfefa1..a35579a3f884 100644
--- a/drivers/clk/tegra/clk-tegra-pmc.c
+++ b/drivers/clk/tegra/clk-tegra-pmc.c
@@ -95,7 +95,8 @@ void __init tegra_pmc_clk_init(void __iomem *pmc_base,
 			continue;
 
 		clk = clk_register_mux(NULL, data->mux_name, data->parents,
-				data->num_parents, CLK_SET_RATE_NO_REPARENT,
+				data->num_parents,
+				CLK_SET_RATE_NO_REPARENT | CLK_SET_RATE_PARENT,
 				pmc_base + PMC_CLK_OUT_CNTRL, data->mux_shift,
 				3, 0, &clk_out_lock);
 		*dt_clk = clk;
@@ -106,7 +107,8 @@ void __init tegra_pmc_clk_init(void __iomem *pmc_base,
 			continue;
 
 		clk = clk_register_gate(NULL, data->gate_name, data->mux_name,
-					0, pmc_base + PMC_CLK_OUT_CNTRL,
+					CLK_SET_RATE_PARENT,
+					pmc_base + PMC_CLK_OUT_CNTRL,
 					data->gate_shift, 0, &clk_out_lock);
 		*dt_clk = clk;
 		clk_register_clkdev(clk, data->dev_name, data->gate_name);
-- 
cgit v1.2.3


From 88da44c5edb93d58f704fb9ea21962ca357d06a1 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Wed, 22 Mar 2017 16:23:16 +0200
Subject: clk: tegra: Add missing Tegra210 clocks

iqc1, iqc2, tegra_clk_pll_a_out_adsp, tegra_clk_pll_a_out0_out_adsp, adsp
and adsp neon were not modelled. dp2 wasn't modelled for Tegra210.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-id.h               |  6 ++++++
 drivers/clk/tegra/clk-tegra-periph.c     |  6 ++++++
 drivers/clk/tegra/clk-tegra210.c         |  7 +++++++
 include/dt-bindings/clock/tegra210-car.h | 16 ++++++++--------
 4 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/drivers/clk/tegra/clk-id.h b/drivers/clk/tegra/clk-id.h
index d3a79a8011ca..689f344377a7 100644
--- a/drivers/clk/tegra/clk-id.h
+++ b/drivers/clk/tegra/clk-id.h
@@ -318,6 +318,12 @@ enum clk_id {
 	tegra_clk_dmic1_sync_clk_mux,
 	tegra_clk_dmic2_sync_clk_mux,
 	tegra_clk_dmic3_sync_clk_mux,
+	tegra_clk_iqc1,
+	tegra_clk_iqc2,
+	tegra_clk_pll_a_out_adsp,
+	tegra_clk_pll_a_out0_out_adsp,
+	tegra_clk_adsp,
+	tegra_clk_adsp_neon,
 	tegra_clk_max,
 };
 
diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c
index 9e6ac11ccf75..294bfe40a4f5 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -859,6 +859,12 @@ static struct tegra_periph_init_data gate_clks[] = {
 	GATE("pll_p_out_adsp", "pll_p", 187, 0, tegra_clk_pll_p_out_adsp, 0),
 	GATE("apb2ape", "clk_m", 107, 0, tegra_clk_apb2ape, 0),
 	GATE("cec", "pclk", 136, 0, tegra_clk_cec, 0),
+	GATE("iqc1", "clk_m", 221, 0, tegra_clk_iqc1, 0),
+	GATE("iqc2", "clk_m", 220, 0, tegra_clk_iqc1, 0),
+	GATE("pll_a_out_adsp", "pll_a", 188, 0, tegra_clk_pll_a_out_adsp, 0),
+	GATE("pll_a_out0_out_adsp", "pll_a", 188, 0, tegra_clk_pll_a_out0_out_adsp, 0),
+	GATE("adsp", "aclk", 199, 0, tegra_clk_adsp, 0),
+	GATE("adsp_neon", "aclk", 218, 0, tegra_clk_adsp_neon, 0),
 };
 
 static struct tegra_periph_init_data div_clks[] = {
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index 9897dc55676b..cdf1101c40f8 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -2326,6 +2326,13 @@ static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_dmic1_sync_clk_mux] = { .dt_id = TEGRA210_CLK_DMIC1_SYNC_CLK_MUX, .present = true },
 	[tegra_clk_dmic2_sync_clk_mux] = { .dt_id = TEGRA210_CLK_DMIC2_SYNC_CLK_MUX, .present = true },
 	[tegra_clk_dmic3_sync_clk_mux] = { .dt_id = TEGRA210_CLK_DMIC3_SYNC_CLK_MUX, .present = true },
+	[tegra_clk_dp2] = { .dt_id = TEGRA210_CLK_DP2, .present = true },
+	[tegra_clk_iqc1] = { .dt_id = TEGRA210_CLK_IQC1, .present = true },
+	[tegra_clk_iqc2] = { .dt_id = TEGRA210_CLK_IQC2, .present = true },
+	[tegra_clk_pll_a_out_adsp] = { .dt_id = TEGRA210_CLK_PLL_A_OUT_ADSP, .present = true },
+	[tegra_clk_pll_a_out0_out_adsp] = { .dt_id = TEGRA210_CLK_PLL_A_OUT0_OUT_ADSP, .present = true },
+	[tegra_clk_adsp] = { .dt_id = TEGRA210_CLK_ADSP, .present = true },
+	[tegra_clk_adsp_neon] = { .dt_id = TEGRA210_CLK_ADSP_NEON, .present = true },
 };
 
 static struct tegra_devclk devclks[] __initdata = {
diff --git a/include/dt-bindings/clock/tegra210-car.h b/include/dt-bindings/clock/tegra210-car.h
index 8744b19cca3e..46689cd3750b 100644
--- a/include/dt-bindings/clock/tegra210-car.h
+++ b/include/dt-bindings/clock/tegra210-car.h
@@ -173,7 +173,7 @@
 #define TEGRA210_CLK_ENTROPY 149
 /* 150 */
 /* 151 */
-/* 152 */
+#define TEGRA210_CLK_DP2 152
 /* 153 */
 /* 154 */
 /* 155 (bit affects dfll_ref and dfll_soc) */
@@ -210,7 +210,7 @@
 #define TEGRA210_CLK_DBGAPB 185
 /* 186 */
 #define TEGRA210_CLK_PLL_P_OUT_ADSP 187
-/* 188 */
+/* 188 ((bit affects pll_a_out_adsp and pll_a_out0_out_adsp)*/
 #define TEGRA210_CLK_PLL_G_REF 189
 /* 190 */
 /* 191 */
@@ -222,7 +222,7 @@
 /* 196 */
 #define TEGRA210_CLK_DMIC3 197
 #define TEGRA210_CLK_APE 198
-/* 199 */
+#define TEGRA210_CLK_ADSP 199
 /* 200 */
 /* 201 */
 #define TEGRA210_CLK_MAUD 202
@@ -241,10 +241,10 @@
 /* 215 */
 /* 216 */
 /* 217 */
-/* 218 */
+#define TEGRA210_CLK_ADSP_NEON 218
 #define TEGRA210_CLK_NVENC 219
-/* 220 */
-/* 221 */
+#define TEGRA210_CLK_IQC2 220
+#define TEGRA210_CLK_IQC1 221
 #define TEGRA210_CLK_SOR_SAFE 222
 #define TEGRA210_CLK_PLL_P_OUT_CPU 223
 
@@ -350,8 +350,8 @@
 /* 320 */
 /* 321 */
 #define TEGRA210_CLK_ISP 322
-/* 323 */
-/* 324 */
+#define TEGRA210_CLK_PLL_A_OUT_ADSP 323
+#define TEGRA210_CLK_PLL_A_OUT0_OUT_ADSP 324
 /* 325 */
 /* 326 */
 /* 327 */
-- 
cgit v1.2.3


From 1116d5a7aff205b6d9879a6458788cac20d0cdf3 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Mon, 27 Mar 2017 12:01:05 +0100
Subject: clk: tegra: Don't reset PLL-CX if it is already enabled

Commit 8dce89a1c2cf ("clk: tegra: Don't warn for PLL defaults
unnecessarily") changed the tegra210_pllcx_set_defaults() function
causing the PLL to always be reset regardless of whether it is in-use.
This function was changed so that resetting of the PLL will only be
skipped if the PLL is enabled AND 'pllcx->params->defaults_set' is not
true. However, the 'pllcx->params->defaults_set' is always true and
hence, the PLL is now always reset. This causes the boot to fail on the
Tegra210 Smaug where the PLL is already enabled and in-use. Fix this by
only resetting the PLL if not in-use and only printing the warning that
the defaults are not set after we have checked the default settings.

Fixes: 8dce89a1c2cf ("clk: tegra: Don't warn for PLL defaults unnecessarily")
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Acked-By: Peter De Schrijver <pdeschrijver@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra210.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index cdf1101c40f8..1024e853ea65 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -555,12 +555,12 @@ static void tegra210_pllcx_set_defaults(const char *name,
 {
 	pllcx->params->defaults_set = true;
 
-	if (readl_relaxed(clk_base + pllcx->params->base_reg) &
-			PLL_ENABLE && !pllcx->params->defaults_set) {
+	if (readl_relaxed(clk_base + pllcx->params->base_reg) & PLL_ENABLE) {
 		/* PLL is ON: only check if defaults already set */
 		pllcx_check_defaults(pllcx->params);
-		pr_warn("%s already enabled. Postponing set full defaults\n",
-			name);
+		if (!pllcx->params->defaults_set)
+			pr_warn("%s already enabled. Postponing set full defaults\n",
+				name);
 		return;
 	}
 
-- 
cgit v1.2.3


From d4879bda912107b137e743b67cf063814191cca7 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.xyz>
Date: Tue, 4 Apr 2017 17:50:56 +0800
Subject: dt-bindings: update device tree binding for Allwinner PRCM CCUs

Many Allwinner SoCs after A31 have a CCU in PRCM block.

Give the ones on H3 and A64 compatible strings.

Signed-off-by: Icenowy Zheng <icenowy@aosc.xyz>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 Documentation/devicetree/bindings/clock/sunxi-ccu.txt | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
index 68512aa398a9..e9c5a1d9834a 100644
--- a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
+++ b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
@@ -7,9 +7,11 @@ Required properties :
 		- "allwinner,sun8i-a23-ccu"
 		- "allwinner,sun8i-a33-ccu"
 		- "allwinner,sun8i-h3-ccu"
+		- "allwinner,sun8i-h3-r-ccu"
 		- "allwinner,sun8i-v3s-ccu"
 		- "allwinner,sun9i-a80-ccu"
 		- "allwinner,sun50i-a64-ccu"
+		- "allwinner,sun50i-a64-r-ccu"
 		- "allwinner,sun50i-h5-ccu"
 
 - reg: Must contain the registers base address and length
@@ -20,7 +22,10 @@ Required properties :
 - #clock-cells : must contain 1
 - #reset-cells : must contain 1
 
-Example:
+For the PRCM CCUs on H3/A64, one more clock is needed:
+- "iosc": the SoC's internal frequency oscillator
+
+Example for generic CCU:
 ccu: clock@01c20000 {
 	compatible = "allwinner,sun8i-h3-ccu";
 	reg = <0x01c20000 0x400>;
@@ -29,3 +34,13 @@ ccu: clock@01c20000 {
 	#clock-cells = <1>;
 	#reset-cells = <1>;
 };
+
+Example for PRCM CCU:
+r_ccu: clock@01f01400 {
+	compatible = "allwinner,sun50i-a64-r-ccu";
+	reg = <0x01f01400 0x100>;
+	clocks = <&osc24M>, <&osc32k>, <&iosc>;
+	clock-names = "hosc", "losc", "iosc";
+	#clock-cells = <1>;
+	#reset-cells = <1>;
+};
-- 
cgit v1.2.3


From cdb8b80b60935515b86cfe534f69934b13937052 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.xyz>
Date: Tue, 4 Apr 2017 17:50:57 +0800
Subject: clk: sunxi-ng: add support for PRCM CCUs

SoCs after A31 has a clock controller module in the PRCM part.

Support the clock controller module on H3/5 and A64 now.

Signed-off-by: Icenowy Zheng <icenowy@aosc.xyz>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/Kconfig            |   6 +
 drivers/clk/sunxi-ng/Makefile           |   1 +
 drivers/clk/sunxi-ng/ccu-sun8i-r.c      | 213 ++++++++++++++++++++++++++++++++
 drivers/clk/sunxi-ng/ccu-sun8i-r.h      |  27 ++++
 include/dt-bindings/clock/sun8i-r-ccu.h |  59 +++++++++
 include/dt-bindings/reset/sun8i-r-ccu.h |  53 ++++++++
 6 files changed, 359 insertions(+)
 create mode 100644 drivers/clk/sunxi-ng/ccu-sun8i-r.c
 create mode 100644 drivers/clk/sunxi-ng/ccu-sun8i-r.h
 create mode 100644 include/dt-bindings/clock/sun8i-r-ccu.h
 create mode 100644 include/dt-bindings/reset/sun8i-r-ccu.h

diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig
index 213cf64e4fab..056752317aee 100644
--- a/drivers/clk/sunxi-ng/Kconfig
+++ b/drivers/clk/sunxi-ng/Kconfig
@@ -150,4 +150,10 @@ config SUN9I_A80_CCU
 	default MACH_SUN9I
 	depends on MACH_SUN9I || COMPILE_TEST
 
+config SUN8I_R_CCU
+	bool "Support for Allwinner SoCs' PRCM CCUs"
+	select SUNXI_CCU_DIV
+	select SUNXI_CCU_GATE
+	default MACH_SUN8I || (ARCH_SUNXI && ARM64)
+
 endif
diff --git a/drivers/clk/sunxi-ng/Makefile b/drivers/clk/sunxi-ng/Makefile
index 6feaac0c5600..0ec02fe14c50 100644
--- a/drivers/clk/sunxi-ng/Makefile
+++ b/drivers/clk/sunxi-ng/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SUN8I_A23_CCU)	+= ccu-sun8i-a23.o
 obj-$(CONFIG_SUN8I_A33_CCU)	+= ccu-sun8i-a33.o
 obj-$(CONFIG_SUN8I_H3_CCU)	+= ccu-sun8i-h3.o
 obj-$(CONFIG_SUN8I_V3S_CCU)	+= ccu-sun8i-v3s.o
+obj-$(CONFIG_SUN8I_R_CCU)	+= ccu-sun8i-r.o
 obj-$(CONFIG_SUN9I_A80_CCU)	+= ccu-sun9i-a80.o
 obj-$(CONFIG_SUN9I_A80_CCU)	+= ccu-sun9i-a80-de.o
 obj-$(CONFIG_SUN9I_A80_CCU)	+= ccu-sun9i-a80-usb.o
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-r.c b/drivers/clk/sunxi-ng/ccu-sun8i-r.c
new file mode 100644
index 000000000000..0d027d53dbdf
--- /dev/null
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-r.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2016 Icenowy Zheng <icenowy@aosc.xyz>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+
+#include "ccu_common.h"
+#include "ccu_reset.h"
+
+#include "ccu_div.h"
+#include "ccu_gate.h"
+#include "ccu_mp.h"
+#include "ccu_nm.h"
+
+#include "ccu-sun8i-r.h"
+
+static const char * const ar100_parents[] = { "osc32k", "osc24M",
+					     "pll-periph0", "iosc" };
+
+static struct ccu_div ar100_clk = {
+	.div		= _SUNXI_CCU_DIV_FLAGS(4, 2, CLK_DIVIDER_POWER_OF_TWO),
+
+	.mux		= {
+		.shift	= 16,
+		.width	= 2,
+
+		.variable_prediv	= {
+			.index	= 2,
+			.shift	= 8,
+			.width	= 5,
+		},
+	},
+
+	.common		= {
+		.reg		= 0x00,
+		.features	= CCU_FEATURE_VARIABLE_PREDIV,
+		.hw.init	= CLK_HW_INIT_PARENTS("ar100",
+						      ar100_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static CLK_FIXED_FACTOR(ahb0_clk, "ahb0", "ar100", 1, 1, 0);
+
+static struct ccu_div apb0_clk = {
+	.div		= _SUNXI_CCU_DIV_FLAGS(0, 2, CLK_DIVIDER_POWER_OF_TWO),
+
+	.common		= {
+		.reg		= 0x0c,
+		.hw.init	= CLK_HW_INIT("apb0",
+					      "ahb0",
+					      &ccu_div_ops,
+					      0),
+	},
+};
+
+static SUNXI_CCU_GATE(apb0_pio_clk,	"apb0-pio",	"apb0",
+		      0x28, BIT(0), 0);
+static SUNXI_CCU_GATE(apb0_ir_clk,	"apb0-ir",	"apb0",
+		      0x28, BIT(1), 0);
+static SUNXI_CCU_GATE(apb0_timer_clk,	"apb0-timer",	"apb0",
+		      0x28, BIT(2), 0);
+static SUNXI_CCU_GATE(apb0_rsb_clk,	"apb0-rsb",	"apb0",
+		      0x28, BIT(3), 0);
+static SUNXI_CCU_GATE(apb0_uart_clk,	"apb0-uart",	"apb0",
+		      0x28, BIT(4), 0);
+static SUNXI_CCU_GATE(apb0_i2c_clk,	"apb0-i2c",	"apb0",
+		      0x28, BIT(6), 0);
+static SUNXI_CCU_GATE(apb0_twd_clk,	"apb0-twd",	"apb0",
+		      0x28, BIT(7), 0);
+
+static const char * const r_mod0_default_parents[] = { "osc32K", "osc24M" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(ir_clk, "ir",
+				  r_mod0_default_parents, 0x54,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static struct ccu_common *sun8i_h3_r_ccu_clks[] = {
+	&ar100_clk.common,
+	&apb0_clk.common,
+	&apb0_pio_clk.common,
+	&apb0_ir_clk.common,
+	&apb0_timer_clk.common,
+	&apb0_uart_clk.common,
+	&apb0_i2c_clk.common,
+	&apb0_twd_clk.common,
+	&ir_clk.common,
+};
+
+static struct ccu_common *sun50i_a64_r_ccu_clks[] = {
+	&ar100_clk.common,
+	&apb0_clk.common,
+	&apb0_pio_clk.common,
+	&apb0_ir_clk.common,
+	&apb0_timer_clk.common,
+	&apb0_rsb_clk.common,
+	&apb0_uart_clk.common,
+	&apb0_i2c_clk.common,
+	&apb0_twd_clk.common,
+	&ir_clk.common,
+};
+
+static struct clk_hw_onecell_data sun8i_h3_r_hw_clks = {
+	.hws	= {
+		[CLK_AR100]		= &ar100_clk.common.hw,
+		[CLK_AHB0]		= &ahb0_clk.hw,
+		[CLK_APB0]		= &apb0_clk.common.hw,
+		[CLK_APB0_PIO]		= &apb0_pio_clk.common.hw,
+		[CLK_APB0_IR]		= &apb0_ir_clk.common.hw,
+		[CLK_APB0_TIMER]	= &apb0_timer_clk.common.hw,
+		[CLK_APB0_UART]		= &apb0_uart_clk.common.hw,
+		[CLK_APB0_I2C]		= &apb0_i2c_clk.common.hw,
+		[CLK_APB0_TWD]		= &apb0_twd_clk.common.hw,
+		[CLK_IR]		= &ir_clk.common.hw,
+	},
+	.num	= CLK_NUMBER,
+};
+
+static struct clk_hw_onecell_data sun50i_a64_r_hw_clks = {
+	.hws	= {
+		[CLK_AR100]		= &ar100_clk.common.hw,
+		[CLK_AHB0]		= &ahb0_clk.hw,
+		[CLK_APB0]		= &apb0_clk.common.hw,
+		[CLK_APB0_PIO]		= &apb0_pio_clk.common.hw,
+		[CLK_APB0_IR]		= &apb0_ir_clk.common.hw,
+		[CLK_APB0_TIMER]	= &apb0_timer_clk.common.hw,
+		[CLK_APB0_RSB]		= &apb0_rsb_clk.common.hw,
+		[CLK_APB0_UART]		= &apb0_uart_clk.common.hw,
+		[CLK_APB0_I2C]		= &apb0_i2c_clk.common.hw,
+		[CLK_APB0_TWD]		= &apb0_twd_clk.common.hw,
+		[CLK_IR]		= &ir_clk.common.hw,
+	},
+	.num	= CLK_NUMBER,
+};
+
+static struct ccu_reset_map sun8i_h3_r_ccu_resets[] = {
+	[RST_APB0_IR]		=  { 0xb0, BIT(1) },
+	[RST_APB0_TIMER]	=  { 0xb0, BIT(2) },
+	[RST_APB0_UART]		=  { 0xb0, BIT(4) },
+	[RST_APB0_I2C]		=  { 0xb0, BIT(6) },
+};
+
+static struct ccu_reset_map sun50i_a64_r_ccu_resets[] = {
+	[RST_APB0_IR]		=  { 0xb0, BIT(1) },
+	[RST_APB0_TIMER]	=  { 0xb0, BIT(2) },
+	[RST_APB0_RSB]		=  { 0xb0, BIT(3) },
+	[RST_APB0_UART]		=  { 0xb0, BIT(4) },
+	[RST_APB0_I2C]		=  { 0xb0, BIT(6) },
+};
+
+static const struct sunxi_ccu_desc sun8i_h3_r_ccu_desc = {
+	.ccu_clks	= sun8i_h3_r_ccu_clks,
+	.num_ccu_clks	= ARRAY_SIZE(sun8i_h3_r_ccu_clks),
+
+	.hw_clks	= &sun8i_h3_r_hw_clks,
+
+	.resets		= sun8i_h3_r_ccu_resets,
+	.num_resets	= ARRAY_SIZE(sun8i_h3_r_ccu_resets),
+};
+
+static const struct sunxi_ccu_desc sun50i_a64_r_ccu_desc = {
+	.ccu_clks	= sun50i_a64_r_ccu_clks,
+	.num_ccu_clks	= ARRAY_SIZE(sun50i_a64_r_ccu_clks),
+
+	.hw_clks	= &sun50i_a64_r_hw_clks,
+
+	.resets		= sun50i_a64_r_ccu_resets,
+	.num_resets	= ARRAY_SIZE(sun50i_a64_r_ccu_resets),
+};
+
+static void __init sunxi_r_ccu_init(struct device_node *node,
+				    const struct sunxi_ccu_desc *desc)
+{
+	void __iomem *reg;
+
+	reg = of_io_request_and_map(node, 0, of_node_full_name(node));
+	if (IS_ERR(reg)) {
+		pr_err("%s: Could not map the clock registers\n",
+		       of_node_full_name(node));
+		return;
+	}
+
+	sunxi_ccu_probe(node, reg, desc);
+}
+
+static void __init sun8i_h3_r_ccu_setup(struct device_node *node)
+{
+	sunxi_r_ccu_init(node, &sun8i_h3_r_ccu_desc);
+}
+CLK_OF_DECLARE(sun8i_h3_r_ccu, "allwinner,sun8i-h3-r-ccu",
+	       sun8i_h3_r_ccu_setup);
+
+static void __init sun50i_a64_r_ccu_setup(struct device_node *node)
+{
+	sunxi_r_ccu_init(node, &sun50i_a64_r_ccu_desc);
+}
+CLK_OF_DECLARE(sun50i_a64_r_ccu, "allwinner,sun50i-a64-r-ccu",
+	       sun50i_a64_r_ccu_setup);
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-r.h b/drivers/clk/sunxi-ng/ccu-sun8i-r.h
new file mode 100644
index 000000000000..eaa431fd1d8f
--- /dev/null
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-r.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2016 Icenowy <icenowy@aosc.xyz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CCU_SUN8I_R_H
+#define _CCU_SUN8I_R_H_
+
+#include <dt-bindings/clock/sun8i-r-ccu.h>
+#include <dt-bindings/reset/sun8i-r-ccu.h>
+
+/* AHB/APB bus clocks are not exported */
+#define CLK_AHB0	1
+#define CLK_APB0	2
+
+#define CLK_NUMBER	(CLK_APB0_TWD + 1)
+
+#endif /* _CCU_SUN8I_R_H */
diff --git a/include/dt-bindings/clock/sun8i-r-ccu.h b/include/dt-bindings/clock/sun8i-r-ccu.h
new file mode 100644
index 000000000000..779d20aa0d05
--- /dev/null
+++ b/include/dt-bindings/clock/sun8i-r-ccu.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016 Icenowy Zheng <icenowy@aosc.xyz>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_CLK_SUN8I_R_CCU_H_
+#define _DT_BINDINGS_CLK_SUN8I_R_CCU_H_
+
+#define CLK_AR100		0
+
+#define CLK_APB0_PIO		3
+#define CLK_APB0_IR		4
+#define CLK_APB0_TIMER		5
+#define CLK_APB0_RSB		6
+#define CLK_APB0_UART		7
+/* 8 is reserved for CLK_APB0_W1 on A31 */
+#define CLK_APB0_I2C		9
+#define CLK_APB0_TWD		10
+
+#define CLK_IR			11
+
+#endif /* _DT_BINDINGS_CLK_SUN8I_R_CCU_H_ */
diff --git a/include/dt-bindings/reset/sun8i-r-ccu.h b/include/dt-bindings/reset/sun8i-r-ccu.h
new file mode 100644
index 000000000000..4ba64f3d6fc9
--- /dev/null
+++ b/include/dt-bindings/reset/sun8i-r-ccu.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2016 Icenowy Zheng <icenowy@aosc.xyz>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_RST_SUN8I_R_CCU_H_
+#define _DT_BINDINGS_RST_SUN8I_R_CCU_H_
+
+#define RST_APB0_IR		0
+#define RST_APB0_TIMER		1
+#define RST_APB0_RSB		2
+#define RST_APB0_UART		3
+/* 4 is reserved for RST_APB0_W1 on A31 */
+#define RST_APB0_I2C		5
+
+#endif /* _DT_BINDINGS_RST_SUN8I_R_CCU_H_ */
-- 
cgit v1.2.3


From bf616d21f41174389c6d720ae21bf40f154474c8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:21 +0100
Subject: Annotate module params that specify hardware parameters (eg. ioport)

Provided an annotation for module parameters that specify hardware
parameters (such as io ports, iomem addresses, irqs, dma channels, fixed
dma buffers and other types).

This will enable such parameters to be locked down in the core parameter
parser for secure boot support.

I've also included annotations as to what sort of hardware configuration
each module is dealing with for future use.  Some of these are
straightforward (ioport, iomem, irq, dma), but there are also:

 (1) drivers that switch the semantics of a parameter between ioport and
     iomem depending on a second parameter,

 (2) drivers that appear to reserve a CPU memory buffer at a fixed address,

 (3) other parameters, such as bus types and irq selection bitmasks.

For the moment, the hardware configuration type isn't actually stored,
though its validity is checked.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/moduleparam.h | 65 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 52666d90ca94..6be1949ebcdf 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -60,9 +60,11 @@ struct kernel_param_ops {
  * Flags available for kernel_param
  *
  * UNSAFE - the parameter is dangerous and setting it will taint the kernel
+ * HWPARAM - Hardware param not permitted in lockdown mode
  */
 enum {
-	KERNEL_PARAM_FL_UNSAFE = (1 << 0)
+	KERNEL_PARAM_FL_UNSAFE	= (1 << 0),
+	KERNEL_PARAM_FL_HWPARAM	= (1 << 1),
 };
 
 struct kernel_param {
@@ -451,6 +453,67 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp);
 			    perm, -1, 0);				\
 	__MODULE_PARM_TYPE(name, "array of " #type)
 
+enum hwparam_type {
+	hwparam_ioport,		/* Module parameter configures an I/O port */
+	hwparam_iomem,		/* Module parameter configures an I/O mem address */
+	hwparam_ioport_or_iomem, /* Module parameter could be either, depending on other option */
+	hwparam_irq,		/* Module parameter configures an I/O port */
+	hwparam_dma,		/* Module parameter configures a DMA channel */
+	hwparam_dma_addr,	/* Module parameter configures a DMA buffer address */
+	hwparam_other,		/* Module parameter configures some other value */
+};
+
+/**
+ * module_param_hw_named - A parameter representing a hw parameters
+ * @name: a valid C identifier which is the parameter name.
+ * @value: the actual lvalue to alter.
+ * @type: the type of the parameter
+ * @hwtype: what the value represents (enum hwparam_type)
+ * @perm: visibility in sysfs.
+ *
+ * Usually it's a good idea to have variable names and user-exposed names the
+ * same, but that's harder if the variable must be non-static or is inside a
+ * structure.  This allows exposure under a different name.
+ */
+#define module_param_hw_named(name, value, type, hwtype, perm)		\
+	param_check_##type(name, &(value));				\
+	__module_param_call(MODULE_PARAM_PREFIX, name,			\
+			    &param_ops_##type, &value,			\
+			    perm, -1,					\
+			    KERNEL_PARAM_FL_HWPARAM | (hwparam_##hwtype & 0));	\
+	__MODULE_PARM_TYPE(name, #type)
+
+#define module_param_hw(name, type, hwtype, perm)		\
+	module_param_hw_named(name, name, type, hwtype, perm)
+
+/**
+ * module_param_hw_array - A parameter representing an array of hw parameters
+ * @name: the name of the array variable
+ * @type: the type, as per module_param()
+ * @hwtype: what the value represents (enum hwparam_type)
+ * @nump: optional pointer filled in with the number written
+ * @perm: visibility in sysfs
+ *
+ * Input and output are as comma-separated values.  Commas inside values
+ * don't work properly (eg. an array of charp).
+ *
+ * ARRAY_SIZE(@name) is used to determine the number of elements in the
+ * array, so the definition must be visible.
+ */
+#define module_param_hw_array(name, type, hwtype, nump, perm)		\
+	param_check_##type(name, &(name)[0]);				\
+	static const struct kparam_array __param_arr_##name		\
+	= { .max = ARRAY_SIZE(name), .num = nump,			\
+	    .ops = &param_ops_##type,					\
+	    .elemsize = sizeof(name[0]), .elem = name };		\
+	__module_param_call(MODULE_PARAM_PREFIX, name,			\
+			    &param_array_ops,				\
+			    .arr = &__param_arr_##name,			\
+			    perm, -1,					\
+			    KERNEL_PARAM_FL_HWPARAM | (hwparam_##hwtype & 0));	\
+	__MODULE_PARM_TYPE(name, "array of " #type)
+
+
 extern const struct kernel_param_ops param_array_ops;
 
 extern const struct kernel_param_ops param_ops_string;
-- 
cgit v1.2.3


From 3c2e2e6816930e25c755f2e4fc298a0d05d223cf Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:21 +0100
Subject: Annotate hardware config module parameters in arch/x86/mm/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in arch/x86/mm/.

[Note: With respect to testmmiotrace, an additional patch will be added
 separately that makes the module refuse to load if the kernel is locked
 down.]

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
cc: Ingo Molnar <mingo@kernel.org>
cc: Thomas Gleixner <tglx@linutronix.de>
cc: "H. Peter Anvin" <hpa@zytor.com>
cc: x86@kernel.org
cc: linux-kernel@vger.kernel.org
cc: nouveau@lists.freedesktop.org
---
 arch/x86/mm/testmmiotrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index 38868adf07ea..f6ae6830b341 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -9,7 +9,7 @@
 #include <linux/mmiotrace.h>
 
 static unsigned long mmio_address;
-module_param(mmio_address, ulong, 0);
+module_param_hw(mmio_address, ulong, iomem, 0);
 MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
 				"(or 8 MB if read_far is non-zero).");
 
-- 
cgit v1.2.3


From fac9a55b66c9b266171b69e73818a18225c41626 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 22 Mar 2017 11:18:54 +0100
Subject: clk: meson-gxbb: Add MALI clocks

The Mali is clocked by two identical clock paths behind a glitch free mux
to safely change frequency while running.

The two "mali_0" and "mali_1" clocks are composed of a mux, divider and gate.
Expose these two clocks trees using generic clocks.
Finally the glitch free mux is added as "mali" clock.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/1490177935-9646-3-git-send-email-narmstrong@baylibre.com
---
 drivers/clk/meson/gxbb.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)

diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index 5059c7bbdbb3..d07dc2255faf 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -634,6 +634,131 @@ static struct clk_gate gxbb_sar_adc_clk = {
 	},
 };
 
+/*
+ * The MALI IP is clocked by two identical clocks (mali_0 and mali_1)
+ * muxed by a glitch-free switch.
+ */
+
+static u32 mux_table_mali_0_1[] = {0, 1, 2, 3, 4, 5, 6, 7};
+static const char *gxbb_mali_0_1_parent_names[] = {
+	"xtal", "gp0_pll", "mpll2", "mpll1", "fclk_div7",
+	"fclk_div4", "fclk_div3", "fclk_div5"
+};
+
+static struct clk_mux gxbb_mali_0_sel = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.mask = 0x7,
+	.shift = 9,
+	.table = mux_table_mali_0_1,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali_0_sel",
+		.ops = &clk_mux_ops,
+		/*
+		 * bits 10:9 selects from 8 possible parents:
+		 * xtal, gp0_pll, mpll2, mpll1, fclk_div7,
+		 * fclk_div4, fclk_div3, fclk_div5
+		 */
+		.parent_names = gxbb_mali_0_1_parent_names,
+		.num_parents = 8,
+		.flags = CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
+static struct clk_divider gxbb_mali_0_div = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.shift = 0,
+	.width = 7,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali_0_div",
+		.ops = &clk_divider_ops,
+		.parent_names = (const char *[]){ "mali_0_sel" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
+static struct clk_gate gxbb_mali_0 = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.bit_idx = 8,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali_0",
+		.ops = &clk_gate_ops,
+		.parent_names = (const char *[]){ "mali_0_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_mux gxbb_mali_1_sel = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.mask = 0x7,
+	.shift = 25,
+	.table = mux_table_mali_0_1,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali_1_sel",
+		.ops = &clk_mux_ops,
+		/*
+		 * bits 10:9 selects from 8 possible parents:
+		 * xtal, gp0_pll, mpll2, mpll1, fclk_div7,
+		 * fclk_div4, fclk_div3, fclk_div5
+		 */
+		.parent_names = gxbb_mali_0_1_parent_names,
+		.num_parents = 8,
+		.flags = CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
+static struct clk_divider gxbb_mali_1_div = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.shift = 16,
+	.width = 7,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali_1_div",
+		.ops = &clk_divider_ops,
+		.parent_names = (const char *[]){ "mali_1_sel" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
+static struct clk_gate gxbb_mali_1 = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.bit_idx = 24,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali_1",
+		.ops = &clk_gate_ops,
+		.parent_names = (const char *[]){ "mali_1_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static u32 mux_table_mali[] = {0, 1};
+static const char *gxbb_mali_parent_names[] = {
+	"mali_0", "mali_1"
+};
+
+static struct clk_mux gxbb_mali = {
+	.reg = (void *)HHI_MALI_CLK_CNTL,
+	.mask = 1,
+	.shift = 31,
+	.table = mux_table_mali,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mali",
+		.ops = &clk_mux_ops,
+		.parent_names = gxbb_mali_parent_names,
+		.num_parents = 2,
+		.flags = CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
 /* Everything Else (EE) domain gates */
 static MESON_GATE(gxbb_ddr, HHI_GCLK_MPEG0, 0);
 static MESON_GATE(gxbb_dos, HHI_GCLK_MPEG0, 1);
@@ -827,6 +952,13 @@ static struct clk_hw_onecell_data gxbb_hw_onecell_data = {
 		[CLKID_SAR_ADC_CLK]	    = &gxbb_sar_adc_clk.hw,
 		[CLKID_SAR_ADC_SEL]	    = &gxbb_sar_adc_clk_sel.hw,
 		[CLKID_SAR_ADC_DIV]	    = &gxbb_sar_adc_clk_div.hw,
+		[CLKID_MALI_0_SEL]	    = &gxbb_mali_0_sel.hw,
+		[CLKID_MALI_0_DIV]	    = &gxbb_mali_0_div.hw,
+		[CLKID_MALI_0]		    = &gxbb_mali_0.hw,
+		[CLKID_MALI_1_SEL]	    = &gxbb_mali_1_sel.hw,
+		[CLKID_MALI_1_DIV]	    = &gxbb_mali_1_div.hw,
+		[CLKID_MALI_1]		    = &gxbb_mali_1.hw,
+		[CLKID_MALI]		    = &gxbb_mali.hw,
 	},
 	.num = NR_CLKS,
 };
@@ -930,16 +1062,23 @@ static struct clk_gate *const gxbb_clk_gates[] = {
 	&gxbb_emmc_b,
 	&gxbb_emmc_c,
 	&gxbb_sar_adc_clk,
+	&gxbb_mali_0,
+	&gxbb_mali_1,
 };
 
 static struct clk_mux *const gxbb_clk_muxes[] = {
 	&gxbb_mpeg_clk_sel,
 	&gxbb_sar_adc_clk_sel,
+	&gxbb_mali_0_sel,
+	&gxbb_mali_1_sel,
+	&gxbb_mali,
 };
 
 static struct clk_divider *const gxbb_clk_dividers[] = {
 	&gxbb_mpeg_clk_div,
 	&gxbb_sar_adc_clk_div,
+	&gxbb_mali_0_div,
+	&gxbb_mali_1_div,
 };
 
 static int gxbb_clkc_probe(struct platform_device *pdev)
-- 
cgit v1.2.3


From 45fcbec70c084631dc430810dad14a7ece5000b8 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 22 Mar 2017 11:32:23 +0100
Subject: clk: meson: Add support for parameters for specific PLLs

In recent Amlogic GXBB, GXL and GXM SoCs, the GP0 PLL needs some specific
parameters in order to initialize and lock correctly.

This patch adds an optional PARAM table used to initialize the PLL to a
default value with it's parameters in order to achieve to desired frequency.

The GP0 PLL in GXBB, GXL/GXM also needs some tweaks in the initialization
steps, and these are exposed along the PARAM table.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/1490178747-14837-2-git-send-email-narmstrong@baylibre.com
---
 drivers/clk/meson/clk-pll.c | 53 +++++++++++++++++++++++++++++++++++++++++++--
 drivers/clk/meson/clkc.h    | 23 ++++++++++++++++++++
 2 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/meson/clk-pll.c b/drivers/clk/meson/clk-pll.c
index 4adc1e89212c..01341553f50b 100644
--- a/drivers/clk/meson/clk-pll.c
+++ b/drivers/clk/meson/clk-pll.c
@@ -116,6 +116,30 @@ static const struct pll_rate_table *meson_clk_get_pll_settings(struct meson_clk_
 	return NULL;
 }
 
+/* Specific wait loop for GXL/GXM GP0 PLL */
+static int meson_clk_pll_wait_lock_reset(struct meson_clk_pll *pll,
+					 struct parm *p_n)
+{
+	int delay = 100;
+	u32 reg;
+
+	while (delay > 0) {
+		reg = readl(pll->base + p_n->reg_off);
+		writel(reg | MESON_PLL_RESET, pll->base + p_n->reg_off);
+		udelay(10);
+		writel(reg & ~MESON_PLL_RESET, pll->base + p_n->reg_off);
+
+		/* This delay comes from AMLogic tree clk-gp0-gxl driver */
+		mdelay(1);
+
+		reg = readl(pll->base + p_n->reg_off);
+		if (reg & MESON_PLL_LOCK)
+			return 0;
+		delay--;
+	}
+	return -ETIMEDOUT;
+}
+
 static int meson_clk_pll_wait_lock(struct meson_clk_pll *pll,
 				   struct parm *p_n)
 {
@@ -132,6 +156,15 @@ static int meson_clk_pll_wait_lock(struct meson_clk_pll *pll,
 	return -ETIMEDOUT;
 }
 
+static void meson_clk_pll_init_params(struct meson_clk_pll *pll)
+{
+	int i;
+
+	for (i = 0 ; i < pll->params.params_count ; ++i)
+		writel(pll->params.params_table[i].value,
+		       pll->base + pll->params.params_table[i].reg_off);
+}
+
 static int meson_clk_pll_set_rate(struct clk_hw *hw, unsigned long rate,
 				  unsigned long parent_rate)
 {
@@ -151,10 +184,16 @@ static int meson_clk_pll_set_rate(struct clk_hw *hw, unsigned long rate,
 	if (!rate_set)
 		return -EINVAL;
 
+	/* Initialize the PLL in a clean state if specified */
+	if (pll->params.params_count)
+		meson_clk_pll_init_params(pll);
+
 	/* PLL reset */
 	p = &pll->n;
 	reg = readl(pll->base + p->reg_off);
-	writel(reg | MESON_PLL_RESET, pll->base + p->reg_off);
+	/* If no_init_reset is provided, avoid resetting at this point */
+	if (!pll->params.no_init_reset)
+		writel(reg | MESON_PLL_RESET, pll->base + p->reg_off);
 
 	reg = PARM_SET(p->width, p->shift, reg, rate_set->n);
 	writel(reg, pll->base + p->reg_off);
@@ -184,7 +223,17 @@ static int meson_clk_pll_set_rate(struct clk_hw *hw, unsigned long rate,
 	}
 
 	p = &pll->n;
-	ret = meson_clk_pll_wait_lock(pll, p);
+	/* If clear_reset_for_lock is provided, remove the reset bit here */
+	if (pll->params.clear_reset_for_lock) {
+		reg = readl(pll->base + p->reg_off);
+		writel(reg & ~MESON_PLL_RESET, pll->base + p->reg_off);
+	}
+
+	/* If reset_lock_loop, use a special loop including resetting */
+	if (pll->params.reset_lock_loop)
+		ret = meson_clk_pll_wait_lock_reset(pll, p);
+	else
+		ret = meson_clk_pll_wait_lock(pll, p);
 	if (ret) {
 		pr_warn("%s: pll did not lock, trying to restore old rate %lu\n",
 			__func__, old_rate);
diff --git a/drivers/clk/meson/clkc.h b/drivers/clk/meson/clkc.h
index ad254675edd8..b0c9999d03de 100644
--- a/drivers/clk/meson/clkc.h
+++ b/drivers/clk/meson/clkc.h
@@ -62,6 +62,28 @@ struct pll_rate_table {
 		.frac		= (_frac),				\
 	}								\
 
+struct pll_params_table {
+	unsigned int reg_off;
+	unsigned int value;
+};
+
+#define PLL_PARAM(_reg, _val)						\
+	{								\
+		.reg_off	= (_reg),				\
+		.value		= (_val),				\
+	}
+
+struct pll_setup_params {
+	struct pll_params_table *params_table;
+	unsigned int params_count;
+	/* Workaround for GP0, do not reset before configuring */
+	bool no_init_reset;
+	/* Workaround for GP0, unreset right before checking for lock */
+	bool clear_reset_for_lock;
+	/* Workaround for GXL GP0, reset in the lock checking loop */
+	bool reset_lock_loop;
+};
+
 struct meson_clk_pll {
 	struct clk_hw hw;
 	void __iomem *base;
@@ -70,6 +92,7 @@ struct meson_clk_pll {
 	struct parm frac;
 	struct parm od;
 	struct parm od2;
+	const struct pll_setup_params params;
 	const struct pll_rate_table *rate_table;
 	unsigned int rate_count;
 	spinlock_t *lock;
-- 
cgit v1.2.3


From e194401cf4d49e7fe2f8ec994130d59e94f09137 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 22 Mar 2017 11:32:24 +0100
Subject: clk: meson-gxbb: Add GP0 PLL init parameters

Tha Amlogic GXBB SoC GP0 PLL needs some vendor provided parameters to be
initializated in the the GP0 control registers before configuring the rate
with the rate table provided parameters.

GXBB GP0 PLL tweaks are also selected to respect the vendor init procedure.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/1490178747-14837-3-git-send-email-narmstrong@baylibre.com
---
 drivers/clk/meson/gxbb.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index d07dc2255faf..db95038e50c6 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -352,6 +352,13 @@ static struct meson_clk_pll gxbb_sys_pll = {
 	},
 };
 
+struct pll_params_table gxbb_gp0_params_table[] = {
+	PLL_PARAM(HHI_GP0_PLL_CNTL, 0x6a000228),
+	PLL_PARAM(HHI_GP0_PLL_CNTL2, 0x69c80000),
+	PLL_PARAM(HHI_GP0_PLL_CNTL3, 0x0a5590c4),
+	PLL_PARAM(HHI_GP0_PLL_CNTL4, 0x0000500d),
+};
+
 static struct meson_clk_pll gxbb_gp0_pll = {
 	.m = {
 		.reg_off = HHI_GP0_PLL_CNTL,
@@ -368,6 +375,12 @@ static struct meson_clk_pll gxbb_gp0_pll = {
 		.shift   = 16,
 		.width   = 2,
 	},
+	.params = {
+		.params_table = gxbb_gp0_params_table,
+		.params_count =	ARRAY_SIZE(gxbb_gp0_params_table),
+		.no_init_reset = true,
+		.clear_reset_for_lock = true,
+	},
 	.rate_table = gp0_pll_rate_table,
 	.rate_count = ARRAY_SIZE(gp0_pll_rate_table),
 	.lock = &clk_lock,
-- 
cgit v1.2.3


From 0d48fc558d01ded71ffad3fe6cca8081847ac9a7 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 22 Mar 2017 11:32:25 +0100
Subject: clk: meson-gxbb: Add GXL/GXM GP0 Variant

The clock tree in the Amlogic GXBB and GXL/GXM SoCs is shared, but the GXL/GXM
SoCs embeds a different GP0 PLL, and needs different parameters with a vendor
provided reduced rate table.

This patch adds the GXL GP0 variant, and adds a GXL DT compatible in order
to use the GXL GP0 PLL instead of the GXBB specific one.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/1490178747-14837-4-git-send-email-narmstrong@baylibre.com
---
 drivers/clk/meson/gxbb.c | 301 ++++++++++++++++++++++++++++++++++++++++++-----
 drivers/clk/meson/gxbb.h |   2 +
 2 files changed, 275 insertions(+), 28 deletions(-)

diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index db95038e50c6..75197664a7ee 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -20,6 +20,7 @@
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/init.h>
 
@@ -120,7 +121,7 @@ static const struct pll_rate_table sys_pll_rate_table[] = {
 	{ /* sentinel */ },
 };
 
-static const struct pll_rate_table gp0_pll_rate_table[] = {
+static const struct pll_rate_table gxbb_gp0_pll_rate_table[] = {
 	PLL_RATE(96000000, 32, 1, 3),
 	PLL_RATE(99000000, 33, 1, 3),
 	PLL_RATE(102000000, 34, 1, 3),
@@ -248,6 +249,35 @@ static const struct pll_rate_table gp0_pll_rate_table[] = {
 	{ /* sentinel */ },
 };
 
+static const struct pll_rate_table gxl_gp0_pll_rate_table[] = {
+	PLL_RATE(504000000, 42, 1, 1),
+	PLL_RATE(516000000, 43, 1, 1),
+	PLL_RATE(528000000, 44, 1, 1),
+	PLL_RATE(540000000, 45, 1, 1),
+	PLL_RATE(552000000, 46, 1, 1),
+	PLL_RATE(564000000, 47, 1, 1),
+	PLL_RATE(576000000, 48, 1, 1),
+	PLL_RATE(588000000, 49, 1, 1),
+	PLL_RATE(600000000, 50, 1, 1),
+	PLL_RATE(612000000, 51, 1, 1),
+	PLL_RATE(624000000, 52, 1, 1),
+	PLL_RATE(636000000, 53, 1, 1),
+	PLL_RATE(648000000, 54, 1, 1),
+	PLL_RATE(660000000, 55, 1, 1),
+	PLL_RATE(672000000, 56, 1, 1),
+	PLL_RATE(684000000, 57, 1, 1),
+	PLL_RATE(696000000, 58, 1, 1),
+	PLL_RATE(708000000, 59, 1, 1),
+	PLL_RATE(720000000, 60, 1, 1),
+	PLL_RATE(732000000, 61, 1, 1),
+	PLL_RATE(744000000, 62, 1, 1),
+	PLL_RATE(756000000, 63, 1, 1),
+	PLL_RATE(768000000, 64, 1, 1),
+	PLL_RATE(780000000, 65, 1, 1),
+	PLL_RATE(792000000, 66, 1, 1),
+	{ /* sentinel */ },
+};
+
 static const struct clk_div_table cpu_div_table[] = {
 	{ .val = 1, .div = 1 },
 	{ .val = 2, .div = 2 },
@@ -381,8 +411,51 @@ static struct meson_clk_pll gxbb_gp0_pll = {
 		.no_init_reset = true,
 		.clear_reset_for_lock = true,
 	},
-	.rate_table = gp0_pll_rate_table,
-	.rate_count = ARRAY_SIZE(gp0_pll_rate_table),
+	.rate_table = gxbb_gp0_pll_rate_table,
+	.rate_count = ARRAY_SIZE(gxbb_gp0_pll_rate_table),
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "gp0_pll",
+		.ops = &meson_clk_pll_ops,
+		.parent_names = (const char *[]){ "xtal" },
+		.num_parents = 1,
+		.flags = CLK_GET_RATE_NOCACHE,
+	},
+};
+
+struct pll_params_table gxl_gp0_params_table[] = {
+	PLL_PARAM(HHI_GP0_PLL_CNTL, 0x40010250),
+	PLL_PARAM(HHI_GP0_PLL_CNTL1, 0xc084a000),
+	PLL_PARAM(HHI_GP0_PLL_CNTL2, 0xb75020be),
+	PLL_PARAM(HHI_GP0_PLL_CNTL3, 0x0a59a288),
+	PLL_PARAM(HHI_GP0_PLL_CNTL4, 0xc000004d),
+	PLL_PARAM(HHI_GP0_PLL_CNTL5, 0x00078000),
+};
+
+static struct meson_clk_pll gxl_gp0_pll = {
+	.m = {
+		.reg_off = HHI_GP0_PLL_CNTL,
+		.shift   = 0,
+		.width   = 9,
+	},
+	.n = {
+		.reg_off = HHI_GP0_PLL_CNTL,
+		.shift   = 9,
+		.width   = 5,
+	},
+	.od = {
+		.reg_off = HHI_GP0_PLL_CNTL,
+		.shift   = 16,
+		.width   = 2,
+	},
+	.params = {
+		.params_table = gxl_gp0_params_table,
+		.params_count =	ARRAY_SIZE(gxl_gp0_params_table),
+		.no_init_reset = true,
+		.reset_lock_loop = true,
+	},
+	.rate_table = gxl_gp0_pll_rate_table,
+	.rate_count = ARRAY_SIZE(gxl_gp0_pll_rate_table),
 	.lock = &clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "gp0_pll",
@@ -976,6 +1049,119 @@ static struct clk_hw_onecell_data gxbb_hw_onecell_data = {
 	.num = NR_CLKS,
 };
 
+static struct clk_hw_onecell_data gxl_hw_onecell_data = {
+	.hws = {
+		[CLKID_SYS_PLL]		    = &gxbb_sys_pll.hw,
+		[CLKID_CPUCLK]		    = &gxbb_cpu_clk.hw,
+		[CLKID_HDMI_PLL]	    = &gxbb_hdmi_pll.hw,
+		[CLKID_FIXED_PLL]	    = &gxbb_fixed_pll.hw,
+		[CLKID_FCLK_DIV2]	    = &gxbb_fclk_div2.hw,
+		[CLKID_FCLK_DIV3]	    = &gxbb_fclk_div3.hw,
+		[CLKID_FCLK_DIV4]	    = &gxbb_fclk_div4.hw,
+		[CLKID_FCLK_DIV5]	    = &gxbb_fclk_div5.hw,
+		[CLKID_FCLK_DIV7]	    = &gxbb_fclk_div7.hw,
+		[CLKID_GP0_PLL]		    = &gxl_gp0_pll.hw,
+		[CLKID_MPEG_SEL]	    = &gxbb_mpeg_clk_sel.hw,
+		[CLKID_MPEG_DIV]	    = &gxbb_mpeg_clk_div.hw,
+		[CLKID_CLK81]		    = &gxbb_clk81.hw,
+		[CLKID_MPLL0]		    = &gxbb_mpll0.hw,
+		[CLKID_MPLL1]		    = &gxbb_mpll1.hw,
+		[CLKID_MPLL2]		    = &gxbb_mpll2.hw,
+		[CLKID_DDR]		    = &gxbb_ddr.hw,
+		[CLKID_DOS]		    = &gxbb_dos.hw,
+		[CLKID_ISA]		    = &gxbb_isa.hw,
+		[CLKID_PL301]		    = &gxbb_pl301.hw,
+		[CLKID_PERIPHS]		    = &gxbb_periphs.hw,
+		[CLKID_SPICC]		    = &gxbb_spicc.hw,
+		[CLKID_I2C]		    = &gxbb_i2c.hw,
+		[CLKID_SAR_ADC]		    = &gxbb_sar_adc.hw,
+		[CLKID_SMART_CARD]	    = &gxbb_smart_card.hw,
+		[CLKID_RNG0]		    = &gxbb_rng0.hw,
+		[CLKID_UART0]		    = &gxbb_uart0.hw,
+		[CLKID_SDHC]		    = &gxbb_sdhc.hw,
+		[CLKID_STREAM]		    = &gxbb_stream.hw,
+		[CLKID_ASYNC_FIFO]	    = &gxbb_async_fifo.hw,
+		[CLKID_SDIO]		    = &gxbb_sdio.hw,
+		[CLKID_ABUF]		    = &gxbb_abuf.hw,
+		[CLKID_HIU_IFACE]	    = &gxbb_hiu_iface.hw,
+		[CLKID_ASSIST_MISC]	    = &gxbb_assist_misc.hw,
+		[CLKID_SPI]		    = &gxbb_spi.hw,
+		[CLKID_I2S_SPDIF]	    = &gxbb_i2s_spdif.hw,
+		[CLKID_ETH]		    = &gxbb_eth.hw,
+		[CLKID_DEMUX]		    = &gxbb_demux.hw,
+		[CLKID_AIU_GLUE]	    = &gxbb_aiu_glue.hw,
+		[CLKID_IEC958]		    = &gxbb_iec958.hw,
+		[CLKID_I2S_OUT]		    = &gxbb_i2s_out.hw,
+		[CLKID_AMCLK]		    = &gxbb_amclk.hw,
+		[CLKID_AIFIFO2]		    = &gxbb_aififo2.hw,
+		[CLKID_MIXER]		    = &gxbb_mixer.hw,
+		[CLKID_MIXER_IFACE]	    = &gxbb_mixer_iface.hw,
+		[CLKID_ADC]		    = &gxbb_adc.hw,
+		[CLKID_BLKMV]		    = &gxbb_blkmv.hw,
+		[CLKID_AIU]		    = &gxbb_aiu.hw,
+		[CLKID_UART1]		    = &gxbb_uart1.hw,
+		[CLKID_G2D]		    = &gxbb_g2d.hw,
+		[CLKID_USB0]		    = &gxbb_usb0.hw,
+		[CLKID_USB1]		    = &gxbb_usb1.hw,
+		[CLKID_RESET]		    = &gxbb_reset.hw,
+		[CLKID_NAND]		    = &gxbb_nand.hw,
+		[CLKID_DOS_PARSER]	    = &gxbb_dos_parser.hw,
+		[CLKID_USB]		    = &gxbb_usb.hw,
+		[CLKID_VDIN1]		    = &gxbb_vdin1.hw,
+		[CLKID_AHB_ARB0]	    = &gxbb_ahb_arb0.hw,
+		[CLKID_EFUSE]		    = &gxbb_efuse.hw,
+		[CLKID_BOOT_ROM]	    = &gxbb_boot_rom.hw,
+		[CLKID_AHB_DATA_BUS]	    = &gxbb_ahb_data_bus.hw,
+		[CLKID_AHB_CTRL_BUS]	    = &gxbb_ahb_ctrl_bus.hw,
+		[CLKID_HDMI_INTR_SYNC]	    = &gxbb_hdmi_intr_sync.hw,
+		[CLKID_HDMI_PCLK]	    = &gxbb_hdmi_pclk.hw,
+		[CLKID_USB1_DDR_BRIDGE]	    = &gxbb_usb1_ddr_bridge.hw,
+		[CLKID_USB0_DDR_BRIDGE]	    = &gxbb_usb0_ddr_bridge.hw,
+		[CLKID_MMC_PCLK]	    = &gxbb_mmc_pclk.hw,
+		[CLKID_DVIN]		    = &gxbb_dvin.hw,
+		[CLKID_UART2]		    = &gxbb_uart2.hw,
+		[CLKID_SANA]		    = &gxbb_sana.hw,
+		[CLKID_VPU_INTR]	    = &gxbb_vpu_intr.hw,
+		[CLKID_SEC_AHB_AHB3_BRIDGE] = &gxbb_sec_ahb_ahb3_bridge.hw,
+		[CLKID_CLK81_A53]	    = &gxbb_clk81_a53.hw,
+		[CLKID_VCLK2_VENCI0]	    = &gxbb_vclk2_venci0.hw,
+		[CLKID_VCLK2_VENCI1]	    = &gxbb_vclk2_venci1.hw,
+		[CLKID_VCLK2_VENCP0]	    = &gxbb_vclk2_vencp0.hw,
+		[CLKID_VCLK2_VENCP1]	    = &gxbb_vclk2_vencp1.hw,
+		[CLKID_GCLK_VENCI_INT0]	    = &gxbb_gclk_venci_int0.hw,
+		[CLKID_GCLK_VENCI_INT]	    = &gxbb_gclk_vencp_int.hw,
+		[CLKID_DAC_CLK]		    = &gxbb_dac_clk.hw,
+		[CLKID_AOCLK_GATE]	    = &gxbb_aoclk_gate.hw,
+		[CLKID_IEC958_GATE]	    = &gxbb_iec958_gate.hw,
+		[CLKID_ENC480P]		    = &gxbb_enc480p.hw,
+		[CLKID_RNG1]		    = &gxbb_rng1.hw,
+		[CLKID_GCLK_VENCI_INT1]	    = &gxbb_gclk_venci_int1.hw,
+		[CLKID_VCLK2_VENCLMCC]	    = &gxbb_vclk2_venclmcc.hw,
+		[CLKID_VCLK2_VENCL]	    = &gxbb_vclk2_vencl.hw,
+		[CLKID_VCLK_OTHER]	    = &gxbb_vclk_other.hw,
+		[CLKID_EDP]		    = &gxbb_edp.hw,
+		[CLKID_AO_MEDIA_CPU]	    = &gxbb_ao_media_cpu.hw,
+		[CLKID_AO_AHB_SRAM]	    = &gxbb_ao_ahb_sram.hw,
+		[CLKID_AO_AHB_BUS]	    = &gxbb_ao_ahb_bus.hw,
+		[CLKID_AO_IFACE]	    = &gxbb_ao_iface.hw,
+		[CLKID_AO_I2C]		    = &gxbb_ao_i2c.hw,
+		[CLKID_SD_EMMC_A]	    = &gxbb_emmc_a.hw,
+		[CLKID_SD_EMMC_B]	    = &gxbb_emmc_b.hw,
+		[CLKID_SD_EMMC_C]	    = &gxbb_emmc_c.hw,
+		[CLKID_SAR_ADC_CLK]	    = &gxbb_sar_adc_clk.hw,
+		[CLKID_SAR_ADC_SEL]	    = &gxbb_sar_adc_clk_sel.hw,
+		[CLKID_SAR_ADC_DIV]	    = &gxbb_sar_adc_clk_div.hw,
+		[CLKID_MALI_0_SEL]	    = &gxbb_mali_0_sel.hw,
+		[CLKID_MALI_0_DIV]	    = &gxbb_mali_0_div.hw,
+		[CLKID_MALI_0]		    = &gxbb_mali_0.hw,
+		[CLKID_MALI_1_SEL]	    = &gxbb_mali_1_sel.hw,
+		[CLKID_MALI_1_DIV]	    = &gxbb_mali_1_div.hw,
+		[CLKID_MALI_1]		    = &gxbb_mali_1.hw,
+		[CLKID_MALI]		    = &gxbb_mali.hw,
+	},
+	.num = NR_CLKS,
+};
+
 /* Convenience tables to populate base addresses in .probe */
 
 static struct meson_clk_pll *const gxbb_clk_plls[] = {
@@ -985,6 +1171,13 @@ static struct meson_clk_pll *const gxbb_clk_plls[] = {
 	&gxbb_gp0_pll,
 };
 
+static struct meson_clk_pll *const gxl_clk_plls[] = {
+	&gxbb_fixed_pll,
+	&gxbb_hdmi_pll,
+	&gxbb_sys_pll,
+	&gxl_gp0_pll,
+};
+
 static struct meson_clk_mpll *const gxbb_clk_mplls[] = {
 	&gxbb_mpll0,
 	&gxbb_mpll1,
@@ -1094,14 +1287,70 @@ static struct clk_divider *const gxbb_clk_dividers[] = {
 	&gxbb_mali_1_div,
 };
 
+struct clkc_data {
+	struct clk_gate *const *clk_gates;
+	unsigned int clk_gates_count;
+	struct meson_clk_mpll *const *clk_mplls;
+	unsigned int clk_mplls_count;
+	struct meson_clk_pll *const *clk_plls;
+	unsigned int clk_plls_count;
+	struct clk_mux *const *clk_muxes;
+	unsigned int clk_muxes_count;
+	struct clk_divider *const *clk_dividers;
+	unsigned int clk_dividers_count;
+	struct meson_clk_cpu *cpu_clk;
+	struct clk_hw_onecell_data *hw_onecell_data;
+};
+
+static const struct clkc_data gxbb_clkc_data = {
+	.clk_gates = gxbb_clk_gates,
+	.clk_gates_count = ARRAY_SIZE(gxbb_clk_gates),
+	.clk_mplls = gxbb_clk_mplls,
+	.clk_mplls_count = ARRAY_SIZE(gxbb_clk_mplls),
+	.clk_plls = gxbb_clk_plls,
+	.clk_plls_count = ARRAY_SIZE(gxbb_clk_plls),
+	.clk_muxes = gxbb_clk_muxes,
+	.clk_muxes_count = ARRAY_SIZE(gxbb_clk_muxes),
+	.clk_dividers = gxbb_clk_dividers,
+	.clk_dividers_count = ARRAY_SIZE(gxbb_clk_dividers),
+	.cpu_clk = &gxbb_cpu_clk,
+	.hw_onecell_data = &gxbb_hw_onecell_data,
+};
+
+static const struct clkc_data gxl_clkc_data = {
+	.clk_gates = gxbb_clk_gates,
+	.clk_gates_count = ARRAY_SIZE(gxbb_clk_gates),
+	.clk_mplls = gxbb_clk_mplls,
+	.clk_mplls_count = ARRAY_SIZE(gxbb_clk_mplls),
+	.clk_plls = gxl_clk_plls,
+	.clk_plls_count = ARRAY_SIZE(gxl_clk_plls),
+	.clk_muxes = gxbb_clk_muxes,
+	.clk_muxes_count = ARRAY_SIZE(gxbb_clk_muxes),
+	.clk_dividers = gxbb_clk_dividers,
+	.clk_dividers_count = ARRAY_SIZE(gxbb_clk_dividers),
+	.cpu_clk = &gxbb_cpu_clk,
+	.hw_onecell_data = &gxl_hw_onecell_data,
+};
+
+static const struct of_device_id clkc_match_table[] = {
+	{ .compatible = "amlogic,gxbb-clkc", .data = &gxbb_clkc_data },
+	{ .compatible = "amlogic,gxl-clkc", .data = &gxl_clkc_data },
+	{},
+};
+
 static int gxbb_clkc_probe(struct platform_device *pdev)
 {
+	const struct clkc_data *clkc_data;
 	void __iomem *clk_base;
 	int ret, clkid, i;
 	struct clk_hw *parent_hw;
 	struct clk *parent_clk;
 	struct device *dev = &pdev->dev;
 
+	clkc_data = of_device_get_match_data(&pdev->dev);
+	if (!clkc_data)
+		return -EINVAL;
+
 	/*  Generic clocks and PLLs */
 	clk_base = of_iomap(dev->of_node, 0);
 	if (!clk_base) {
@@ -1110,36 +1359,37 @@ static int gxbb_clkc_probe(struct platform_device *pdev)
 	}
 
 	/* Populate base address for PLLs */
-	for (i = 0; i < ARRAY_SIZE(gxbb_clk_plls); i++)
-		gxbb_clk_plls[i]->base = clk_base;
+	for (i = 0; i < clkc_data->clk_plls_count; i++)
+		clkc_data->clk_plls[i]->base = clk_base;
 
 	/* Populate base address for MPLLs */
-	for (i = 0; i < ARRAY_SIZE(gxbb_clk_mplls); i++)
-		gxbb_clk_mplls[i]->base = clk_base;
+	for (i = 0; i < clkc_data->clk_mplls_count; i++)
+		clkc_data->clk_mplls[i]->base = clk_base;
 
 	/* Populate the base address for CPU clk */
-	gxbb_cpu_clk.base = clk_base;
+	clkc_data->cpu_clk->base = clk_base;
 
 	/* Populate base address for gates */
-	for (i = 0; i < ARRAY_SIZE(gxbb_clk_gates); i++)
-		gxbb_clk_gates[i]->reg = clk_base +
-			(u64)gxbb_clk_gates[i]->reg;
+	for (i = 0; i < clkc_data->clk_gates_count; i++)
+		clkc_data->clk_gates[i]->reg = clk_base +
+			(u64)clkc_data->clk_gates[i]->reg;
 
 	/* Populate base address for muxes */
-	for (i = 0; i < ARRAY_SIZE(gxbb_clk_muxes); i++)
-		gxbb_clk_muxes[i]->reg = clk_base +
-			(u64)gxbb_clk_muxes[i]->reg;
+	for (i = 0; i < clkc_data->clk_muxes_count; i++)
+		clkc_data->clk_muxes[i]->reg = clk_base +
+			(u64)clkc_data->clk_muxes[i]->reg;
 
 	/* Populate base address for dividers */
-	for (i = 0; i < ARRAY_SIZE(gxbb_clk_dividers); i++)
-		gxbb_clk_dividers[i]->reg = clk_base +
-			(u64)gxbb_clk_dividers[i]->reg;
+	for (i = 0; i < clkc_data->clk_dividers_count; i++)
+		clkc_data->clk_dividers[i]->reg = clk_base +
+			(u64)clkc_data->clk_dividers[i]->reg;
 
 	/*
 	 * register all clks
 	 */
-	for (clkid = 0; clkid < NR_CLKS; clkid++) {
-		ret = devm_clk_hw_register(dev, gxbb_hw_onecell_data.hws[clkid]);
+	for (clkid = 0; clkid < clkc_data->hw_onecell_data->num; clkid++) {
+		ret = devm_clk_hw_register(dev,
+					clkc_data->hw_onecell_data->hws[clkid]);
 		if (ret)
 			goto iounmap;
 	}
@@ -1158,9 +1408,9 @@ static int gxbb_clkc_probe(struct platform_device *pdev)
 	 * a new clk_hw, and this hack will no longer work. Releasing the ccr
 	 * feature before that time solves the problem :-)
 	 */
-	parent_hw = clk_hw_get_parent(&gxbb_cpu_clk.hw);
+	parent_hw = clk_hw_get_parent(&clkc_data->cpu_clk->hw);
 	parent_clk = parent_hw->clk;
-	ret = clk_notifier_register(parent_clk, &gxbb_cpu_clk.clk_nb);
+	ret = clk_notifier_register(parent_clk, &clkc_data->cpu_clk->clk_nb);
 	if (ret) {
 		pr_err("%s: failed to register clock notifier for cpu_clk\n",
 				__func__);
@@ -1168,23 +1418,18 @@ static int gxbb_clkc_probe(struct platform_device *pdev)
 	}
 
 	return of_clk_add_hw_provider(dev->of_node, of_clk_hw_onecell_get,
-			&gxbb_hw_onecell_data);
+			clkc_data->hw_onecell_data);
 
 iounmap:
 	iounmap(clk_base);
 	return ret;
 }
 
-static const struct of_device_id gxbb_clkc_match_table[] = {
-	{ .compatible = "amlogic,gxbb-clkc" },
-	{ }
-};
-
 static struct platform_driver gxbb_driver = {
 	.probe		= gxbb_clkc_probe,
 	.driver		= {
 		.name	= "gxbb-clkc",
-		.of_match_table = gxbb_clkc_match_table,
+		.of_match_table = clkc_match_table,
 	},
 };
 
diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h
index 8ee2022ce5d5..7f99bf6ca10c 100644
--- a/drivers/clk/meson/gxbb.h
+++ b/drivers/clk/meson/gxbb.h
@@ -71,6 +71,8 @@
 #define HHI_GP0_PLL_CNTL2		0x44 /* 0x11 offset in data sheet */
 #define HHI_GP0_PLL_CNTL3		0x48 /* 0x12 offset in data sheet */
 #define HHI_GP0_PLL_CNTL4		0x4c /* 0x13 offset in data sheet */
+#define	HHI_GP0_PLL_CNTL5		0x50 /* 0x14 offset in data sheet */
+#define	HHI_GP0_PLL_CNTL1		0x58 /* 0x16 offset in data sheet */
 
 #define HHI_XTAL_DIVN_CNTL		0xbc /* 0x2f offset in data sheet */
 #define HHI_TIMER90K			0xec /* 0x3b offset in data sheet */
-- 
cgit v1.2.3


From cf719012b23278c65d0bca4975a7ea46e5bb75be Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Wed, 5 Apr 2017 14:37:42 +0800
Subject: clk: sunxi-ng: mult: Support PLL lock detection

Some PLL clocks are N (multiplier) type clocks, or can be simplified
as such. An example of the former is the DDR1 PLL clock on the A33.
An example of the latter is the CPU PLL clock on the A80, in which
the P divider is only used for low frequencies that are of little
use. Both clocks support PLL lock detection.

The mult clock macro implies support for this, but that is not true.
The field is simply discarded. This patch adds proper support for it.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu_mult.c | 2 ++
 drivers/clk/sunxi-ng/ccu_mult.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/clk/sunxi-ng/ccu_mult.c b/drivers/clk/sunxi-ng/ccu_mult.c
index 8724c01171b1..671141359895 100644
--- a/drivers/clk/sunxi-ng/ccu_mult.c
+++ b/drivers/clk/sunxi-ng/ccu_mult.c
@@ -137,6 +137,8 @@ static int ccu_mult_set_rate(struct clk_hw *hw, unsigned long rate,
 
 	spin_unlock_irqrestore(cm->common.lock, flags);
 
+	ccu_helper_wait_for_lock(&cm->common, cm->lock);
+
 	return 0;
 }
 
diff --git a/drivers/clk/sunxi-ng/ccu_mult.h b/drivers/clk/sunxi-ng/ccu_mult.h
index 524acddfcb2e..f9c37b987d72 100644
--- a/drivers/clk/sunxi-ng/ccu_mult.h
+++ b/drivers/clk/sunxi-ng/ccu_mult.h
@@ -33,6 +33,7 @@ struct ccu_mult_internal {
 
 struct ccu_mult {
 	u32			enable;
+	u32			lock;
 
 	struct ccu_frac_internal	frac;
 	struct ccu_mult_internal	mult;
@@ -45,6 +46,7 @@ struct ccu_mult {
 				   _flags)				\
 	struct ccu_mult _struct = {					\
 		.enable	= _gate,					\
+		.lock	= _lock,					\
 		.mult	= _SUNXI_CCU_MULT(_mshift, _mwidth),		\
 		.common	= {						\
 			.reg		= _reg,				\
-- 
cgit v1.2.3


From 25eb035c3f22bde1eff43fe5c59b207d54a3d520 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Wed, 5 Apr 2017 14:37:43 +0800
Subject: clk: sunxi-ng: a80: Remodel CPU cluster PLLs as N-type multiplier
 clocks

The CPU cluster PLLs on the A80 are NP clocks that are atypical in two ways:

  - The P factor is 1 bit wide, and translates to a /1 or /4 divider.

  - The P factor should only be used for output frequencies lower than
    288 MHz. The N factor has a lower limit of 12, which likely contributed
    to this extra divider.

According to the user manual, the clocks can only go as low as 200 MHz.
The vendor BSP kernel does not even define operating points below 360
MHz for these clocks. The lower end for cpufreq in the vendor kernel is
even higher. The mainline Linux kernel doesn't support cpufreq for the
A80 at the moment. This means the lower frequencies are untested, and
will likely remain unused.

The new sunxi-ng style clocks don't support the quirks listed above.
Instead of trying to work the quirks in for something of little usage,
we re-model the clocks into N-type multipler clocks, with P fixed at 1.
At probe time we check if P is set to 4, and fix it up if needed. This
is highly unlikely though.

Fixes: b8eb71dcdd08 ("clk: sunxi-ng: Add A80 CCU")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu-sun9i-a80.c | 70 ++++++++++++++++++++++++++----------
 1 file changed, 52 insertions(+), 18 deletions(-)

diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
index e13e313ce4f5..51f6d495de5b 100644
--- a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
+++ b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
@@ -29,41 +29,41 @@
 
 #define CCU_SUN9I_LOCK_REG	0x09c
 
-static struct clk_div_table pll_cpux_p_div_table[] = {
-	{ .val = 0, .div = 1 },
-	{ .val = 1, .div = 4 },
-	{ /* Sentinel */ },
-};
-
 /*
- * The CPU PLLs are actually NP clocks, but P is /1 or /4, so here we
- * use the NM clocks with a divider table for M.
+ * The CPU PLLs are actually NP clocks, with P being /1 or /4. However
+ * P should only be used for output frequencies lower than 228 MHz.
+ * Neither mainline Linux, U-boot, nor the vendor BSPs use these.
+ *
+ * For now we can just model it as a multiplier clock, and force P to /1.
  */
-static struct ccu_nm pll_c0cpux_clk = {
+#define SUN9I_A80_PLL_C0CPUX_REG	0x000
+#define SUN9I_A80_PLL_C1CPUX_REG	0x004
+
+static struct ccu_mult pll_c0cpux_clk = {
 	.enable		= BIT(31),
 	.lock		= BIT(0),
-	.n		= _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
-	.m		= _SUNXI_CCU_DIV_TABLE(16, 1, pll_cpux_p_div_table),
+	.mult		= _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
 	.common		= {
-		.reg		= 0x000,
+		.reg		= SUN9I_A80_PLL_C0CPUX_REG,
 		.lock_reg	= CCU_SUN9I_LOCK_REG,
 		.features	= CCU_FEATURE_LOCK_REG,
 		.hw.init	= CLK_HW_INIT("pll-c0cpux", "osc24M",
-					      &ccu_nm_ops, CLK_SET_RATE_UNGATE),
+					      &ccu_mult_ops,
+					      CLK_SET_RATE_UNGATE),
 	},
 };
 
-static struct ccu_nm pll_c1cpux_clk = {
+static struct ccu_mult pll_c1cpux_clk = {
 	.enable		= BIT(31),
 	.lock		= BIT(1),
-	.n		= _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
-	.m		= _SUNXI_CCU_DIV_TABLE(16, 1, pll_cpux_p_div_table),
+	.mult		= _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
 	.common		= {
-		.reg		= 0x004,
+		.reg		= SUN9I_A80_PLL_C1CPUX_REG,
 		.lock_reg	= CCU_SUN9I_LOCK_REG,
 		.features	= CCU_FEATURE_LOCK_REG,
 		.hw.init	= CLK_HW_INIT("pll-c1cpux", "osc24M",
-					      &ccu_nm_ops, CLK_SET_RATE_UNGATE),
+					      &ccu_mult_ops,
+					      CLK_SET_RATE_UNGATE),
 	},
 };
 
@@ -1189,6 +1189,36 @@ static const struct sunxi_ccu_desc sun9i_a80_ccu_desc = {
 	.num_resets	= ARRAY_SIZE(sun9i_a80_ccu_resets),
 };
 
+#define SUN9I_A80_PLL_P_SHIFT	16
+#define SUN9I_A80_PLL_N_SHIFT	8
+#define SUN9I_A80_PLL_N_WIDTH	8
+
+static void sun9i_a80_cpu_pll_fixup(void __iomem *reg)
+{
+	u32 val = readl(reg);
+
+	/* bail out if P divider is not used */
+	if (!(val & BIT(SUN9I_A80_PLL_P_SHIFT)))
+		return;
+
+	/*
+	 * If P is used, output should be less than 288 MHz. When we
+	 * set P to 1, we should also decrease the multiplier so the
+	 * output doesn't go out of range, but not too much such that
+	 * the multiplier stays above 12, the minimal operation value.
+	 *
+	 * To keep it simple, set the multiplier to 17, the reset value.
+	 */
+	val &= ~GENMASK(SUN9I_A80_PLL_N_SHIFT + SUN9I_A80_PLL_N_WIDTH - 1,
+			SUN9I_A80_PLL_N_SHIFT);
+	val |= 17 << SUN9I_A80_PLL_N_SHIFT;
+
+	/* And clear P */
+	val &= ~BIT(SUN9I_A80_PLL_P_SHIFT);
+
+	writel(val, reg);
+}
+
 static int sun9i_a80_ccu_probe(struct platform_device *pdev)
 {
 	struct resource *res;
@@ -1205,6 +1235,10 @@ static int sun9i_a80_ccu_probe(struct platform_device *pdev)
 	val &= (BIT(16) & BIT(18));
 	writel(val, reg + SUN9I_A80_PLL_AUDIO_REG);
 
+	/* Enforce P = 1 for both CPU cluster PLLs */
+	sun9i_a80_cpu_pll_fixup(reg + SUN9I_A80_PLL_C0CPUX_REG);
+	sun9i_a80_cpu_pll_fixup(reg + SUN9I_A80_PLL_C1CPUX_REG);
+
 	return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun9i_a80_ccu_desc);
 }
 
-- 
cgit v1.2.3


From 68f37d862403e8f95337b2eca90af15d0b8cd5d7 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Wed, 5 Apr 2017 14:37:44 +0800
Subject: clk: sunxi-ng: a33: Add offset and minimum value for DDR1 PLL N
 factor

The DDR1 PLL on the A33 is an oddball amongst the A33 CCU clocks.
It is a clock multiplier, with the effective multiplier in the
range of 12 ~ 255 and no offset between the multiplier value and
the value programmed into the register.

Implement the zero offset and minimum value of 12 for this clock.

Fixes: d05c748bd730 ("clk: sunxi-ng: Add A33 CCU support")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu-sun8i-a33.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
index a7b3c08ed0e2..56370c2c7f98 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
@@ -159,13 +159,17 @@ static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_de_clk, "pll-de",
 					BIT(28),	/* lock */
 					CLK_SET_RATE_UNGATE);
 
-/* TODO: Fix N */
-static SUNXI_CCU_N_WITH_GATE_LOCK(pll_ddr1_clk, "pll-ddr1",
-				  "osc24M", 0x04c,
-				  8, 6,			/* N */
-				  BIT(31),		/* gate */
-				  BIT(28),		/* lock */
-				  CLK_SET_RATE_UNGATE);
+static struct ccu_mult pll_ddr1_clk = {
+	.enable	= BIT(31),
+	.lock	= BIT(28),
+	.mult	= _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 6, 0, 12, 0),
+	.common	= {
+		.reg		= 0x04c,
+		.hw.init	= CLK_HW_INIT("pll-ddr1", "osc24M",
+					      &ccu_mult_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
 
 static const char * const cpux_parents[] = { "osc32k", "osc24M",
 					     "pll-cpux" , "pll-cpux" };
-- 
cgit v1.2.3


From 049520dcb3966ee0471f2757ca3f5f5e317f33cd Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Tue, 4 Apr 2017 07:53:38 +0100
Subject: metag/usercopy: Reformat rapf loop inline asm

Reformat rapf loop inline assembly to make it more readable and easier
to modify in future.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-metag@vger.kernel.org
---
 arch/metag/lib/usercopy.c | 214 ++++++++++++++++++----------------------------
 1 file changed, 85 insertions(+), 129 deletions(-)

diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
index 2792fc621088..7abed2f45c83 100644
--- a/arch/metag/lib/usercopy.c
+++ b/arch/metag/lib/usercopy.c
@@ -246,65 +246,49 @@
 #define __asm_copy_user_64bit_rapf_loop(				\
 		to, from, ret, n, id, FIXUP)				\
 	asm volatile (							\
-		".balign 8\n"						\
-		"MOV	RAPF, %1\n"					\
-		"MSETL	[A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n"	\
-		"MOV	D0Ar6, #0\n"					\
-		"LSR	D1Ar5, %3, #6\n"				\
-		"SUB	TXRPT, D1Ar5, #2\n"				\
-		"MOV	RAPF, %1\n"					\
+			".balign 8\n"					\
+		"	MOV	RAPF, %1\n"				\
+		"	MSETL	[A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n" \
+		"	MOV	D0Ar6, #0\n"				\
+		"	LSR	D1Ar5, %3, #6\n"			\
+		"	SUB	TXRPT, D1Ar5, #2\n"			\
+		"	MOV	RAPF, %1\n"				\
 		"$Lloop"id":\n"						\
-		"ADD	RAPF, %1, #64\n"				\
-		"21:\n"							\
-		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"22:\n"							\
-		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"23:\n"							\
-		"SUB	%3, %3, #32\n"					\
-		"24:\n"							\
-		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"25:\n"							\
-		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"26:\n"							\
-		"SUB	%3, %3, #32\n"					\
-		"DCACHE	[%1+#-64], D0Ar6\n"				\
-		"BR	$Lloop"id"\n"					\
+		"	ADD	RAPF, %1, #64\n"			\
+		"21:	MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"22:	MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"23:	SUB	%3, %3, #32\n"				\
+		"24:	MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"25:	MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"26:	SUB	%3, %3, #32\n"				\
+		"	DCACHE	[%1+#-64], D0Ar6\n"			\
+		"	BR	$Lloop"id"\n"				\
 									\
-		"MOV	RAPF, %1\n"					\
-		"27:\n"							\
-		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"28:\n"							\
-		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"29:\n"							\
-		"SUB	%3, %3, #32\n"					\
-		"30:\n"							\
-		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"31:\n"							\
-		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"32:\n"							\
-		"SUB	%0, %0, #8\n"					\
-		"33:\n"							\
-		"SETL	[%0++], D0.7, D1.7\n"				\
-		"SUB	%3, %3, #32\n"					\
-		"1:"							\
-		"DCACHE	[%1+#-64], D0Ar6\n"				\
-		"GETL    D0Ar6, D1Ar5, [A0StP+#-40]\n"			\
-		"GETL    D0FrT, D1RtP, [A0StP+#-32]\n"			\
-		"GETL    D0.5, D1.5, [A0StP+#-24]\n"			\
-		"GETL    D0.6, D1.6, [A0StP+#-16]\n"			\
-		"GETL    D0.7, D1.7, [A0StP+#-8]\n"			\
-		"SUB A0StP, A0StP, #40\n"				\
+		"	MOV	RAPF, %1\n"				\
+		"27:	MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"28:	MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"29:	SUB	%3, %3, #32\n"				\
+		"30:	MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"31:	MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"32:	SUB	%0, %0, #8\n"				\
+		"33:	SETL	[%0++], D0.7, D1.7\n"			\
+		"	SUB	%3, %3, #32\n"				\
+		"1:	DCACHE	[%1+#-64], D0Ar6\n"			\
+		"	GETL	D0Ar6, D1Ar5, [A0StP+#-40]\n"		\
+		"	GETL	D0FrT, D1RtP, [A0StP+#-32]\n"		\
+		"	GETL	D0.5, D1.5, [A0StP+#-24]\n"		\
+		"	GETL	D0.6, D1.6, [A0StP+#-16]\n"		\
+		"	GETL	D0.7, D1.7, [A0StP+#-8]\n"		\
+		"	SUB	A0StP, A0StP, #40\n"			\
 		"	.section .fixup,\"ax\"\n"			\
-		"4:\n"							\
-		"	ADD	%0, %0, #8\n"				\
-		"3:\n"							\
-		"	MOV	D0Ar2, TXSTATUS\n"			\
+		"4:	ADD	%0, %0, #8\n"				\
+		"3:	MOV	D0Ar2, TXSTATUS\n"			\
 		"	MOV	D1Ar1, TXSTATUS\n"			\
 		"	AND	D1Ar1, D1Ar1, #0xFFFFF8FF\n"		\
 		"	MOV	TXSTATUS, D1Ar1\n"			\
 			FIXUP						\
-		"	MOVT    D0Ar2,#HI(1b)\n"			\
-		"	JUMP    D0Ar2,#LO(1b)\n"			\
+		"	MOVT	D0Ar2, #HI(1b)\n"			\
+		"	JUMP	D0Ar2, #LO(1b)\n"			\
 		"	.previous\n"					\
 		"	.section __ex_table,\"a\"\n"			\
 		"	.long 21b,3b\n"					\
@@ -397,89 +381,61 @@
 #define __asm_copy_user_32bit_rapf_loop(				\
 			to,	from, ret, n, id, FIXUP)		\
 	asm volatile (							\
-		".balign 8\n"						\
-		"MOV	RAPF, %1\n"					\
-		"MSETL	[A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n"	\
-		"MOV	D0Ar6, #0\n"					\
-		"LSR	D1Ar5, %3, #6\n"				\
-		"SUB	TXRPT, D1Ar5, #2\n"				\
-		"MOV	RAPF, %1\n"					\
-	"$Lloop"id":\n"							\
-		"ADD	RAPF, %1, #64\n"				\
-		"21:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"22:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"23:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"24:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"25:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"26:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"27:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"28:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"29:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"30:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"31:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"32:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"DCACHE	[%1+#-64], D0Ar6\n"				\
-		"BR	$Lloop"id"\n"					\
+			".balign 8\n"					\
+		"	MOV	RAPF, %1\n"				\
+		"	MSETL	[A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n" \
+		"	MOV	D0Ar6, #0\n"				\
+		"	LSR	D1Ar5, %3, #6\n"			\
+		"	SUB	TXRPT, D1Ar5, #2\n"			\
+		"	MOV	RAPF, %1\n"				\
+		"$Lloop"id":\n"						\
+		"	ADD	RAPF, %1, #64\n"			\
+		"21:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"22:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"23:	SUB	%3, %3, #16\n"				\
+		"24:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"25:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"26:	SUB	%3, %3, #16\n"				\
+		"27:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"28:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"29:	SUB	%3, %3, #16\n"				\
+		"30:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"31:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"32:	SUB	%3, %3, #16\n"				\
+		"	DCACHE	[%1+#-64], D0Ar6\n"			\
+		"	BR	$Lloop"id"\n"				\
 									\
-		"MOV	RAPF, %1\n"					\
-		"33:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"34:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"35:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"36:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"37:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"38:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"39:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"40:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"41:\n"							\
-		"SUB	%3, %3, #16\n"					\
-		"42:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"43:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"44:\n"							\
-		"SUB	%0, %0, #4\n"					\
-		"45:\n"							\
-		"SETD	[%0++], D0.7\n"					\
-		"SUB	%3, %3, #16\n"					\
-		"1:"							\
-		"DCACHE	[%1+#-64], D0Ar6\n"				\
-		"GETL    D0Ar6, D1Ar5, [A0StP+#-40]\n"			\
-		"GETL    D0FrT, D1RtP, [A0StP+#-32]\n"			\
-		"GETL    D0.5, D1.5, [A0StP+#-24]\n"			\
-		"GETL    D0.6, D1.6, [A0StP+#-16]\n"			\
-		"GETL    D0.7, D1.7, [A0StP+#-8]\n"			\
-		"SUB A0StP, A0StP, #40\n"				\
+		"	MOV	RAPF, %1\n"				\
+		"33:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"34:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"35:	SUB	%3, %3, #16\n"				\
+		"36:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"37:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"38:	SUB	%3, %3, #16\n"				\
+		"39:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"40:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"41:	SUB	%3, %3, #16\n"				\
+		"42:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
+		"43:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
+		"44:	SUB	%0, %0, #4\n"				\
+		"45:	SETD	[%0++], D0.7\n"				\
+		"	SUB	%3, %3, #16\n"				\
+		"1:	DCACHE	[%1+#-64], D0Ar6\n"			\
+		"	GETL	D0Ar6, D1Ar5, [A0StP+#-40]\n"		\
+		"	GETL	D0FrT, D1RtP, [A0StP+#-32]\n"		\
+		"	GETL	D0.5, D1.5, [A0StP+#-24]\n"		\
+		"	GETL	D0.6, D1.6, [A0StP+#-16]\n"		\
+		"	GETL	D0.7, D1.7, [A0StP+#-8]\n"		\
+		"	SUB A0StP, A0StP, #40\n"			\
 		"	.section .fixup,\"ax\"\n"			\
-		"4:\n"							\
-		"	ADD		%0, %0, #4\n"			\
-		"3:\n"							\
-		"	MOV	D0Ar2, TXSTATUS\n"			\
+		"4:	ADD	%0, %0, #4\n"				\
+		"3:	MOV	D0Ar2, TXSTATUS\n"			\
 		"	MOV	D1Ar1, TXSTATUS\n"			\
 		"	AND	D1Ar1, D1Ar1, #0xFFFFF8FF\n"		\
 		"	MOV	TXSTATUS, D1Ar1\n"			\
 			FIXUP						\
-		"	MOVT    D0Ar2,#HI(1b)\n"			\
-		"	JUMP    D0Ar2,#LO(1b)\n"			\
+		"	MOVT	D0Ar2, #HI(1b)\n"			\
+		"	JUMP	D0Ar2, #LO(1b)\n"			\
 		"	.previous\n"					\
 		"	.section __ex_table,\"a\"\n"			\
 		"	.long 21b,3b\n"					\
-- 
cgit v1.2.3


From fc1b759ae4e0f636c56ca8410207a8a36630a96e Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Tue, 4 Apr 2017 11:42:35 +0100
Subject: metag/usercopy: Simplify rapf loop fixup corner case

The final fixup in the rapf loops must handle a corner case due to the
intermediate decrementing of the destination pointer before writing the
last element to it again and re-incrementing it. This decrement (and the
associated increment in the fixup code) can be easily avoided by using
SETL/SETD with an offset of -8/-4.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-metag@vger.kernel.org
---
 arch/metag/lib/usercopy.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
index 7abed2f45c83..ceb4590fbca5 100644
--- a/arch/metag/lib/usercopy.c
+++ b/arch/metag/lib/usercopy.c
@@ -270,8 +270,7 @@
 		"29:	SUB	%3, %3, #32\n"				\
 		"30:	MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
 		"31:	MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
-		"32:	SUB	%0, %0, #8\n"				\
-		"33:	SETL	[%0++], D0.7, D1.7\n"			\
+		"32:	SETL	[%0+#-8], D0.7, D1.7\n"			\
 		"	SUB	%3, %3, #32\n"				\
 		"1:	DCACHE	[%1+#-64], D0Ar6\n"			\
 		"	GETL	D0Ar6, D1Ar5, [A0StP+#-40]\n"		\
@@ -281,7 +280,6 @@
 		"	GETL	D0.7, D1.7, [A0StP+#-8]\n"		\
 		"	SUB	A0StP, A0StP, #40\n"			\
 		"	.section .fixup,\"ax\"\n"			\
-		"4:	ADD	%0, %0, #8\n"				\
 		"3:	MOV	D0Ar2, TXSTATUS\n"			\
 		"	MOV	D1Ar1, TXSTATUS\n"			\
 		"	AND	D1Ar1, D1Ar1, #0xFFFFF8FF\n"		\
@@ -303,7 +301,6 @@
 		"	.long 30b,3b\n"					\
 		"	.long 31b,3b\n"					\
 		"	.long 32b,3b\n"					\
-		"	.long 33b,4b\n"					\
 		"	.previous\n"					\
 		: "=r" (to), "=r" (from), "=r" (ret), "=d" (n)		\
 		: "0" (to), "1" (from), "2" (ret), "3" (n)		\
@@ -417,8 +414,7 @@
 		"41:	SUB	%3, %3, #16\n"				\
 		"42:	MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"	\
 		"43:	MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"	\
-		"44:	SUB	%0, %0, #4\n"				\
-		"45:	SETD	[%0++], D0.7\n"				\
+		"44:	SETD	[%0+#-4], D0.7\n"			\
 		"	SUB	%3, %3, #16\n"				\
 		"1:	DCACHE	[%1+#-64], D0Ar6\n"			\
 		"	GETL	D0Ar6, D1Ar5, [A0StP+#-40]\n"		\
@@ -428,7 +424,6 @@
 		"	GETL	D0.7, D1.7, [A0StP+#-8]\n"		\
 		"	SUB A0StP, A0StP, #40\n"			\
 		"	.section .fixup,\"ax\"\n"			\
-		"4:	ADD	%0, %0, #4\n"				\
 		"3:	MOV	D0Ar2, TXSTATUS\n"			\
 		"	MOV	D1Ar1, TXSTATUS\n"			\
 		"	AND	D1Ar1, D1Ar1, #0xFFFFF8FF\n"		\
@@ -462,7 +457,6 @@
 		"	.long 42b,3b\n"					\
 		"	.long 43b,3b\n"					\
 		"	.long 44b,3b\n"					\
-		"	.long 45b,4b\n"					\
 		"	.previous\n"					\
 		: "=r" (to), "=r" (from), "=r" (ret), "=d" (n)		\
 		: "0" (to), "1" (from), "2" (ret), "3" (n)		\
-- 
cgit v1.2.3


From d3ba2e922d4d1d61806fcb6e09512d2bee734d06 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Fri, 31 Mar 2017 15:40:52 +0100
Subject: metag/usercopy: Add 64-bit get_user support

Metag already supports 64-bit put_user, so add support for 64-bit
get_user too so that the test_user_copy module can test both.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-metag@vger.kernel.org
---
 arch/metag/include/asm/uaccess.h |  9 +++++++--
 arch/metag/lib/usercopy.c        | 24 ++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h
index 07238b39638c..469a2f1393d3 100644
--- a/arch/metag/include/asm/uaccess.h
+++ b/arch/metag/include/asm/uaccess.h
@@ -138,7 +138,8 @@ extern long __get_user_bad(void);
 
 #define __get_user_nocheck(x, ptr, size)			\
 ({                                                              \
-	long __gu_err, __gu_val;                                \
+	long __gu_err;						\
+	long long __gu_val;					\
 	__get_user_size(__gu_val, (ptr), (size), __gu_err);	\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;             \
 	__gu_err;                                               \
@@ -146,7 +147,8 @@ extern long __get_user_bad(void);
 
 #define __get_user_check(x, ptr, size)					\
 ({                                                                      \
-	long __gu_err = -EFAULT, __gu_val = 0;                          \
+	long __gu_err = -EFAULT;					\
+	long long __gu_val = 0;						\
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
 	if (access_ok(VERIFY_READ, __gu_addr, size))			\
 		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
@@ -157,6 +159,7 @@ extern long __get_user_bad(void);
 extern unsigned char __get_user_asm_b(const void __user *addr, long *err);
 extern unsigned short __get_user_asm_w(const void __user *addr, long *err);
 extern unsigned int __get_user_asm_d(const void __user *addr, long *err);
+extern unsigned long long __get_user_asm_l(const void __user *addr, long *err);
 
 #define __get_user_size(x, ptr, size, retval)			\
 do {                                                            \
@@ -168,6 +171,8 @@ do {                                                            \
 		x = __get_user_asm_w(ptr, &retval); break;	\
 	case 4:							\
 		x = __get_user_asm_d(ptr, &retval); break;	\
+	case 8:							\
+		x = __get_user_asm_l(ptr, &retval); break;	\
 	default:						\
 		(x) = __get_user_bad();				\
 	}                                                       \
diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
index ceb4590fbca5..45e7b79eca19 100644
--- a/arch/metag/lib/usercopy.c
+++ b/arch/metag/lib/usercopy.c
@@ -1044,6 +1044,30 @@ unsigned int __get_user_asm_d(const void __user *addr, long *err)
 }
 EXPORT_SYMBOL(__get_user_asm_d);
 
+unsigned long long __get_user_asm_l(const void __user *addr, long *err)
+{
+	register unsigned long long x asm ("D0Re0") = 0;
+	asm volatile (
+		"	GETL %0,%t0,[%2]\n"
+		"1:\n"
+		"	GETL %0,%t0,[%2]\n"
+		"2:\n"
+		"	.section .fixup,\"ax\"\n"
+		"3:	MOV     D0FrT,%3\n"
+		"	SETD    [%1],D0FrT\n"
+		"	MOVT    D0FrT,#HI(2b)\n"
+		"	JUMP    D0FrT,#LO(2b)\n"
+		"	.previous\n"
+		"	.section __ex_table,\"a\"\n"
+		"	.long 1b,3b\n"
+		"	.previous\n"
+		: "=r" (x)
+		: "r" (err), "r" (addr), "P" (-EFAULT)
+		: "D0FrT");
+	return x;
+}
+EXPORT_SYMBOL(__get_user_asm_l);
+
 long __put_user_asm_b(unsigned int x, void __user *addr)
 {
 	register unsigned int err asm ("D0Re0") = 0;
-- 
cgit v1.2.3


From cb545960dea2749771c88b0cb26e5adfd12a0315 Mon Sep 17 00:00:00 2001
From: Priit Laes <plaes@plaes.org>
Date: Wed, 5 Apr 2017 19:52:27 +0300
Subject: clk: sunxi-ng: Display index when clock registration fails

Add clock index to clock registration failure message. Clock name
is sometimes not available, when things go really wrong.

Signed-off-by: Priit Laes <plaes@plaes.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu_common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c
index 8a47bafd7890..188fa50d0380 100644
--- a/drivers/clk/sunxi-ng/ccu_common.c
+++ b/drivers/clk/sunxi-ng/ccu_common.c
@@ -63,8 +63,8 @@ int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
 
 		ret = clk_hw_register(NULL, hw);
 		if (ret) {
-			pr_err("Couldn't register clock %s\n",
-			       clk_hw_get_name(hw));
+			pr_err("Couldn't register clock %d - %s\n",
+			       i, clk_hw_get_name(hw));
 			goto err_clk_unreg;
 		}
 	}
-- 
cgit v1.2.3


From eda6f4e868e74adfae8ac6b06a863a090affbb14 Mon Sep 17 00:00:00 2001
From: Alexandre Bailon <abailon@baylibre.com>
Date: Wed, 5 Apr 2017 18:35:16 +0200
Subject: dmaengine: cppi41: Fix an Oops happening in cppi41_dma_probe()

This fix an Oops happening on all platforms using the old dt bindings
(all platforms but da8xx).
This update cppi41_dma_probe() to use the index variable which is
required to keep compatibility between old and new dt bindings.

Fixes: 8e3ba95f4190 ("dmaengine: cppi41: use managed functions devm_*()")
Reported-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 0be56c971786..f7e965f63274 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -1038,17 +1038,17 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	if (index < 0)
 		return index;
 
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, index);
 	cdd->ctrl_mem = devm_ioremap_resource(dev, mem);
 	if (IS_ERR(cdd->ctrl_mem))
 		return PTR_ERR(cdd->ctrl_mem);
 
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 1);
 	cdd->sched_mem = devm_ioremap_resource(dev, mem);
 	if (IS_ERR(cdd->sched_mem))
 		return PTR_ERR(cdd->sched_mem);
 
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 2);
 	cdd->qmgr_mem = devm_ioremap_resource(dev, mem);
 	if (IS_ERR(cdd->qmgr_mem))
 		return PTR_ERR(cdd->qmgr_mem);
-- 
cgit v1.2.3


From 2267517cd3ee4a1b02c7b9fead051c9d079c9fc3 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 1 Mar 2017 15:48:51 +0100
Subject: pwm: atmel-hlcdc: Convert to the atomic PWM API

Implement the ->apply() hook and drop the ->enable(), ->disable,
->set_polarity and ->config() ones.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-atmel-hlcdc.c | 227 +++++++++++++++++++-----------------------
 1 file changed, 101 insertions(+), 126 deletions(-)

diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c
index 999187277ea5..bcb6d946131d 100644
--- a/drivers/pwm/pwm-atmel-hlcdc.c
+++ b/drivers/pwm/pwm-atmel-hlcdc.c
@@ -49,162 +49,137 @@ static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip)
 	return container_of(chip, struct atmel_hlcdc_pwm, chip);
 }
 
-static int atmel_hlcdc_pwm_config(struct pwm_chip *c,
-				  struct pwm_device *pwm,
-				  int duty_ns, int period_ns)
+static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
+				 struct pwm_state *state)
 {
 	struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c);
 	struct atmel_hlcdc *hlcdc = chip->hlcdc;
-	struct clk *new_clk = hlcdc->slow_clk;
-	u64 pwmcval = duty_ns * 256;
-	unsigned long clk_freq;
-	u64 clk_period_ns;
-	u32 pwmcfg;
-	int pres;
-
-	if (!chip->errata || !chip->errata->slow_clk_erratum) {
-		clk_freq = clk_get_rate(new_clk);
-		if (!clk_freq)
-			return -EINVAL;
-
-		clk_period_ns = (u64)NSEC_PER_SEC * 256;
-		do_div(clk_period_ns, clk_freq);
-	}
-
-	/* Errata: cannot use slow clk on some IP revisions */
-	if ((chip->errata && chip->errata->slow_clk_erratum) ||
-	    clk_period_ns > period_ns) {
-		new_clk = hlcdc->sys_clk;
-		clk_freq = clk_get_rate(new_clk);
-		if (!clk_freq)
-			return -EINVAL;
-
-		clk_period_ns = (u64)NSEC_PER_SEC * 256;
-		do_div(clk_period_ns, clk_freq);
-	}
+	unsigned int status;
+	int ret;
 
-	for (pres = 0; pres <= ATMEL_HLCDC_PWMPS_MAX; pres++) {
+	if (state->enabled) {
+		struct clk *new_clk = hlcdc->slow_clk;
+		u64 pwmcval = state->duty_cycle * 256;
+		unsigned long clk_freq;
+		u64 clk_period_ns;
+		u32 pwmcfg;
+		int pres;
+
+		if (!chip->errata || !chip->errata->slow_clk_erratum) {
+			clk_freq = clk_get_rate(new_clk);
+			if (!clk_freq)
+				return -EINVAL;
+
+			clk_period_ns = (u64)NSEC_PER_SEC * 256;
+			do_div(clk_period_ns, clk_freq);
+		}
+
+		/* Errata: cannot use slow clk on some IP revisions */
+		if ((chip->errata && chip->errata->slow_clk_erratum) ||
+		    clk_period_ns > state->period) {
+			new_clk = hlcdc->sys_clk;
+			clk_freq = clk_get_rate(new_clk);
+			if (!clk_freq)
+				return -EINVAL;
+
+			clk_period_ns = (u64)NSEC_PER_SEC * 256;
+			do_div(clk_period_ns, clk_freq);
+		}
+
+		for (pres = 0; pres <= ATMEL_HLCDC_PWMPS_MAX; pres++) {
 		/* Errata: cannot divide by 1 on some IP revisions */
-		if (!pres && chip->errata && chip->errata->div1_clk_erratum)
-			continue;
-
-		if ((clk_period_ns << pres) >= period_ns)
-			break;
-	}
+			if (!pres && chip->errata &&
+			    chip->errata->div1_clk_erratum)
+				continue;
 
-	if (pres > ATMEL_HLCDC_PWMPS_MAX)
-		return -EINVAL;
+			if ((clk_period_ns << pres) >= state->period)
+				break;
+		}
 
-	pwmcfg = ATMEL_HLCDC_PWMPS(pres);
-
-	if (new_clk != chip->cur_clk) {
-		u32 gencfg = 0;
-		int ret;
-
-		ret = clk_prepare_enable(new_clk);
-		if (ret)
-			return ret;
+		if (pres > ATMEL_HLCDC_PWMPS_MAX)
+			return -EINVAL;
 
-		clk_disable_unprepare(chip->cur_clk);
-		chip->cur_clk = new_clk;
+		pwmcfg = ATMEL_HLCDC_PWMPS(pres);
 
-		if (new_clk == hlcdc->sys_clk)
-			gencfg = ATMEL_HLCDC_CLKPWMSEL;
+		if (new_clk != chip->cur_clk) {
+			u32 gencfg = 0;
+			int ret;
 
-		ret = regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(0),
-					 ATMEL_HLCDC_CLKPWMSEL, gencfg);
-		if (ret)
-			return ret;
-	}
+			ret = clk_prepare_enable(new_clk);
+			if (ret)
+				return ret;
 
-	do_div(pwmcval, period_ns);
+			clk_disable_unprepare(chip->cur_clk);
+			chip->cur_clk = new_clk;
 
-	/*
-	 * The PWM duty cycle is configurable from 0/256 to 255/256 of the
-	 * period cycle. Hence we can't set a duty cycle occupying the
-	 * whole period cycle if we're asked to.
-	 * Set it to 255 if pwmcval is greater than 256.
-	 */
-	if (pwmcval > 255)
-		pwmcval = 255;
+			if (new_clk == hlcdc->sys_clk)
+				gencfg = ATMEL_HLCDC_CLKPWMSEL;
 
-	pwmcfg |= ATMEL_HLCDC_PWMCVAL(pwmcval);
+			ret = regmap_update_bits(hlcdc->regmap,
+						 ATMEL_HLCDC_CFG(0),
+						 ATMEL_HLCDC_CLKPWMSEL,
+						 gencfg);
+			if (ret)
+				return ret;
+		}
 
-	return regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(6),
-				  ATMEL_HLCDC_PWMCVAL_MASK |
-				  ATMEL_HLCDC_PWMPS_MASK,
-				  pwmcfg);
-}
+		do_div(pwmcval, state->period);
 
-static int atmel_hlcdc_pwm_set_polarity(struct pwm_chip *c,
-					struct pwm_device *pwm,
-					enum pwm_polarity polarity)
-{
-	struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c);
-	struct atmel_hlcdc *hlcdc = chip->hlcdc;
-	u32 cfg = 0;
+		/*
+		 * The PWM duty cycle is configurable from 0/256 to 255/256 of
+		 * the period cycle. Hence we can't set a duty cycle occupying
+		 * the whole period cycle if we're asked to.
+		 * Set it to 255 if pwmcval is greater than 256.
+		 */
+		if (pwmcval > 255)
+			pwmcval = 255;
 
-	if (polarity == PWM_POLARITY_NORMAL)
-		cfg = ATMEL_HLCDC_PWMPOL;
+		pwmcfg |= ATMEL_HLCDC_PWMCVAL(pwmcval);
 
-	return regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(6),
-				  ATMEL_HLCDC_PWMPOL, cfg);
-}
+		if (state->polarity == PWM_POLARITY_NORMAL)
+			pwmcfg |= ATMEL_HLCDC_PWMPOL;
 
-static int atmel_hlcdc_pwm_enable(struct pwm_chip *c, struct pwm_device *pwm)
-{
-	struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c);
-	struct atmel_hlcdc *hlcdc = chip->hlcdc;
-	u32 status;
-	int ret;
+		ret = regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(6),
+					 ATMEL_HLCDC_PWMCVAL_MASK |
+					 ATMEL_HLCDC_PWMPS_MASK |
+					 ATMEL_HLCDC_PWMPOL,
+					 pwmcfg);
+		if (ret)
+			return ret;
 
-	ret = regmap_write(hlcdc->regmap, ATMEL_HLCDC_EN, ATMEL_HLCDC_PWM);
-	if (ret)
-		return ret;
+		ret = regmap_write(hlcdc->regmap, ATMEL_HLCDC_EN,
+				   ATMEL_HLCDC_PWM);
+		if (ret)
+			return ret;
 
-	while (true) {
-		ret = regmap_read(hlcdc->regmap, ATMEL_HLCDC_SR, &status);
+		ret = regmap_read_poll_timeout(hlcdc->regmap, ATMEL_HLCDC_SR,
+					       status,
+					       status & ATMEL_HLCDC_PWM,
+					       10, 0);
+		if (ret)
+			return ret;
+	} else {
+		ret = regmap_write(hlcdc->regmap, ATMEL_HLCDC_DIS,
+				   ATMEL_HLCDC_PWM);
 		if (ret)
 			return ret;
 
-		if ((status & ATMEL_HLCDC_PWM) != 0)
-			break;
+		ret = regmap_read_poll_timeout(hlcdc->regmap, ATMEL_HLCDC_SR,
+					       status,
+					       !(status & ATMEL_HLCDC_PWM),
+					       10, 0);
+		if (ret)
+			return ret;
 
-		usleep_range(1, 10);
+		clk_disable_unprepare(chip->cur_clk);
+		chip->cur_clk = NULL;
 	}
 
 	return 0;
 }
 
-static void atmel_hlcdc_pwm_disable(struct pwm_chip *c,
-				    struct pwm_device *pwm)
-{
-	struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c);
-	struct atmel_hlcdc *hlcdc = chip->hlcdc;
-	u32 status;
-	int ret;
-
-	ret = regmap_write(hlcdc->regmap, ATMEL_HLCDC_DIS, ATMEL_HLCDC_PWM);
-	if (ret)
-		return;
-
-	while (true) {
-		ret = regmap_read(hlcdc->regmap, ATMEL_HLCDC_SR, &status);
-		if (ret)
-			return;
-
-		if ((status & ATMEL_HLCDC_PWM) == 0)
-			break;
-
-		usleep_range(1, 10);
-	}
-}
-
 static const struct pwm_ops atmel_hlcdc_pwm_ops = {
-	.config = atmel_hlcdc_pwm_config,
-	.set_polarity = atmel_hlcdc_pwm_set_polarity,
-	.enable = atmel_hlcdc_pwm_enable,
-	.disable = atmel_hlcdc_pwm_disable,
+	.apply = atmel_hlcdc_pwm_apply,
 	.owner = THIS_MODULE,
 };
 
-- 
cgit v1.2.3


From adf5e5168bd51c42332ebaa709351fa6ed65ea73 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 27 Jan 2017 12:22:54 +0000
Subject: iommu: Better document the IOMMU_PRIV flag

This is a fairly subtle thing - let's make sure it's described as
clearly as possible to avoid potential misunderstandings.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/iommu.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 2e4de0deee53..88ec8c6580d3 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -32,10 +32,13 @@
 #define IOMMU_NOEXEC	(1 << 3)
 #define IOMMU_MMIO	(1 << 4) /* e.g. things like MSI doorbells */
 /*
- * This is to make the IOMMU API setup privileged
- * mapppings accessible by the master only at higher
- * privileged execution level and inaccessible at
- * less privileged levels.
+ * Where the bus hardware includes a privilege level as part of its access type
+ * markings, and certain devices are capable of issuing transactions marked as
+ * either 'supervisor' or 'user', the IOMMU_PRIV flag requests that the other
+ * given permission flags only apply to accesses at the higher privilege level,
+ * and that unprivileged transactions should have as little access as possible.
+ * This would usually imply the same permissions as kernel mappings on the CPU,
+ * if the IOMMU page table format is equivalent.
  */
 #define IOMMU_PRIV	(1 << 5)
 
-- 
cgit v1.2.3


From 53c35dce45713d2a554109c21a8cd617d09eba50 Mon Sep 17 00:00:00 2001
From: Robert Richter <rrichter@cavium.com>
Date: Mon, 13 Mar 2017 11:39:01 +0100
Subject: iommu/arm-smmu: Print message when Cavium erratum 27704 was detected

Firmware is responsible for properly enabling smmu workarounds. Print
a message for better diagnostics when Cavium erratum 27704 was
detected.

Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Robert Richter <rrichter@cavium.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index b493c99e17f7..8e16da64e72e 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1887,6 +1887,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 			atomic_add_return(smmu->num_context_banks,
 					  &cavium_smmu_context_count);
 		smmu->cavium_id_base -= smmu->num_context_banks;
+		dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
 	}
 
 	/* ID2 */
-- 
cgit v1.2.3


From 125458ab3aefe9cf2f72dcfe7338dc9ad967da0b Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@cavium.com>
Date: Tue, 28 Mar 2017 16:11:12 +0530
Subject: iommu/arm-smmu: Fix 16-bit ASID configuration

16-bit ASID should be enabled before initializing TTBR0/1,
otherwise only LSB 8-bit ASID will be considered. Hence
moving configuration of TTBCR register ahead of TTBR0/1
while initializing context bank.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
[will: rewrote comment]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 42 +++++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 8e16da64e72e..e021b360e315 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -758,6 +758,29 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 	}
 	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));
 
+	/*
+	 * TTBCR
+	 * We must write this before the TTBRs, since it determines the
+	 * access behaviour of some fields (in particular, ASID[15:8]).
+	 */
+	if (stage1) {
+		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+			reg = pgtbl_cfg->arm_v7s_cfg.tcr;
+			reg2 = 0;
+		} else {
+			reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
+			reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
+			reg2 |= TTBCR2_SEP_UPSTREAM;
+			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
+				reg2 |= TTBCR2_AS;
+		}
+		if (smmu->version > ARM_SMMU_V1)
+			writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
+	} else {
+		reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
+	}
+	writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
+
 	/* TTBRs */
 	if (stage1) {
 		u16 asid = ARM_SMMU_CB_ASID(smmu, cfg);
@@ -781,25 +804,6 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 		writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
 	}
 
-	/* TTBCR */
-	if (stage1) {
-		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
-			reg = pgtbl_cfg->arm_v7s_cfg.tcr;
-			reg2 = 0;
-		} else {
-			reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
-			reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
-			reg2 |= TTBCR2_SEP_UPSTREAM;
-			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
-				reg2 |= TTBCR2_AS;
-		}
-		if (smmu->version > ARM_SMMU_V1)
-			writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
-	} else {
-		reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
-	}
-	writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
-
 	/* MAIRs (stage-1 only) */
 	if (stage1) {
 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
-- 
cgit v1.2.3


From 280b683ceaceb7508dc1e9c7e148ea1dcdf36543 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 30 Mar 2017 17:56:29 +0100
Subject: iommu/arm-smmu: Simplify ASID/VMID handling

Calculating ASIDs/VMIDs dynamically from arm_smmu_cfg was a neat trick,
but the global uniqueness workaround makes it somewhat more awkward, and
means we end up having to pass extra state around in certain cases just
to keep a handle on the offset.

We already have 16 bits going spare in arm_smmu_cfg; let's just
precalculate an ASID/VMID, plop it in there, and tidy up the users
accordingly. We'd also need something like this anyway if we ever get
near to thinking about SVM, so it's no bad thing.

Reviewed-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index e021b360e315..05f17ebd70f6 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -404,14 +404,15 @@ enum arm_smmu_context_fmt {
 struct arm_smmu_cfg {
 	u8				cbndx;
 	u8				irptndx;
+	union {
+		u16			asid;
+		u16			vmid;
+	};
 	u32				cbar;
 	enum arm_smmu_context_fmt	fmt;
 };
 #define INVALID_IRPTNDX			0xff
 
-#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx)
-#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1)
-
 enum arm_smmu_domain_stage {
 	ARM_SMMU_DOMAIN_S1 = 0,
 	ARM_SMMU_DOMAIN_S2,
@@ -603,12 +604,10 @@ static void arm_smmu_tlb_inv_context(void *cookie)
 
 	if (stage1) {
 		base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
-		writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg),
-			       base + ARM_SMMU_CB_S1_TLBIASID);
+		writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
 	} else {
 		base = ARM_SMMU_GR0(smmu);
-		writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg),
-			       base + ARM_SMMU_GR0_TLBIVMID);
+		writel_relaxed(cfg->vmid, base + ARM_SMMU_GR0_TLBIVMID);
 	}
 
 	__arm_smmu_tlb_sync(smmu);
@@ -629,14 +628,14 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 
 		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
 			iova &= ~12UL;
-			iova |= ARM_SMMU_CB_ASID(smmu, cfg);
+			iova |= cfg->asid;
 			do {
 				writel_relaxed(iova, reg);
 				iova += granule;
 			} while (size -= granule);
 		} else {
 			iova >>= 12;
-			iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48;
+			iova |= (u64)cfg->asid << 48;
 			do {
 				writeq_relaxed(iova, reg);
 				iova += granule >> 12;
@@ -653,7 +652,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 		} while (size -= granule);
 	} else {
 		reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
-		writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg);
+		writel_relaxed(cfg->vmid, reg);
 	}
 }
 
@@ -735,7 +734,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 			reg = CBA2R_RW64_32BIT;
 		/* 16-bit VMIDs live in CBA2R */
 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
-			reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT;
+			reg |= cfg->vmid << CBA2R_VMID_SHIFT;
 
 		writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx));
 	}
@@ -754,7 +753,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 			(CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
 		/* 8-bit VMIDs live in CBAR */
-		reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT;
+		reg |= cfg->vmid << CBAR_VMID_SHIFT;
 	}
 	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));
 
@@ -783,20 +782,18 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 
 	/* TTBRs */
 	if (stage1) {
-		u16 asid = ARM_SMMU_CB_ASID(smmu, cfg);
-
 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
 			reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
 			writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0);
 			reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
 			writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1);
-			writel_relaxed(asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
+			writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
 		} else {
 			reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
-			reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
+			reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
 			writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
 			reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
-			reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
+			reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
 			writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
 		}
 	} else {
@@ -945,6 +942,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		cfg->irptndx = cfg->cbndx;
 	}
 
+	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
+		cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
+	else
+		cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+
 	pgtbl_cfg = (struct io_pgtable_cfg) {
 		.pgsize_bitmap	= smmu->pgsize_bitmap,
 		.ias		= ias,
-- 
cgit v1.2.3


From 452107c79035c6654b963e83c4862d9faa195af4 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 30 Mar 2017 17:56:30 +0100
Subject: iommu/arm-smmu: Tidy up context bank indexing

ARM_AMMU_CB() is calculated relative to ARM_SMMU_CB_BASE(), but the
latter is never of use on its own, and what we end up with is the same
ARM_SMMU_CB_BASE() + ARM_AMMU_CB() expression being duplicated at every
callsite. Folding the two together gives us a self-contained context
bank accessor which is much more pleasant to work with.

Secondly, we might as well simplify CB_BASE itself at the same time.
We use the address space size for its own sake precisely once, at probe
time, and every other usage is to dynamically calculate CB_BASE over
and over and over again. Let's flip things around so that we just
maintain the CB_BASE address directly.

Reviewed-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 05f17ebd70f6..27922ffd330c 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -216,8 +216,7 @@ enum arm_smmu_s2cr_privcfg {
 #define CBA2R_VMID_MASK			0xffff
 
 /* Translation context bank */
-#define ARM_SMMU_CB_BASE(smmu)		((smmu)->base + ((smmu)->size >> 1))
-#define ARM_SMMU_CB(smmu, n)		((n) * (1 << (smmu)->pgshift))
+#define ARM_SMMU_CB(smmu, n)	((smmu)->cb_base + ((n) << (smmu)->pgshift))
 
 #define ARM_SMMU_CB_SCTLR		0x0
 #define ARM_SMMU_CB_ACTLR		0x4
@@ -344,7 +343,7 @@ struct arm_smmu_device {
 	struct device			*dev;
 
 	void __iomem			*base;
-	unsigned long			size;
+	void __iomem			*cb_base;
 	unsigned long			pgshift;
 
 #define ARM_SMMU_FEAT_COHERENT_WALK	(1 << 0)
@@ -603,7 +602,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
 	void __iomem *base;
 
 	if (stage1) {
-		base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+		base = ARM_SMMU_CB(smmu, cfg->cbndx);
 		writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
 	} else {
 		base = ARM_SMMU_GR0(smmu);
@@ -623,7 +622,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 	void __iomem *reg;
 
 	if (stage1) {
-		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+		reg = ARM_SMMU_CB(smmu, cfg->cbndx);
 		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
 
 		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
@@ -642,7 +641,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 			} while (size -= granule);
 		}
 	} else if (smmu->version == ARM_SMMU_V2) {
-		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+		reg = ARM_SMMU_CB(smmu, cfg->cbndx);
 		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
 			      ARM_SMMU_CB_S2_TLBIIPAS2;
 		iova >>= 12;
@@ -672,7 +671,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	void __iomem *cb_base;
 
-	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
 	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
 
 	if (!(fsr & FSR_FAULT))
@@ -725,7 +724,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 
 	gr1_base = ARM_SMMU_GR1(smmu);
 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
-	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
 
 	if (smmu->version > ARM_SMMU_V1) {
 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
@@ -1011,7 +1010,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
 	 * Disable the context bank and free the page tables before freeing
 	 * it.
 	 */
-	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
 	writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
 
 	if (cfg->irptndx != INVALID_IRPTNDX) {
@@ -1362,7 +1361,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 	u64 phys;
 	unsigned long va;
 
-	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
 
 	/* ATS1 registers can only be written atomically */
 	va = iova & ~0xfffUL;
@@ -1689,7 +1688,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 
 	/* Make sure all context banks are disabled and clear CB_FSR  */
 	for (i = 0; i < smmu->num_context_banks; ++i) {
-		cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i);
+		cb_base = ARM_SMMU_CB(smmu, i);
 		writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
 		writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
 		/*
@@ -1869,11 +1868,11 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 
 	/* Check for size mismatch of SMMU address space from mapped region */
 	size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
-	size *= 2 << smmu->pgshift;
-	if (smmu->size != size)
+	size <<= smmu->pgshift;
+	if (smmu->cb_base != gr0_base + size)
 		dev_warn(smmu->dev,
-			"SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n",
-			size, smmu->size);
+			"SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
+			size * 2, (smmu->cb_base - gr0_base) * 2);
 
 	smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
 	smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
@@ -2110,7 +2109,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	smmu->base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(smmu->base))
 		return PTR_ERR(smmu->base);
-	smmu->size = resource_size(res);
+	smmu->cb_base = smmu->base + resource_size(res) / 2;
 
 	num_irqs = 0;
 	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
-- 
cgit v1.2.3


From 11febfca2419652f28804879a58ffd32adce2372 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 30 Mar 2017 17:56:31 +0100
Subject: iommu/arm-smmu: Use per-context TLB sync as appropriate

TLB synchronisation typically involves the SMMU blocking all incoming
transactions until the TLBs report completion of all outstanding
operations. In the common SMMUv2 configuration of a single distributed
SMMU serving multiple peripherals, that means that a single unmap
request has the potential to bring the hammer down on the entire system
if synchronised globally. Since stage 1 contexts, and stage 2 contexts
under SMMUv2, offer local sync operations, let's make use of those
wherever we can in the hope of minimising global disruption.

To that end, rather than add any more branches to the already unwieldy
monolithic TLB maintenance ops, break them up into smaller, neater,
functions which we can then mix and match as appropriate.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 113 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 80 insertions(+), 33 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 27922ffd330c..83f50142e63e 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -237,6 +237,8 @@ enum arm_smmu_s2cr_privcfg {
 #define ARM_SMMU_CB_S1_TLBIVAL		0x620
 #define ARM_SMMU_CB_S2_TLBIIPAS2	0x630
 #define ARM_SMMU_CB_S2_TLBIIPAS2L	0x638
+#define ARM_SMMU_CB_TLBSYNC		0x7f0
+#define ARM_SMMU_CB_TLBSTATUS		0x7f4
 #define ARM_SMMU_CB_ATS1PR		0x800
 #define ARM_SMMU_CB_ATSR		0x8f0
 
@@ -569,14 +571,13 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
 }
 
 /* Wait for any pending TLB invalidations to complete */
-static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
+static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
+				void __iomem *sync, void __iomem *status)
 {
 	int count = 0;
-	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
 
-	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC);
-	while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS)
-	       & sTLBGSTATUS_GSACTIVE) {
+	writel_relaxed(0, sync);
+	while (readl_relaxed(status) & sTLBGSTATUS_GSACTIVE) {
 		cpu_relax();
 		if (++count == TLB_LOOP_TIMEOUT) {
 			dev_err_ratelimited(smmu->dev,
@@ -587,29 +588,49 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 	}
 }
 
-static void arm_smmu_tlb_sync(void *cookie)
+static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
+{
+	void __iomem *base = ARM_SMMU_GR0(smmu);
+
+	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
+			    base + ARM_SMMU_GR0_sTLBGSTATUS);
+}
+
+static void arm_smmu_tlb_sync_context(void *cookie)
+{
+	struct arm_smmu_domain *smmu_domain = cookie;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
+
+	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
+			    base + ARM_SMMU_CB_TLBSTATUS);
+}
+
+static void arm_smmu_tlb_sync_vmid(void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
-	__arm_smmu_tlb_sync(smmu_domain->smmu);
+
+	arm_smmu_tlb_sync_global(smmu_domain->smmu);
 }
 
-static void arm_smmu_tlb_inv_context(void *cookie)
+static void arm_smmu_tlb_inv_context_s1(void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
-	void __iomem *base;
+	void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
 
-	if (stage1) {
-		base = ARM_SMMU_CB(smmu, cfg->cbndx);
-		writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
-	} else {
-		base = ARM_SMMU_GR0(smmu);
-		writel_relaxed(cfg->vmid, base + ARM_SMMU_GR0_TLBIVMID);
-	}
+	writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
+	arm_smmu_tlb_sync_context(cookie);
+}
 
-	__arm_smmu_tlb_sync(smmu);
+static void arm_smmu_tlb_inv_context_s2(void *cookie)
+{
+	struct arm_smmu_domain *smmu_domain = cookie;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	void __iomem *base = ARM_SMMU_GR0(smmu);
+
+	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+	arm_smmu_tlb_sync_global(smmu);
 }
 
 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
@@ -617,12 +638,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
-	void __iomem *reg;
+	void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
 
 	if (stage1) {
-		reg = ARM_SMMU_CB(smmu, cfg->cbndx);
 		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
 
 		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
@@ -640,8 +659,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 				iova += granule >> 12;
 			} while (size -= granule);
 		}
-	} else if (smmu->version == ARM_SMMU_V2) {
-		reg = ARM_SMMU_CB(smmu, cfg->cbndx);
+	} else {
 		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
 			      ARM_SMMU_CB_S2_TLBIIPAS2;
 		iova >>= 12;
@@ -649,16 +667,40 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 			smmu_write_atomic_lq(iova, reg);
 			iova += granule >> 12;
 		} while (size -= granule);
-	} else {
-		reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
-		writel_relaxed(cfg->vmid, reg);
 	}
 }
 
-static const struct iommu_gather_ops arm_smmu_gather_ops = {
-	.tlb_flush_all	= arm_smmu_tlb_inv_context,
+/*
+ * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
+ * almost negligible, but the benefit of getting the first one in as far ahead
+ * of the sync as possible is significant, hence we don't just make this a
+ * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
+ */
+static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
+					 size_t granule, bool leaf, void *cookie)
+{
+	struct arm_smmu_domain *smmu_domain = cookie;
+	void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
+
+	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+}
+
+static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
+	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
+	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
+	.tlb_sync	= arm_smmu_tlb_sync_context,
+};
+
+static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
+	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
 	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
-	.tlb_sync	= arm_smmu_tlb_sync,
+	.tlb_sync	= arm_smmu_tlb_sync_context,
+};
+
+static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
+	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
+	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
+	.tlb_sync	= arm_smmu_tlb_sync_vmid,
 };
 
 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
@@ -833,6 +875,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 	enum io_pgtable_fmt fmt;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+	const struct iommu_gather_ops *tlb_ops;
 
 	mutex_lock(&smmu_domain->init_mutex);
 	if (smmu_domain->smmu)
@@ -904,6 +947,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 			ias = min(ias, 32UL);
 			oas = min(oas, 32UL);
 		}
+		tlb_ops = &arm_smmu_s1_tlb_ops;
 		break;
 	case ARM_SMMU_DOMAIN_NESTED:
 		/*
@@ -922,12 +966,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 			ias = min(ias, 40UL);
 			oas = min(oas, 40UL);
 		}
+		if (smmu->version == ARM_SMMU_V2)
+			tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+		else
+			tlb_ops = &arm_smmu_s2_tlb_ops_v1;
 		break;
 	default:
 		ret = -EINVAL;
 		goto out_unlock;
 	}
-
 	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
 				      smmu->num_context_banks);
 	if (ret < 0)
@@ -950,7 +997,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		.pgsize_bitmap	= smmu->pgsize_bitmap,
 		.ias		= ias,
 		.oas		= oas,
-		.tlb		= &arm_smmu_gather_ops,
+		.tlb		= tlb_ops,
 		.iommu_dev	= smmu->dev,
 	};
 
@@ -1734,7 +1781,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 		reg |= sCR0_EXIDENABLE;
 
 	/* Push the button */
-	__arm_smmu_tlb_sync(smmu);
+	arm_smmu_tlb_sync_global(smmu);
 	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
 }
 
-- 
cgit v1.2.3


From 8513c89300696a68963cc504c21d034c6369e183 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 30 Mar 2017 17:56:32 +0100
Subject: iommu/arm-smmu: Poll for TLB sync completion more effectively

On relatively slow development platforms and software models, the
inefficiency of our TLB sync loop tends not to show up - for instance on
a Juno r1 board I typically see the TLBI has completed of its own accord
by the time we get to the sync, such that the latter finishes instantly.

However, on larger systems doing real I/O, it's less realistic for the
TLBs to go idle immediately, and at that point falling into the 1MHz
polling loop turns out to throw away performance drastically. Let's
strike a balance by polling more than once between pauses, such that we
have much more chance of catching normal operations completing before
committing to the fixed delay, but also backing off exponentially, since
if a sync really hasn't completed within one or two "reasonable time"
periods, it becomes increasingly unlikely that it ever will.

Reviewed-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 83f50142e63e..618e133f643a 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -162,6 +162,7 @@
 #define ARM_SMMU_GR0_sTLBGSTATUS	0x74
 #define sTLBGSTATUS_GSACTIVE		(1 << 0)
 #define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
+#define TLB_SPIN_COUNT			10
 
 /* Stream mapping registers */
 #define ARM_SMMU_GR0_SMR(n)		(0x800 + ((n) << 2))
@@ -574,18 +575,19 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
 				void __iomem *sync, void __iomem *status)
 {
-	int count = 0;
+	unsigned int spin_cnt, delay;
 
 	writel_relaxed(0, sync);
-	while (readl_relaxed(status) & sTLBGSTATUS_GSACTIVE) {
-		cpu_relax();
-		if (++count == TLB_LOOP_TIMEOUT) {
-			dev_err_ratelimited(smmu->dev,
-			"TLB sync timed out -- SMMU may be deadlocked\n");
-			return;
+	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
+		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
+			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
+				return;
+			cpu_relax();
 		}
-		udelay(1);
+		udelay(delay);
 	}
+	dev_err_ratelimited(smmu->dev,
+			    "TLB sync timed out -- SMMU may be deadlocked\n");
 }
 
 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
-- 
cgit v1.2.3


From 56fbf600dd8e2f32a5317437fe310b56719f7d2b Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 31 Mar 2017 12:03:33 +0100
Subject: iommu/arm-smmu: Add global SMR masking property

The current SMR masking support using a 2-cell iommu-specifier is
primarily intended to handle individual masters with large and/or
complex Stream ID assignments; it quickly gets a bit clunky in other SMR
use-cases where we just want to consistently mask out the same part of
every Stream ID (e.g. for MMU-500 configurations where the appended TBU
number gets in the way unnecessarily). Let's add a new property to allow
a single global mask value to better fit the latter situation.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Nipun Gupta <nipun.gupta@nxp.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 .../devicetree/bindings/iommu/arm,smmu.txt         | 28 ++++++++++++++++++++++
 drivers/iommu/arm-smmu.c                           |  4 +++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 6cdf32d037fc..8a6ffce12af5 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -60,6 +60,17 @@ conditions.
                   aliases of secure registers have to be used during
                   SMMU configuration.
 
+- stream-match-mask : For SMMUs supporting stream matching and using
+                  #iommu-cells = <1>, specifies a mask of bits to ignore
+		  when matching stream IDs (e.g. this may be programmed
+		  into the SMRn.MASK field of every stream match register
+		  used). For cases where it is desirable to ignore some
+                  portion of every Stream ID (e.g. for certain MMU-500
+                  configurations given globally unique input IDs). This
+                  property is not valid for SMMUs using stream indexing,
+                  or using stream matching with #iommu-cells = <2>, and
+                  may be ignored if present in such cases.
+
 ** Deprecated properties:
 
 - mmu-masters (deprecated in favour of the generic "iommus" binding) :
@@ -109,3 +120,20 @@ conditions.
         master3 {
                 iommus = <&smmu2 1 0x30>;
         };
+
+
+        /* ARM MMU-500 with 10-bit stream ID input configuration */
+        smmu3: iommu {
+                compatible = "arm,mmu-500", "arm,smmu-v2";
+                ...
+                #iommu-cells = <1>;
+                /* always ignore appended 5-bit TBU number */
+                stream-match-mask = 0x7c00;
+        };
+
+        bus {
+                /* bus whose child devices emit one unique 10-bit stream
+                   ID each, but may master through multiple SMMU TBUs */
+                iommu-map = <0 &smmu3 0 0x400>;
+                ...
+        };
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 618e133f643a..6560c4a34b96 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1644,13 +1644,15 @@ out_unlock:
 
 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
 {
-	u32 fwid = 0;
+	u32 mask, fwid = 0;
 
 	if (args->args_count > 0)
 		fwid |= (u16)args->args[0];
 
 	if (args->args_count > 1)
 		fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
+	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
+		fwid |= (u16)mask << SMR_MASK_SHIFT;
 
 	return iommu_fwspec_add_ids(dev, &fwid, 1);
 }
-- 
cgit v1.2.3


From 0834cc28fa56c65887c614b6c045be2ba06fdcb0 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 6 Jan 2017 16:28:17 +0000
Subject: iommu/arm-smmu: Restrict domain attributes to UNMANAGED domains

The ARM SMMU drivers provide a DOMAIN_ATTR_NESTING domain attribute,
which allows callers of the IOMMU API to request that the page table
for a domain is installed at stage-2, if supported by the hardware.

Since setting this attribute only makes sense for UNMANAGED domains,
this patch returns -ENODEV if the domain_{get,set}_attr operations are
called on other domain types.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu-v3.c | 6 ++++++
 drivers/iommu/arm-smmu.c    | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 591bb96047c9..b47a88757c18 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1837,6 +1837,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
+	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
+		return -EINVAL;
+
 	switch (attr) {
 	case DOMAIN_ATTR_NESTING:
 		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
@@ -1852,6 +1855,9 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
 	int ret = 0;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
+	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
+		return -EINVAL;
+
 	mutex_lock(&smmu_domain->init_mutex);
 
 	switch (attr) {
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 6560c4a34b96..099215bbff89 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1603,6 +1603,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
+	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
+		return -EINVAL;
+
 	switch (attr) {
 	case DOMAIN_ATTR_NESTING:
 		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
@@ -1618,6 +1621,9 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
 	int ret = 0;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
+	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
+		return -EINVAL;
+
 	mutex_lock(&smmu_domain->init_mutex);
 
 	switch (attr) {
-- 
cgit v1.2.3


From 61bc671179f19060be883068b6d3d82ae0b24bc0 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 6 Jan 2017 16:56:03 +0000
Subject: iommu/arm-smmu: Install bypass S2CRs for IOMMU_DOMAIN_IDENTITY
 domains

In preparation for allowing the default domain type to be overridden,
this patch adds support for IOMMU_DOMAIN_IDENTITY domains to the
ARM SMMU driver.

An identity domain is created by placing the corresponding S2CR
registers into "bypass" mode, which allows transactions to flow through
the SMMU without any translation.

Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 099215bbff89..dbd4998661c6 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -419,6 +419,7 @@ enum arm_smmu_domain_stage {
 	ARM_SMMU_DOMAIN_S1 = 0,
 	ARM_SMMU_DOMAIN_S2,
 	ARM_SMMU_DOMAIN_NESTED,
+	ARM_SMMU_DOMAIN_BYPASS,
 };
 
 struct arm_smmu_domain {
@@ -883,6 +884,12 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 	if (smmu_domain->smmu)
 		goto out_unlock;
 
+	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
+		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
+		smmu_domain->smmu = smmu;
+		goto out_unlock;
+	}
+
 	/*
 	 * Mapping the requested stage onto what we support is surprisingly
 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
@@ -1052,7 +1059,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
 	void __iomem *cb_base;
 	int irq;
 
-	if (!smmu)
+	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
 		return;
 
 	/*
@@ -1075,7 +1082,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 {
 	struct arm_smmu_domain *smmu_domain;
 
-	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
+	if (type != IOMMU_DOMAIN_UNMANAGED &&
+	    type != IOMMU_DOMAIN_DMA &&
+	    type != IOMMU_DOMAIN_IDENTITY)
 		return NULL;
 	/*
 	 * Allocate the domain and initialise some of its data structures.
@@ -1304,10 +1313,15 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
 {
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
-	enum arm_smmu_s2cr_type type = S2CR_TYPE_TRANS;
 	u8 cbndx = smmu_domain->cfg.cbndx;
+	enum arm_smmu_s2cr_type type;
 	int i, idx;
 
+	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
+		type = S2CR_TYPE_BYPASS;
+	else
+		type = S2CR_TYPE_TRANS;
+
 	for_each_cfg_sme(fwspec, i, idx) {
 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
 			continue;
-- 
cgit v1.2.3


From 67560edcd8e5c57eccec4df562abbfc21c17ad75 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 1 Mar 2017 21:11:29 +0000
Subject: iommu/arm-smmu-v3: Make arm_smmu_install_ste_for_dev return void

arm_smmu_install_ste_for_dev cannot fail and always returns 0, however
the fact that it returns int means that callers end up implementing
redundant error handling code which complicates STE tracking and is
never executed.

This patch changes the return type of arm_smmu_install_ste_for_dev
to void, to make it explicit that it cannot fail.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu-v3.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index b47a88757c18..97be8de3e834 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1579,7 +1579,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
 	return step;
 }
 
-static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
+static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
 {
 	int i;
 	struct arm_smmu_master_data *master = fwspec->iommu_priv;
@@ -1591,8 +1591,6 @@ static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
 
 		arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
 	}
-
-	return 0;
 }
 
 static void arm_smmu_detach_dev(struct device *dev)
@@ -1600,8 +1598,7 @@ static void arm_smmu_detach_dev(struct device *dev)
 	struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
 
 	master->ste.bypass = true;
-	if (arm_smmu_install_ste_for_dev(dev->iommu_fwspec) < 0)
-		dev_warn(dev, "failed to install bypass STE\n");
+	arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
 }
 
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -1653,10 +1650,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		ste->s2_cfg = &smmu_domain->s2_cfg;
 	}
 
-	ret = arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
-	if (ret < 0)
-		ste->valid = false;
-
+	arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
 out_unlock:
 	mutex_unlock(&smmu_domain->init_mutex);
 	return ret;
-- 
cgit v1.2.3


From beb3c6a066bff1ba412f983cb9d1a42f4cd8f76a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 6 Jan 2017 16:27:30 +0000
Subject: iommu/arm-smmu-v3: Install bypass STEs for IOMMU_DOMAIN_IDENTITY
 domains

In preparation for allowing the default domain type to be overridden,
this patch adds support for IOMMU_DOMAIN_IDENTITY domains to the
ARM SMMUv3 driver.

An identity domain is created by placing the corresponding stream table
entries into "bypass" mode, which allows transactions to flow through
the SMMU without any translation.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu-v3.c | 58 +++++++++++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 21 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 97be8de3e834..803352d78d43 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -554,9 +554,14 @@ struct arm_smmu_s2_cfg {
 };
 
 struct arm_smmu_strtab_ent {
-	bool				valid;
-
-	bool				bypass;	/* Overrides s1/s2 config */
+	/*
+	 * An STE is "assigned" if the master emitting the corresponding SID
+	 * is attached to a domain. The behaviour of an unassigned STE is
+	 * determined by the disable_bypass parameter, whereas an assigned
+	 * STE behaves according to s1_cfg/s2_cfg, which themselves are
+	 * configured according to the domain type.
+	 */
+	bool				assigned;
 	struct arm_smmu_s1_cfg		*s1_cfg;
 	struct arm_smmu_s2_cfg		*s2_cfg;
 };
@@ -632,6 +637,7 @@ enum arm_smmu_domain_stage {
 	ARM_SMMU_DOMAIN_S1 = 0,
 	ARM_SMMU_DOMAIN_S2,
 	ARM_SMMU_DOMAIN_NESTED,
+	ARM_SMMU_DOMAIN_BYPASS,
 };
 
 struct arm_smmu_domain {
@@ -1005,9 +1011,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 	 * This is hideously complicated, but we only really care about
 	 * three cases at the moment:
 	 *
-	 * 1. Invalid (all zero) -> bypass  (init)
-	 * 2. Bypass -> translation (attach)
-	 * 3. Translation -> bypass (detach)
+	 * 1. Invalid (all zero) -> bypass/fault (init)
+	 * 2. Bypass/fault -> translation/bypass (attach)
+	 * 3. Translation/bypass -> bypass/fault (detach)
 	 *
 	 * Given that we can't update the STE atomically and the SMMU
 	 * doesn't read the thing in a defined order, that leaves us
@@ -1046,11 +1052,15 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 	}
 
 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
-	val = ste->valid ? STRTAB_STE_0_V : 0;
+	val = STRTAB_STE_0_V;
+
+	/* Bypass/fault */
+	if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
+		if (!ste->assigned && disable_bypass)
+			val |= STRTAB_STE_0_CFG_ABORT;
+		else
+			val |= STRTAB_STE_0_CFG_BYPASS;
 
-	if (ste->bypass) {
-		val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
-				      : STRTAB_STE_0_CFG_BYPASS;
 		dst[0] = cpu_to_le64(val);
 		dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
 			 << STRTAB_STE_1_SHCFG_SHIFT);
@@ -1111,10 +1121,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
 {
 	unsigned int i;
-	struct arm_smmu_strtab_ent ste = {
-		.valid	= true,
-		.bypass	= true,
-	};
+	struct arm_smmu_strtab_ent ste = { .assigned = false };
 
 	for (i = 0; i < nent; ++i) {
 		arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
@@ -1378,7 +1385,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 {
 	struct arm_smmu_domain *smmu_domain;
 
-	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
+	if (type != IOMMU_DOMAIN_UNMANAGED &&
+	    type != IOMMU_DOMAIN_DMA &&
+	    type != IOMMU_DOMAIN_IDENTITY)
 		return NULL;
 
 	/*
@@ -1509,6 +1518,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 
+	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
+		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
+		return 0;
+	}
+
 	/* Restrict the stage to what we can actually support */
 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
@@ -1597,7 +1611,7 @@ static void arm_smmu_detach_dev(struct device *dev)
 {
 	struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
 
-	master->ste.bypass = true;
+	master->ste.assigned = false;
 	arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
 }
 
@@ -1617,7 +1631,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	ste = &master->ste;
 
 	/* Already attached to a different domain? */
-	if (!ste->bypass)
+	if (ste->assigned)
 		arm_smmu_detach_dev(dev);
 
 	mutex_lock(&smmu_domain->init_mutex);
@@ -1638,10 +1652,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		goto out_unlock;
 	}
 
-	ste->bypass = false;
-	ste->valid = true;
+	ste->assigned = true;
 
-	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
+		ste->s1_cfg = NULL;
+		ste->s2_cfg = NULL;
+	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
 		ste->s1_cfg = &smmu_domain->s1_cfg;
 		ste->s2_cfg = NULL;
 		arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
@@ -1801,7 +1817,7 @@ static void arm_smmu_remove_device(struct device *dev)
 
 	master = fwspec->iommu_priv;
 	smmu = master->smmu;
-	if (master && master->ste.valid)
+	if (master && master->ste.assigned)
 		arm_smmu_detach_dev(dev);
 	iommu_group_remove_device(dev);
 	iommu_device_unlink(&smmu->iommu, dev);
-- 
cgit v1.2.3


From fccb4e3b8ab0957628abec82675691c72f67003e Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 5 Jan 2017 18:38:26 +0000
Subject: iommu: Allow default domain type to be set on the kernel command line

The IOMMU core currently initialises the default domain for each group
to IOMMU_DOMAIN_DMA, under the assumption that devices will use
IOMMU-backed DMA ops by default. However, in some cases it is desirable
for the DMA ops to bypass the IOMMU for performance reasons, reserving
use of translation for subsystems such as VFIO that require it for
enforcing device isolation.

Rather than modify each IOMMU driver to provide different semantics for
DMA domains, instead we introduce a command line parameter that can be
used to change the type of the default domain. Passthrough can then be
specified using "iommu.passthrough=1" on the kernel command line.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 Documentation/admin-guide/kernel-parameters.txt |  6 ++++++
 drivers/iommu/iommu.c                           | 28 ++++++++++++++++++++++---
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2ba45caabada..187ac5bc8b40 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1644,6 +1644,12 @@
 		nobypass	[PPC/POWERNV]
 			Disable IOMMU bypass, using IOMMU for PCI devices.
 
+	iommu.passthrough=
+			[ARM64] Configure DMA to bypass the IOMMU by default.
+			Format: { "0" | "1" }
+			0 - Use IOMMU translation for DMA.
+			1 - Bypass the IOMMU for DMA.
+			unset - Use IOMMU translation for DMA.
 
 	io7=		[HW] IO7 for Marvel based alpha systems
 			See comment before marvel_specify_io7 in
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3b67144dead2..770ba7e7ef4d 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -36,6 +36,7 @@
 
 static struct kset *iommu_group_kset;
 static DEFINE_IDA(iommu_group_ida);
+static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
 
 struct iommu_callback_data {
 	const struct iommu_ops *ops;
@@ -112,6 +113,18 @@ static int __iommu_attach_group(struct iommu_domain *domain,
 static void __iommu_detach_group(struct iommu_domain *domain,
 				 struct iommu_group *group);
 
+static int __init iommu_set_def_domain_type(char *str)
+{
+	bool pt;
+
+	if (!str || strtobool(str, &pt))
+		return -EINVAL;
+
+	iommu_def_domain_type = pt ? IOMMU_DOMAIN_IDENTITY : IOMMU_DOMAIN_DMA;
+	return 0;
+}
+early_param("iommu.passthrough", iommu_set_def_domain_type);
+
 static ssize_t iommu_group_attr_show(struct kobject *kobj,
 				     struct attribute *__attr, char *buf)
 {
@@ -1015,10 +1028,19 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
 	 * IOMMU driver.
 	 */
 	if (!group->default_domain) {
-		group->default_domain = __iommu_domain_alloc(dev->bus,
-							     IOMMU_DOMAIN_DMA);
+		struct iommu_domain *dom;
+
+		dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type);
+		if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) {
+			dev_warn(dev,
+				 "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA",
+				 iommu_def_domain_type);
+			dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA);
+		}
+
+		group->default_domain = dom;
 		if (!group->domain)
-			group->domain = group->default_domain;
+			group->domain = dom;
 	}
 
 	ret = iommu_group_add_device(group, dev);
-- 
cgit v1.2.3


From 022f4e4f31fea69702f3ec810dc567af6a6d86d8 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 3 Apr 2017 13:12:10 +0100
Subject: iommu/io-pgtable-arm: Avoid shift overflow in block size

The recursive nature of __arm_lpae_{map,unmap}() means that
ARM_LPAE_BLOCK_SIZE() is evaluated for every level, including those
where block mappings aren't possible. This in itself is harmless enough,
as we will only ever be called with valid sizes from the pgsize_bitmap,
and thus always recurse down past any imaginary block sizes. The only
problem is that most of those imaginary sizes overflow the type used for
the calculation, and thus trigger warnings under UBsan:

[   63.020939] ================================================================================
[   63.021284] UBSAN: Undefined behaviour in drivers/iommu/io-pgtable-arm.c:312:22
[   63.021602] shift exponent 39 is too large for 32-bit type 'int'
[   63.021909] CPU: 0 PID: 1119 Comm: lkvm Not tainted 4.7.0-rc3+ #819
[   63.022163] Hardware name: FVP Base (DT)
[   63.022345] Call trace:
[   63.022629] [<ffffff900808f258>] dump_backtrace+0x0/0x3a8
[   63.022975] [<ffffff900808f614>] show_stack+0x14/0x20
[   63.023294] [<ffffff90086bc9dc>] dump_stack+0x104/0x148
[   63.023609] [<ffffff9008713ce8>] ubsan_epilogue+0x18/0x68
[   63.023956] [<ffffff9008714410>] __ubsan_handle_shift_out_of_bounds+0x18c/0x1bc
[   63.024365] [<ffffff900890fcb0>] __arm_lpae_map+0x720/0xae0
[   63.024732] [<ffffff9008910170>] arm_lpae_map+0x100/0x190
[   63.025049] [<ffffff90089183d8>] arm_smmu_map+0x78/0xc8
[   63.025390] [<ffffff9008906c18>] iommu_map+0x130/0x230
[   63.025763] [<ffffff9008bf7564>] vfio_iommu_type1_attach_group+0x4bc/0xa00
[   63.026156] [<ffffff9008bf3c78>] vfio_fops_unl_ioctl+0x320/0x580
[   63.026515] [<ffffff9008377420>] do_vfs_ioctl+0x140/0xd28
[   63.026858] [<ffffff9008378094>] SyS_ioctl+0x8c/0xa0
[   63.027179] [<ffffff9008086e70>] el0_svc_naked+0x24/0x28
[   63.027412] ================================================================================

Perform the shift in a 64-bit type to prevent the theoretical overflow
and keep the peace. As it turns out, this generates identical code for
32-bit ARM, and marginally shorter AArch64 code, so it's good all round.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/io-pgtable-arm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index f9bc6ebb8140..6e5df5e0a3bd 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -74,7 +74,7 @@
 
 /* Calculate the block/page mapping size at level l for pagetable in d. */
 #define ARM_LPAE_BLOCK_SIZE(l,d)					\
-	(1 << (ilog2(sizeof(arm_lpae_iopte)) +				\
+	(1ULL << (ilog2(sizeof(arm_lpae_iopte)) +			\
 		((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level)))
 
 /* Page table bits */
-- 
cgit v1.2.3


From f9bb9da7c09d76f00c9abdf34cbeaec65e50d78b Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 1 Mar 2017 15:52:27 +0100
Subject: pwm: atmel-hlcdc: Implement the suspend/resume hooks

Implement the suspend/resume hooks to make sure the PWM device is
restored to a correct state after a suspend.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-atmel-hlcdc.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c
index bcb6d946131d..54c6633d9b5d 100644
--- a/drivers/pwm/pwm-atmel-hlcdc.c
+++ b/drivers/pwm/pwm-atmel-hlcdc.c
@@ -191,6 +191,40 @@ static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_sama5d3_errata = {
 	.div1_clk_erratum = true,
 };
 
+#ifdef CONFIG_PM_SLEEP
+static int atmel_hlcdc_pwm_suspend(struct device *dev)
+{
+	struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev);
+
+	/* Keep the periph clock enabled if the PWM is still running. */
+	if (pwm_is_enabled(&chip->chip.pwms[0]))
+		clk_disable_unprepare(chip->hlcdc->periph_clk);
+
+	return 0;
+}
+
+static int atmel_hlcdc_pwm_resume(struct device *dev)
+{
+	struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev);
+	struct pwm_state state;
+	int ret;
+
+	pwm_get_state(&chip->chip.pwms[0], &state);
+
+	/* Re-enable the periph clock it was stopped during suspend. */
+	if (!state.enabled) {
+		ret = clk_prepare_enable(chip->hlcdc->periph_clk);
+		if (ret)
+			return ret;
+	}
+
+	return atmel_hlcdc_pwm_apply(&chip->chip, &chip->chip.pwms[0], &state);
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(atmel_hlcdc_pwm_pm_ops,
+			 atmel_hlcdc_pwm_suspend, atmel_hlcdc_pwm_resume);
+
 static const struct of_device_id atmel_hlcdc_dt_ids[] = {
 	{
 		.compatible = "atmel,at91sam9n12-hlcdc",
@@ -280,6 +314,7 @@ static struct platform_driver atmel_hlcdc_pwm_driver = {
 	.driver = {
 		.name = "atmel-hlcdc-pwm",
 		.of_match_table = atmel_hlcdc_pwm_dt_ids,
+		.pm = &atmel_hlcdc_pwm_pm_ops,
 	},
 	.probe = atmel_hlcdc_pwm_probe,
 	.remove = atmel_hlcdc_pwm_remove,
-- 
cgit v1.2.3


From 1a722aad587967985f927e4ec2650ca64632c3e1 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Wed, 22 Mar 2017 15:29:34 +0200
Subject: pwm: atmel: Switch to atomic PWM

The currently Atmel PWM controllers supported by this driver could
change period or duty factor without channel disable, for regular
channels (sama5d3 support this by using period or duty factor update
registers, sam9rl support this by writing channel update register and
select the corresponding update: period or duty factor). The chip
doesn't support run time changings of signal polarity. To take advantage
of atomic PWM framework and let controller works without glitches, in
this patch only the duty factor could be changed without disabling PWM
channel. For period and signal polarity the atomic PWM is simulated by
disabling + enabling the right PWM channel.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-atmel.c | 273 +++++++++++++++++++++++-------------------------
 1 file changed, 129 insertions(+), 144 deletions(-)

diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c
index 67a7023be5c2..f147154d9cb2 100644
--- a/drivers/pwm/pwm-atmel.c
+++ b/drivers/pwm/pwm-atmel.c
@@ -58,17 +58,22 @@
 #define PWM_MAX_PRD		0xFFFF
 #define PRD_MAX_PRES		10
 
+struct atmel_pwm_registers {
+	u8 period;
+	u8 period_upd;
+	u8 duty;
+	u8 duty_upd;
+};
+
 struct atmel_pwm_chip {
 	struct pwm_chip chip;
 	struct clk *clk;
 	void __iomem *base;
+	const struct atmel_pwm_registers *regs;
 
 	unsigned int updated_pwms;
 	/* ISR is cleared when read, ensure only one thread does that */
 	struct mutex isr_lock;
-
-	void (*config)(struct pwm_chip *chip, struct pwm_device *pwm,
-		       unsigned long dty, unsigned long prd);
 };
 
 static inline struct atmel_pwm_chip *to_atmel_pwm_chip(struct pwm_chip *chip)
@@ -105,153 +110,71 @@ static inline void atmel_pwm_ch_writel(struct atmel_pwm_chip *chip,
 	writel_relaxed(val, chip->base + base + offset);
 }
 
-static int atmel_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-			    int duty_ns, int period_ns)
+static int atmel_pwm_calculate_cprd_and_pres(struct pwm_chip *chip,
+					     const struct pwm_state *state,
+					     unsigned long *cprd, u32 *pres)
 {
 	struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
-	unsigned long prd, dty;
-	unsigned long long div;
-	unsigned int pres = 0;
-	u32 val;
-	int ret;
-
-	if (pwm_is_enabled(pwm) && (period_ns != pwm_get_period(pwm))) {
-		dev_err(chip->dev, "cannot change PWM period while enabled\n");
-		return -EBUSY;
-	}
+	unsigned long long cycles = state->period;
 
 	/* Calculate the period cycles and prescale value */
-	div = (unsigned long long)clk_get_rate(atmel_pwm->clk) * period_ns;
-	do_div(div, NSEC_PER_SEC);
+	cycles *= clk_get_rate(atmel_pwm->clk);
+	do_div(cycles, NSEC_PER_SEC);
 
-	while (div > PWM_MAX_PRD) {
-		div >>= 1;
-		pres++;
-	}
+	for (*pres = 0; cycles > PWM_MAX_PRD; cycles >>= 1)
+		(*pres)++;
 
-	if (pres > PRD_MAX_PRES) {
+	if (*pres > PRD_MAX_PRES) {
 		dev_err(chip->dev, "pres exceeds the maximum value\n");
 		return -EINVAL;
 	}
 
-	/* Calculate the duty cycles */
-	prd = div;
-	div *= duty_ns;
-	do_div(div, period_ns);
-	dty = prd - div;
-
-	ret = clk_enable(atmel_pwm->clk);
-	if (ret) {
-		dev_err(chip->dev, "failed to enable PWM clock\n");
-		return ret;
-	}
-
-	/* It is necessary to preserve CPOL, inside CMR */
-	val = atmel_pwm_ch_readl(atmel_pwm, pwm->hwpwm, PWM_CMR);
-	val = (val & ~PWM_CMR_CPRE_MSK) | (pres & PWM_CMR_CPRE_MSK);
-	atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWM_CMR, val);
-	atmel_pwm->config(chip, pwm, dty, prd);
-	mutex_lock(&atmel_pwm->isr_lock);
-	atmel_pwm->updated_pwms |= atmel_pwm_readl(atmel_pwm, PWM_ISR);
-	atmel_pwm->updated_pwms &= ~(1 << pwm->hwpwm);
-	mutex_unlock(&atmel_pwm->isr_lock);
+	*cprd = cycles;
 
-	clk_disable(atmel_pwm->clk);
-	return ret;
+	return 0;
 }
 
-static void atmel_pwm_config_v1(struct pwm_chip *chip, struct pwm_device *pwm,
-				unsigned long dty, unsigned long prd)
+static void atmel_pwm_calculate_cdty(const struct pwm_state *state,
+				     unsigned long cprd, unsigned long *cdty)
 {
-	struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
-	unsigned int val;
+	unsigned long long cycles = state->duty_cycle;
 
-
-	atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWMV1_CUPD, dty);
-
-	val = atmel_pwm_ch_readl(atmel_pwm, pwm->hwpwm, PWM_CMR);
-	val &= ~PWM_CMR_UPD_CDTY;
-	atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWM_CMR, val);
-
-	/*
-	 * If the PWM channel is enabled, only update CDTY by using the update
-	 * register, it needs to set bit 10 of CMR to 0
-	 */
-	if (pwm_is_enabled(pwm))
-		return;
-	/*
-	 * If the PWM channel is disabled, write value to duty and period
-	 * registers directly.
-	 */
-	atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWMV1_CDTY, dty);
-	atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWMV1_CPRD, prd);
+	cycles *= cprd;
+	do_div(cycles, state->period);
+	*cdty = cprd - cycles;
 }
 
-static void atmel_pwm_config_v2(struct pwm_chip *chip, struct pwm_device *pwm,
-				unsigned long dty, unsigned long prd)
-{
-	struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
-
-	if (pwm_is_enabled(pwm)) {
-		/*
-		 * If the PWM channel is enabled, using the duty update register
-		 * to update the value.
-		 */
-		atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWMV2_CDTYUPD, dty);
-	} else {
-		/*
-		 * If the PWM channel is disabled, write value to duty and
-		 * period registers directly.
-		 */
-		atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWMV2_CDTY, dty);
-		atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWMV2_CPRD, prd);
-	}
-}
-
-static int atmel_pwm_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm,
-				  enum pwm_polarity polarity)
+static void atmel_pwm_update_cdty(struct pwm_chip *chip, struct pwm_device *pwm,
+				  unsigned long cdty)
 {
 	struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
 	u32 val;
-	int ret;
-
-	val = atmel_pwm_ch_readl(atmel_pwm, pwm->hwpwm, PWM_CMR);
 
-	if (polarity == PWM_POLARITY_NORMAL)
-		val &= ~PWM_CMR_CPOL;
-	else
-		val |= PWM_CMR_CPOL;
-
-	ret = clk_enable(atmel_pwm->clk);
-	if (ret) {
-		dev_err(chip->dev, "failed to enable PWM clock\n");
-		return ret;
+	if (atmel_pwm->regs->duty_upd ==
+	    atmel_pwm->regs->period_upd) {
+		val = atmel_pwm_ch_readl(atmel_pwm, pwm->hwpwm, PWM_CMR);
+		val &= ~PWM_CMR_UPD_CDTY;
+		atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWM_CMR, val);
 	}
 
-	atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWM_CMR, val);
-
-	clk_disable(atmel_pwm->clk);
-
-	return 0;
+	atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm,
+			    atmel_pwm->regs->duty_upd, cdty);
 }
 
-static int atmel_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+static void atmel_pwm_set_cprd_cdty(struct pwm_chip *chip,
+				    struct pwm_device *pwm,
+				    unsigned long cprd, unsigned long cdty)
 {
 	struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
-	int ret;
-
-	ret = clk_enable(atmel_pwm->clk);
-	if (ret) {
-		dev_err(chip->dev, "failed to enable PWM clock\n");
-		return ret;
-	}
 
-	atmel_pwm_writel(atmel_pwm, PWM_ENA, 1 << pwm->hwpwm);
-
-	return 0;
+	atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm,
+			    atmel_pwm->regs->duty, cdty);
+	atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm,
+			    atmel_pwm->regs->period, cprd);
 }
 
-static void atmel_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+static void atmel_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm,
+			      bool disable_clk)
 {
 	struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
 	unsigned long timeout = jiffies + 2 * HZ;
@@ -282,37 +205,99 @@ static void atmel_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
 	       time_before(jiffies, timeout))
 		usleep_range(10, 100);
 
-	clk_disable(atmel_pwm->clk);
+	if (disable_clk)
+		clk_disable(atmel_pwm->clk);
+}
+
+static int atmel_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+			   struct pwm_state *state)
+{
+	struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
+	struct pwm_state cstate;
+	unsigned long cprd, cdty;
+	u32 pres, val;
+	int ret;
+
+	pwm_get_state(pwm, &cstate);
+
+	if (state->enabled) {
+		if (cstate.enabled &&
+		    cstate.polarity == state->polarity &&
+		    cstate.period == state->period) {
+			cprd = atmel_pwm_ch_readl(atmel_pwm, pwm->hwpwm,
+						  atmel_pwm->regs->period);
+			atmel_pwm_calculate_cdty(state, cprd, &cdty);
+			atmel_pwm_update_cdty(chip, pwm, cdty);
+			return 0;
+		}
+
+		ret = atmel_pwm_calculate_cprd_and_pres(chip, state, &cprd,
+							&pres);
+		if (ret) {
+			dev_err(chip->dev,
+				"failed to calculate cprd and prescaler\n");
+			return ret;
+		}
+
+		atmel_pwm_calculate_cdty(state, cprd, &cdty);
+
+		if (cstate.enabled) {
+			atmel_pwm_disable(chip, pwm, false);
+		} else {
+			ret = clk_enable(atmel_pwm->clk);
+			if (ret) {
+				dev_err(chip->dev, "failed to enable clock\n");
+				return ret;
+			}
+		}
+
+		/* It is necessary to preserve CPOL, inside CMR */
+		val = atmel_pwm_ch_readl(atmel_pwm, pwm->hwpwm, PWM_CMR);
+		val = (val & ~PWM_CMR_CPRE_MSK) | (pres & PWM_CMR_CPRE_MSK);
+		if (state->polarity == PWM_POLARITY_NORMAL)
+			val &= ~PWM_CMR_CPOL;
+		else
+			val |= PWM_CMR_CPOL;
+		atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWM_CMR, val);
+		atmel_pwm_set_cprd_cdty(chip, pwm, cprd, cdty);
+		mutex_lock(&atmel_pwm->isr_lock);
+		atmel_pwm->updated_pwms |= atmel_pwm_readl(atmel_pwm, PWM_ISR);
+		atmel_pwm->updated_pwms &= ~(1 << pwm->hwpwm);
+		mutex_unlock(&atmel_pwm->isr_lock);
+		atmel_pwm_writel(atmel_pwm, PWM_ENA, 1 << pwm->hwpwm);
+	} else if (cstate.enabled) {
+		atmel_pwm_disable(chip, pwm, true);
+	}
+
+	return 0;
 }
 
 static const struct pwm_ops atmel_pwm_ops = {
-	.config = atmel_pwm_config,
-	.set_polarity = atmel_pwm_set_polarity,
-	.enable = atmel_pwm_enable,
-	.disable = atmel_pwm_disable,
+	.apply = atmel_pwm_apply,
 	.owner = THIS_MODULE,
 };
 
-struct atmel_pwm_data {
-	void (*config)(struct pwm_chip *chip, struct pwm_device *pwm,
-		       unsigned long dty, unsigned long prd);
-};
-
-static const struct atmel_pwm_data atmel_pwm_data_v1 = {
-	.config = atmel_pwm_config_v1,
+static const struct atmel_pwm_registers atmel_pwm_regs_v1 = {
+	.period		= PWMV1_CPRD,
+	.period_upd	= PWMV1_CUPD,
+	.duty		= PWMV1_CDTY,
+	.duty_upd	= PWMV1_CUPD,
 };
 
-static const struct atmel_pwm_data atmel_pwm_data_v2 = {
-	.config = atmel_pwm_config_v2,
+static const struct atmel_pwm_registers atmel_pwm_regs_v2 = {
+	.period		= PWMV2_CPRD,
+	.period_upd	= PWMV2_CPRDUPD,
+	.duty		= PWMV2_CDTY,
+	.duty_upd	= PWMV2_CDTYUPD,
 };
 
 static const struct platform_device_id atmel_pwm_devtypes[] = {
 	{
 		.name = "at91sam9rl-pwm",
-		.driver_data = (kernel_ulong_t)&atmel_pwm_data_v1,
+		.driver_data = (kernel_ulong_t)&atmel_pwm_regs_v1,
 	}, {
 		.name = "sama5d3-pwm",
-		.driver_data = (kernel_ulong_t)&atmel_pwm_data_v2,
+		.driver_data = (kernel_ulong_t)&atmel_pwm_regs_v2,
 	}, {
 		/* sentinel */
 	},
@@ -322,17 +307,17 @@ MODULE_DEVICE_TABLE(platform, atmel_pwm_devtypes);
 static const struct of_device_id atmel_pwm_dt_ids[] = {
 	{
 		.compatible = "atmel,at91sam9rl-pwm",
-		.data = &atmel_pwm_data_v1,
+		.data = &atmel_pwm_regs_v1,
 	}, {
 		.compatible = "atmel,sama5d3-pwm",
-		.data = &atmel_pwm_data_v2,
+		.data = &atmel_pwm_regs_v2,
 	}, {
 		/* sentinel */
 	},
 };
 MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids);
 
-static inline const struct atmel_pwm_data *
+static inline const struct atmel_pwm_registers *
 atmel_pwm_get_driver_data(struct platform_device *pdev)
 {
 	const struct platform_device_id *id;
@@ -342,18 +327,18 @@ atmel_pwm_get_driver_data(struct platform_device *pdev)
 
 	id = platform_get_device_id(pdev);
 
-	return (struct atmel_pwm_data *)id->driver_data;
+	return (struct atmel_pwm_registers *)id->driver_data;
 }
 
 static int atmel_pwm_probe(struct platform_device *pdev)
 {
-	const struct atmel_pwm_data *data;
+	const struct atmel_pwm_registers *regs;
 	struct atmel_pwm_chip *atmel_pwm;
 	struct resource *res;
 	int ret;
 
-	data = atmel_pwm_get_driver_data(pdev);
-	if (!data)
+	regs = atmel_pwm_get_driver_data(pdev);
+	if (!regs)
 		return -ENODEV;
 
 	atmel_pwm = devm_kzalloc(&pdev->dev, sizeof(*atmel_pwm), GFP_KERNEL);
@@ -385,7 +370,7 @@ static int atmel_pwm_probe(struct platform_device *pdev)
 
 	atmel_pwm->chip.base = -1;
 	atmel_pwm->chip.npwm = 4;
-	atmel_pwm->config = data->config;
+	atmel_pwm->regs = regs;
 	atmel_pwm->updated_pwms = 0;
 	mutex_init(&atmel_pwm->isr_lock);
 
-- 
cgit v1.2.3


From 44521afa57bb6f0016c1c1ad70cc8ef352c6d83b Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Wed, 22 Mar 2017 15:29:35 +0200
Subject: pwm: atmel: Enable PWM on sama5d2

sama5d2 can use the same atmel_pwm_data as sama5d3.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/atmel-pwm.txt | 1 +
 drivers/pwm/pwm-atmel.c                             | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/pwm/atmel-pwm.txt b/Documentation/devicetree/bindings/pwm/atmel-pwm.txt
index 02331b904d4e..c8c831d7b0d1 100644
--- a/Documentation/devicetree/bindings/pwm/atmel-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/atmel-pwm.txt
@@ -4,6 +4,7 @@ Required properties:
   - compatible: should be one of:
     - "atmel,at91sam9rl-pwm"
     - "atmel,sama5d3-pwm"
+    - "atmel,sama5d2-pwm"
   - reg: physical base address and length of the controller's registers
   - #pwm-cells: Should be 3. See pwm.txt in this directory for a
     description of the cells format.
diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c
index f147154d9cb2..530d7dc5f1b5 100644
--- a/drivers/pwm/pwm-atmel.c
+++ b/drivers/pwm/pwm-atmel.c
@@ -311,6 +311,9 @@ static const struct of_device_id atmel_pwm_dt_ids[] = {
 	}, {
 		.compatible = "atmel,sama5d3-pwm",
 		.data = &atmel_pwm_regs_v2,
+	}, {
+		.compatible = "atmel,sama5d2-pwm",
+		.data = &atmel_pwm_regs_v2,
 	}, {
 		/* sentinel */
 	},
-- 
cgit v1.2.3


From 2ab15f580a874bf597c98acc16891bb9f95cb45c Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Mon, 23 Jan 2017 19:34:36 +0100
Subject: dt-bindings: pwm: Add MediaTek PWM bindings

Document the device-tree binding of MediaTek PWM. The PWM has 5 channels.
This has been tested on MT7623 only but should work on all the other MTK
SoCs that contain this core.

Signed-off-by: John Crispin <john@phrozen.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 .../devicetree/bindings/pwm/pwm-mediatek.txt       | 34 ++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pwm/pwm-mediatek.txt

diff --git a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt
new file mode 100644
index 000000000000..54c59b0560ad
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt
@@ -0,0 +1,34 @@
+MediaTek PWM controller
+
+Required properties:
+ - compatible: should be "mediatek,<name>-pwm":
+   - "mediatek,mt7623-pwm": found on mt7623 SoC.
+ - reg: physical base address and length of the controller's registers.
+ - #pwm-cells: must be 2. See pwm.txt in this directory for a description of
+   the cell format.
+ - clocks: phandle and clock specifier of the PWM reference clock.
+ - clock-names: must contain the following:
+   - "top": the top clock generator
+   - "main": clock used by the PWM core
+   - "pwm1-5": the five per PWM clocks
+ - pinctrl-names: Must contain a "default" entry.
+ - pinctrl-0: One property must exist for each entry in pinctrl-names.
+   See pinctrl/pinctrl-bindings.txt for details of the property values.
+
+Example:
+	pwm0: pwm@11006000 {
+		compatible = "mediatek,mt7623-pwm";
+		reg = <0 0x11006000 0 0x1000>;
+		#pwm-cells = <2>;
+		clocks = <&topckgen CLK_TOP_PWM_SEL>,
+			 <&pericfg CLK_PERI_PWM>,
+			 <&pericfg CLK_PERI_PWM1>,
+			 <&pericfg CLK_PERI_PWM2>,
+			 <&pericfg CLK_PERI_PWM3>,
+			 <&pericfg CLK_PERI_PWM4>,
+			 <&pericfg CLK_PERI_PWM5>;
+		clock-names = "top", "main", "pwm1", "pwm2",
+			      "pwm3", "pwm4", "pwm5";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pwm0_pins>;
+	};
-- 
cgit v1.2.3


From caf065f8fd583b43a3f95d84c8a0a0d07597963b Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Mon, 23 Jan 2017 19:34:37 +0100
Subject: pwm: Add MediaTek PWM support

This patch adds support for the PWM core found on current ARM base SoCs
made by MediaTek. This IP core supports 5 channels and has 2 operational
modes. There is the old mode, which is a classical PWM and the new mode
which allows the user to define bitmasks that get clocked out on the
pins. As the subsystem currently only supports PWM cores with the "old"
mode, we can safely ignore the "new" mode for now.

Signed-off-by: John Crispin <john@phrozen.org>
[thierry.reding@gmail.com: minor cleanups]
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/Kconfig        |   9 ++
 drivers/pwm/Makefile       |   1 +
 drivers/pwm/pwm-mediatek.c | 220 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 230 insertions(+)
 create mode 100644 drivers/pwm/pwm-mediatek.c

diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 42e37c20b361..313c10789ca2 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -293,6 +293,15 @@ config PWM_MTK_DISP
 	  To compile this driver as a module, choose M here: the module
 	  will be called pwm-mtk-disp.
 
+config PWM_MEDIATEK
+	tristate "MediaTek PWM support"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	help
+	  Generic PWM framework driver for Mediatek ARM SoC.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pwm-mxs.
+
 config PWM_MXS
 	tristate "Freescale MXS PWM support"
 	depends on ARCH_MXS && OF
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index 346a83b00f28..93da1f79a3b8 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_PWM_LPSS)		+= pwm-lpss.o
 obj-$(CONFIG_PWM_LPSS_PCI)	+= pwm-lpss-pci.o
 obj-$(CONFIG_PWM_LPSS_PLATFORM)	+= pwm-lpss-platform.o
 obj-$(CONFIG_PWM_MESON)		+= pwm-meson.o
+obj-$(CONFIG_PWM_MEDIATEK)	+= pwm-mediatek.o
 obj-$(CONFIG_PWM_MTK_DISP)	+= pwm-mtk-disp.o
 obj-$(CONFIG_PWM_MXS)		+= pwm-mxs.o
 obj-$(CONFIG_PWM_OMAP_DMTIMER)	+= pwm-omap-dmtimer.o
diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c
new file mode 100644
index 000000000000..7263952d1aac
--- /dev/null
+++ b/drivers/pwm/pwm-mediatek.c
@@ -0,0 +1,220 @@
+/*
+ * Mediatek Pulse Width Modulator driver
+ *
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+/* PWM registers and bits definitions */
+#define PWMCON			0x00
+#define PWMHDUR			0x04
+#define PWMLDUR			0x08
+#define PWMGDUR			0x0c
+#define PWMWAVENUM		0x28
+#define PWMDWIDTH		0x2c
+#define PWMTHRES		0x30
+
+enum {
+	MTK_CLK_MAIN = 0,
+	MTK_CLK_TOP,
+	MTK_CLK_PWM1,
+	MTK_CLK_PWM2,
+	MTK_CLK_PWM3,
+	MTK_CLK_PWM4,
+	MTK_CLK_PWM5,
+	MTK_CLK_MAX,
+};
+
+static const char * const mtk_pwm_clk_name[] = {
+	"main", "top", "pwm1", "pwm2", "pwm3", "pwm4", "pwm5"
+};
+
+/**
+ * struct mtk_pwm_chip - struct representing PWM chip
+ * @chip: linux PWM chip representation
+ * @regs: base address of PWM chip
+ * @clks: list of clocks
+ */
+struct mtk_pwm_chip {
+	struct pwm_chip chip;
+	void __iomem *regs;
+	struct clk *clks[MTK_CLK_MAX];
+};
+
+static inline struct mtk_pwm_chip *to_mtk_pwm_chip(struct pwm_chip *chip)
+{
+	return container_of(chip, struct mtk_pwm_chip, chip);
+}
+
+static inline u32 mtk_pwm_readl(struct mtk_pwm_chip *chip, unsigned int num,
+				unsigned int offset)
+{
+	return readl(chip->regs + 0x10 + (num * 0x40) + offset);
+}
+
+static inline void mtk_pwm_writel(struct mtk_pwm_chip *chip,
+				  unsigned int num, unsigned int offset,
+				  u32 value)
+{
+	writel(value, chip->regs + 0x10 + (num * 0x40) + offset);
+}
+
+static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
+			  int duty_ns, int period_ns)
+{
+	struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
+	struct clk *clk = pc->clks[MTK_CLK_PWM1 + pwm->hwpwm];
+	u32 resolution, clkdiv = 0;
+
+	resolution = NSEC_PER_SEC / clk_get_rate(clk);
+
+	while (period_ns / resolution > 8191) {
+		resolution *= 2;
+		clkdiv++;
+	}
+
+	if (clkdiv > 7)
+		return -EINVAL;
+
+	mtk_pwm_writel(pc, pwm->hwpwm, PWMCON, BIT(15) | BIT(3) | clkdiv);
+	mtk_pwm_writel(pc, pwm->hwpwm, PWMDWIDTH, period_ns / resolution);
+	mtk_pwm_writel(pc, pwm->hwpwm, PWMTHRES, duty_ns / resolution);
+
+	return 0;
+}
+
+static int mtk_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
+	u32 value;
+	int ret;
+
+	ret = clk_prepare(pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]);
+	if (ret < 0)
+		return ret;
+
+	value = readl(pc->regs);
+	value |= BIT(pwm->hwpwm);
+	writel(value, pc->regs);
+
+	return 0;
+}
+
+static void mtk_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
+	u32 value;
+
+	value = readl(pc->regs);
+	value &= ~BIT(pwm->hwpwm);
+	writel(value, pc->regs);
+
+	clk_unprepare(pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]);
+}
+
+static const struct pwm_ops mtk_pwm_ops = {
+	.config = mtk_pwm_config,
+	.enable = mtk_pwm_enable,
+	.disable = mtk_pwm_disable,
+	.owner = THIS_MODULE,
+};
+
+static int mtk_pwm_probe(struct platform_device *pdev)
+{
+	struct mtk_pwm_chip *pc;
+	struct resource *res;
+	unsigned int i;
+	int ret;
+
+	pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL);
+	if (!pc)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pc->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(pc->regs))
+		return PTR_ERR(pc->regs);
+
+	for (i = 0; i < MTK_CLK_MAX; i++) {
+		pc->clks[i] = devm_clk_get(&pdev->dev, mtk_pwm_clk_name[i]);
+		if (IS_ERR(pc->clks[i]))
+			return PTR_ERR(pc->clks[i]);
+	}
+
+	ret = clk_prepare(pc->clks[MTK_CLK_TOP]);
+	if (ret < 0)
+		return ret;
+
+	ret = clk_prepare(pc->clks[MTK_CLK_MAIN]);
+	if (ret < 0)
+		goto disable_clk_top;
+
+	platform_set_drvdata(pdev, pc);
+
+	pc->chip.dev = &pdev->dev;
+	pc->chip.ops = &mtk_pwm_ops;
+	pc->chip.base = -1;
+	pc->chip.npwm = 5;
+
+	ret = pwmchip_add(&pc->chip);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
+		goto disable_clk_main;
+	}
+
+	return 0;
+
+disable_clk_main:
+	clk_unprepare(pc->clks[MTK_CLK_MAIN]);
+disable_clk_top:
+	clk_unprepare(pc->clks[MTK_CLK_TOP]);
+
+	return ret;
+}
+
+static int mtk_pwm_remove(struct platform_device *pdev)
+{
+	struct mtk_pwm_chip *pc = platform_get_drvdata(pdev);
+	unsigned int i;
+
+	for (i = 0; i < pc->chip.npwm; i++)
+		pwm_disable(&pc->chip.pwms[i]);
+
+	return pwmchip_remove(&pc->chip);
+}
+
+static const struct of_device_id mtk_pwm_of_match[] = {
+	{ .compatible = "mediatek,mt7623-pwm" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, mtk_pwm_of_match);
+
+static struct platform_driver mtk_pwm_driver = {
+	.driver = {
+		.name = "mtk-pwm",
+		.owner = THIS_MODULE,
+		.of_match_table = mtk_pwm_of_match,
+	},
+	.probe = mtk_pwm_probe,
+	.remove = mtk_pwm_remove,
+};
+module_platform_driver(mtk_pwm_driver);
+
+MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
+MODULE_ALIAS("platform:mtk-pwm");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From ee7cdbecb128d8b023c5439004cdea8baa2d0fe4 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Mon, 3 Apr 2017 17:48:28 +0200
Subject: dt-bindings: thermal: add support for Broadcom's Northstar thermal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit documents binding for thermal used in Northstar family SoCs.

There isn't any known Northstar device with active cooling system so DT
example has empty cooling-maps node. There is also no support for CPU
frequency throttling so I put only a critical trip in the example.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 .../devicetree/bindings/thermal/brcm,ns-thermal    | 37 ++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/thermal/brcm,ns-thermal

diff --git a/Documentation/devicetree/bindings/thermal/brcm,ns-thermal b/Documentation/devicetree/bindings/thermal/brcm,ns-thermal
new file mode 100644
index 000000000000..68e047170039
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/brcm,ns-thermal
@@ -0,0 +1,37 @@
+* Broadcom Northstar Thermal
+
+This binding describes thermal sensor that is part of Northstar's DMU (Device
+Management Unit).
+
+Required properties:
+- compatible : Must be "brcm,ns-thermal"
+- reg : iomem address range of PVTMON registers
+- #thermal-sensor-cells : Should be <0>
+
+Example:
+
+thermal: thermal@1800c2c0 {
+	compatible = "brcm,ns-thermal";
+	reg = <0x1800c2c0 0x10>;
+	#thermal-sensor-cells = <0>;
+};
+
+thermal-zones {
+	cpu_thermal: cpu-thermal {
+		polling-delay-passive = <0>;
+		polling-delay = <1000>;
+		coefficients = <(-556) 418000>;
+		thermal-sensors = <&thermal>;
+
+		trips {
+			cpu-crit {
+				temperature	= <125000>;
+				hysteresis	= <0>;
+				type		= "critical";
+			};
+		};
+
+		cooling-maps {
+		};
+	};
+};
-- 
cgit v1.2.3


From a94cb7eeecc4104a6874339f90c5d0647359c102 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Mon, 3 Apr 2017 17:48:29 +0200
Subject: thermal: broadcom: add Northstar thermal driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Northstar is a SoC family commonly used in home routers. This commit
adds a driver for checking CPU temperature. As Northstar Plus seems to
also have this IP block this new symbol gets ARCH_BCM_IPROC dependency.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/Kconfig               |   5 ++
 drivers/thermal/Makefile              |   1 +
 drivers/thermal/broadcom/Kconfig      |   8 +++
 drivers/thermal/broadcom/Makefile     |   1 +
 drivers/thermal/broadcom/ns-thermal.c | 105 ++++++++++++++++++++++++++++++++++
 5 files changed, 120 insertions(+)
 create mode 100644 drivers/thermal/broadcom/Kconfig
 create mode 100644 drivers/thermal/broadcom/Makefile
 create mode 100644 drivers/thermal/broadcom/ns-thermal.c

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 3bd24063375e..ac7301703d03 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -392,6 +392,11 @@ config MTK_THERMAL
 	  Enable this option if you want to have support for thermal management
 	  controller present in Mediatek SoCs
 
+menu "Broadcom thermal drivers"
+depends on ARCH_BCM || COMPILE_TEST
+source "drivers/thermal/broadcom/Kconfig"
+endmenu
+
 menu "Texas Instruments thermal drivers"
 depends on ARCH_HAS_BANDGAP || COMPILE_TEST
 depends on HAS_IOMEM
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index f23cde05dac6..6b7706b9f27c 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -27,6 +27,7 @@ thermal_sys-$(CONFIG_CLOCK_THERMAL)	+= clock_cooling.o
 thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
 
 # platform thermal drivers
+obj-y				+= broadcom/
 obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM)	+= qcom-spmi-temp-alarm.o
 obj-$(CONFIG_SPEAR_THERMAL)	+= spear_thermal.o
 obj-$(CONFIG_ROCKCHIP_THERMAL)	+= rockchip_thermal.o
diff --git a/drivers/thermal/broadcom/Kconfig b/drivers/thermal/broadcom/Kconfig
new file mode 100644
index 000000000000..f0dea8a8e002
--- /dev/null
+++ b/drivers/thermal/broadcom/Kconfig
@@ -0,0 +1,8 @@
+config BCM_NS_THERMAL
+	tristate "Northstar thermal driver"
+	depends on ARCH_BCM_IPROC || COMPILE_TEST
+	help
+	  Northstar is a family of SoCs that includes e.g. BCM4708, BCM47081,
+	  BCM4709 and BCM47094. It contains DMU (Device Management Unit) block
+	  with a thermal sensor that allows checking CPU temperature. This
+	  driver provides support for it.
diff --git a/drivers/thermal/broadcom/Makefile b/drivers/thermal/broadcom/Makefile
new file mode 100644
index 000000000000..059df9a0ed69
--- /dev/null
+++ b/drivers/thermal/broadcom/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_BCM_NS_THERMAL)		+= ns-thermal.o
diff --git a/drivers/thermal/broadcom/ns-thermal.c b/drivers/thermal/broadcom/ns-thermal.c
new file mode 100644
index 000000000000..80ad32c6b2df
--- /dev/null
+++ b/drivers/thermal/broadcom/ns-thermal.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2017 Rafał Miłecki <rafal@milecki.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#define PVTMON_CONTROL0					0x00
+#define PVTMON_CONTROL0_SEL_MASK			0x0000000e
+#define PVTMON_CONTROL0_SEL_TEMP_MONITOR		0x00000000
+#define PVTMON_CONTROL0_SEL_TEST_MODE			0x0000000e
+#define PVTMON_STATUS					0x08
+
+struct ns_thermal {
+	struct thermal_zone_device *tz;
+	void __iomem *pvtmon;
+};
+
+static int ns_thermal_get_temp(void *data, int *temp)
+{
+	struct ns_thermal *ns_thermal = data;
+	int offset = thermal_zone_get_offset(ns_thermal->tz);
+	int slope = thermal_zone_get_slope(ns_thermal->tz);
+	u32 val;
+
+	val = readl(ns_thermal->pvtmon + PVTMON_CONTROL0);
+	if ((val & PVTMON_CONTROL0_SEL_MASK) != PVTMON_CONTROL0_SEL_TEMP_MONITOR) {
+		/* Clear current mode selection */
+		val &= ~PVTMON_CONTROL0_SEL_MASK;
+
+		/* Set temp monitor mode (it's the default actually) */
+		val |= PVTMON_CONTROL0_SEL_TEMP_MONITOR;
+
+		writel(val, ns_thermal->pvtmon + PVTMON_CONTROL0);
+	}
+
+	val = readl(ns_thermal->pvtmon + PVTMON_STATUS);
+	*temp = slope * val + offset;
+
+	return 0;
+}
+
+static const struct thermal_zone_of_device_ops ns_thermal_ops = {
+	.get_temp = ns_thermal_get_temp,
+};
+
+static int ns_thermal_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ns_thermal *ns_thermal;
+
+	ns_thermal = devm_kzalloc(dev, sizeof(*ns_thermal), GFP_KERNEL);
+	if (!ns_thermal)
+		return -ENOMEM;
+
+	ns_thermal->pvtmon = of_iomap(dev_of_node(dev), 0);
+	if (WARN_ON(!ns_thermal->pvtmon))
+		return -ENOENT;
+
+	ns_thermal->tz = devm_thermal_zone_of_sensor_register(dev, 0,
+							      ns_thermal,
+							      &ns_thermal_ops);
+	if (IS_ERR(ns_thermal->tz)) {
+		iounmap(ns_thermal->pvtmon);
+		return PTR_ERR(ns_thermal->tz);
+	}
+
+	platform_set_drvdata(pdev, ns_thermal);
+
+	return 0;
+}
+
+static int ns_thermal_remove(struct platform_device *pdev)
+{
+	struct ns_thermal *ns_thermal = platform_get_drvdata(pdev);
+
+	iounmap(ns_thermal->pvtmon);
+
+	return 0;
+}
+
+static const struct of_device_id ns_thermal_of_match[] = {
+	{ .compatible = "brcm,ns-thermal", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, ns_thermal_of_match);
+
+static struct platform_driver ns_thermal_driver = {
+	.probe		= ns_thermal_probe,
+	.remove		= ns_thermal_remove,
+	.driver = {
+		.name = "ns-thermal",
+		.of_match_table = ns_thermal_of_match,
+	},
+};
+module_platform_driver(ns_thermal_driver);
+
+MODULE_DESCRIPTION("Northstar thermal driver");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 28c1b08d2a156ef16e912a4c618a529e586e5dbc Mon Sep 17 00:00:00 2001
From: Steve Twiss <stwiss.opensource@diasemi.com>
Date: Tue, 28 Mar 2017 15:43:31 +0100
Subject: Documentation: devicetree: thermal: da9062/61 TJUNC temperature
 binding

Device tree binding information for DA9062 and DA9061 thermal junction
temperature monitor.

Binding descriptions for the DA9061 and DA9062 thermal TJUNC supervisor
device driver, using a single THERMAL_TRIP_HOT trip-wire and allowing for
a configurable polling period for over-temperature polling.

This patch also adds two examples, one for DA9062 and one for DA9061. The
DA9061 example uses a fall-back compatible string for the DA9062.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Steve Twiss <stwiss.opensource@diasemi.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 .../devicetree/bindings/thermal/da9062-thermal.txt | 36 ++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/thermal/da9062-thermal.txt

diff --git a/Documentation/devicetree/bindings/thermal/da9062-thermal.txt b/Documentation/devicetree/bindings/thermal/da9062-thermal.txt
new file mode 100644
index 000000000000..e241bb5a5584
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/da9062-thermal.txt
@@ -0,0 +1,36 @@
+* Dialog DA9062/61 TJUNC Thermal Module
+
+This module is part of the DA9061/DA9062. For more details about entire
+DA9062 and DA9061 chips see Documentation/devicetree/bindings/mfd/da9062.txt
+
+Junction temperature thermal module uses an interrupt signal to identify
+high THERMAL_TRIP_HOT temperatures for the PMIC device.
+
+Required properties:
+
+- compatible: should be one of the following valid compatible string lines:
+        "dlg,da9061-thermal", "dlg,da9062-thermal"
+        "dlg,da9062-thermal"
+
+Optional properties:
+
+- polling-delay-passive : Specify the polling period, measured in
+    milliseconds, between thermal zone device update checks.
+
+Example: DA9062
+
+	pmic0: da9062@58 {
+		thermal {
+			compatible = "dlg,da9062-thermal";
+			polling-delay-passive = <3000>;
+		};
+	};
+
+Example: DA9061 using a fall-back compatible for the DA9062 onkey driver
+
+	pmic0: da9061@58 {
+		thermal {
+			compatible = "dlg,da9061-thermal", "dlg,da9062-thermal";
+			polling-delay-passive = <3000>;
+		};
+	};
-- 
cgit v1.2.3


From 608567aac3206ae886c79688fbb8a62c473b55ef Mon Sep 17 00:00:00 2001
From: Steve Twiss <stwiss.opensource@diasemi.com>
Date: Tue, 28 Mar 2017 15:43:33 +0100
Subject: thermal: da9062/61: Thermal junction temperature monitoring driver

Add junction temperature monitoring supervisor device driver, compatible
with the DA9062 and DA9061 PMICs. A MODULE_DEVICE_TABLE() macro is added.

If the PMIC's internal junction temperature rises above T_WARN (125 degC)
an interrupt is issued. This T_WARN level is defined as the
THERMAL_TRIP_HOT trip-wire inside the device driver.

The thermal triggering mechanism is interrupt based and happens when the
temperature rises above a given threshold level. The component cannot
return an exact temperature, it only has knowledge if the temperature is
above or below a given threshold value. A status bit must be polled to
detect when the temperature falls below that threshold level again. A
kernel work queue is configured to repeatedly poll and detect when the
temperature falls below this trip-wire, between 1 and 10 second intervals
(defaulting at 3 seconds).

This scheme is provided as an example. It would be expected that any
final implementation will also include a notify() function and any of these
settings could be altered to match the application where appropriate.

When over-temperature is reached, the interrupt from the DA9061/2 will be
repeatedly triggered. The IRQ is therefore disabled when the first
over-temperature event happens and the status bit is polled using a
work-queue until it becomes false.

This strategy is designed to allow the periodic transmission of uevents
(HOT trip point) as the first level of temperature supervision method. It
is intended for non-invasive temperature control, where the necessary
measures for cooling the system down are left to the host software. Once
the temperature falls again, the IRQ is re-enabled so a new critical
over-temperature event can be detected.

Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Steve Twiss <stwiss.opensource@diasemi.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/Kconfig          |  10 ++
 drivers/thermal/Makefile         |   1 +
 drivers/thermal/da9062-thermal.c | 315 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 326 insertions(+)
 create mode 100644 drivers/thermal/da9062-thermal.c

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index ac7301703d03..6699843918fa 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -303,6 +303,16 @@ config DB8500_CPUFREQ_COOLING
 	  bound cpufreq cooling device turns active to set CPU frequency low to
 	  cool down the CPU.
 
+config DA9062_THERMAL
+	tristate "DA9062/DA9061 Dialog Semiconductor thermal driver"
+	depends on MFD_DA9062 || COMPILE_TEST
+	depends on OF
+	help
+	  Enable this for the Dialog Semiconductor thermal sensor driver.
+	  This will report PMIC junction over-temperature for one thermal trip
+	  zone.
+	  Compatible with the DA9062 and DA9061 PMICs.
+
 config INTEL_POWERCLAMP
 	tristate "Intel PowerClamp idle injection driver"
 	depends on THERMAL
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 6b7706b9f27c..a1e9b8b4e897 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_IMX_THERMAL)	+= imx_thermal.o
 obj-$(CONFIG_MAX77620_THERMAL)	+= max77620_thermal.o
 obj-$(CONFIG_QORIQ_THERMAL)	+= qoriq_thermal.o
 obj-$(CONFIG_DB8500_CPUFREQ_COOLING)	+= db8500_cpufreq_cooling.o
+obj-$(CONFIG_DA9062_THERMAL)	+= da9062-thermal.o
 obj-$(CONFIG_INTEL_POWERCLAMP)	+= intel_powerclamp.o
 obj-$(CONFIG_X86_PKG_TEMP_THERMAL)	+= x86_pkg_temp_thermal.o
 obj-$(CONFIG_INTEL_SOC_DTS_IOSF_CORE)	+= intel_soc_dts_iosf.o
diff --git a/drivers/thermal/da9062-thermal.c b/drivers/thermal/da9062-thermal.c
new file mode 100644
index 000000000000..dd8dd947b7f0
--- /dev/null
+++ b/drivers/thermal/da9062-thermal.c
@@ -0,0 +1,315 @@
+/*
+ * Thermal device driver for DA9062 and DA9061
+ * Copyright (C) 2017  Dialog Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* When over-temperature is reached, an interrupt from the device will be
+ * triggered. Following this event the interrupt will be disabled and
+ * periodic transmission of uevents (HOT trip point) should define the
+ * first level of temperature supervision. It is expected that any final
+ * implementation of the thermal driver will include a .notify() function
+ * to implement these uevents to userspace.
+ *
+ * These uevents are intended to indicate non-invasive temperature control
+ * of the system, where the necessary measures for cooling are the
+ * responsibility of the host software. Once the temperature falls again,
+ * the IRQ is re-enabled so the start of a new over-temperature event can
+ * be detected without constant software monitoring.
+ */
+
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/thermal.h>
+#include <linux/workqueue.h>
+
+#include <linux/mfd/da9062/core.h>
+#include <linux/mfd/da9062/registers.h>
+
+/* Minimum, maximum and default polling millisecond periods are provided
+ * here as an example. It is expected that any final implementation to also
+ * include a modification of these settings to match the required
+ * application.
+ */
+#define DA9062_DEFAULT_POLLING_MS_PERIOD	3000
+#define DA9062_MAX_POLLING_MS_PERIOD		10000
+#define DA9062_MIN_POLLING_MS_PERIOD		1000
+
+#define DA9062_MILLI_CELSIUS(t)			((t) * 1000)
+
+struct da9062_thermal_config {
+	const char *name;
+};
+
+struct da9062_thermal {
+	struct da9062 *hw;
+	struct delayed_work work;
+	struct thermal_zone_device *zone;
+	enum thermal_device_mode mode;
+	struct mutex lock; /* protection for da9062_thermal temperature */
+	int temperature;
+	int irq;
+	const struct da9062_thermal_config *config;
+	struct device *dev;
+};
+
+static void da9062_thermal_poll_on(struct work_struct *work)
+{
+	struct da9062_thermal *thermal = container_of(work,
+						struct da9062_thermal,
+						work.work);
+	unsigned long delay;
+	unsigned int val;
+	int ret;
+
+	/* clear E_TEMP */
+	ret = regmap_write(thermal->hw->regmap,
+			   DA9062AA_EVENT_B,
+			   DA9062AA_E_TEMP_MASK);
+	if (ret < 0) {
+		dev_err(thermal->dev,
+			"Cannot clear the TJUNC temperature status\n");
+		goto err_enable_irq;
+	}
+
+	/* Now read E_TEMP again: it is acting like a status bit.
+	 * If over-temperature, then this status will be true.
+	 * If not over-temperature, this status will be false.
+	 */
+	ret = regmap_read(thermal->hw->regmap,
+			  DA9062AA_EVENT_B,
+			  &val);
+	if (ret < 0) {
+		dev_err(thermal->dev,
+			"Cannot check the TJUNC temperature status\n");
+		goto err_enable_irq;
+	}
+
+	if (val & DA9062AA_E_TEMP_MASK) {
+		mutex_lock(&thermal->lock);
+		thermal->temperature = DA9062_MILLI_CELSIUS(125);
+		mutex_unlock(&thermal->lock);
+		thermal_zone_device_update(thermal->zone,
+					   THERMAL_EVENT_UNSPECIFIED);
+
+		delay = msecs_to_jiffies(thermal->zone->passive_delay);
+		schedule_delayed_work(&thermal->work, delay);
+		return;
+	}
+
+	mutex_lock(&thermal->lock);
+	thermal->temperature = DA9062_MILLI_CELSIUS(0);
+	mutex_unlock(&thermal->lock);
+	thermal_zone_device_update(thermal->zone,
+				   THERMAL_EVENT_UNSPECIFIED);
+
+err_enable_irq:
+	enable_irq(thermal->irq);
+}
+
+static irqreturn_t da9062_thermal_irq_handler(int irq, void *data)
+{
+	struct da9062_thermal *thermal = data;
+
+	disable_irq_nosync(thermal->irq);
+	schedule_delayed_work(&thermal->work, 0);
+
+	return IRQ_HANDLED;
+}
+
+static int da9062_thermal_get_mode(struct thermal_zone_device *z,
+				   enum thermal_device_mode *mode)
+{
+	struct da9062_thermal *thermal = z->devdata;
+	*mode = thermal->mode;
+	return 0;
+}
+
+static int da9062_thermal_get_trip_type(struct thermal_zone_device *z,
+					int trip,
+					enum thermal_trip_type *type)
+{
+	struct da9062_thermal *thermal = z->devdata;
+
+	switch (trip) {
+	case 0:
+		*type = THERMAL_TRIP_HOT;
+		break;
+	default:
+		dev_err(thermal->dev,
+			"Driver does not support more than 1 trip-wire\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int da9062_thermal_get_trip_temp(struct thermal_zone_device *z,
+					int trip,
+					int *temp)
+{
+	struct da9062_thermal *thermal = z->devdata;
+
+	switch (trip) {
+	case 0:
+		*temp = DA9062_MILLI_CELSIUS(125);
+		break;
+	default:
+		dev_err(thermal->dev,
+			"Driver does not support more than 1 trip-wire\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int da9062_thermal_get_temp(struct thermal_zone_device *z,
+				   int *temp)
+{
+	struct da9062_thermal *thermal = z->devdata;
+
+	mutex_lock(&thermal->lock);
+	*temp = thermal->temperature;
+	mutex_unlock(&thermal->lock);
+
+	return 0;
+}
+
+static struct thermal_zone_device_ops da9062_thermal_ops = {
+	.get_temp	= da9062_thermal_get_temp,
+	.get_mode	= da9062_thermal_get_mode,
+	.get_trip_type	= da9062_thermal_get_trip_type,
+	.get_trip_temp	= da9062_thermal_get_trip_temp,
+};
+
+static const struct da9062_thermal_config da9062_config = {
+	.name = "da9062-thermal",
+};
+
+static const struct of_device_id da9062_compatible_reg_id_table[] = {
+	{ .compatible = "dlg,da9062-thermal", .data = &da9062_config },
+	{ },
+};
+
+MODULE_DEVICE_TABLE(of, da9062_compatible_reg_id_table);
+
+static int da9062_thermal_probe(struct platform_device *pdev)
+{
+	struct da9062 *chip = dev_get_drvdata(pdev->dev.parent);
+	struct da9062_thermal *thermal;
+	unsigned int pp_tmp = DA9062_DEFAULT_POLLING_MS_PERIOD;
+	const struct of_device_id *match;
+	int ret = 0;
+
+	match = of_match_node(da9062_compatible_reg_id_table,
+			      pdev->dev.of_node);
+	if (!match)
+		return -ENXIO;
+
+	if (pdev->dev.of_node) {
+		if (!of_property_read_u32(pdev->dev.of_node,
+					  "polling-delay-passive",
+					  &pp_tmp)) {
+			if (pp_tmp < DA9062_MIN_POLLING_MS_PERIOD ||
+			    pp_tmp > DA9062_MAX_POLLING_MS_PERIOD) {
+				dev_warn(&pdev->dev,
+					 "Out-of-range polling period %d ms\n",
+					 pp_tmp);
+				pp_tmp = DA9062_DEFAULT_POLLING_MS_PERIOD;
+			}
+		}
+	}
+
+	thermal = devm_kzalloc(&pdev->dev, sizeof(struct da9062_thermal),
+			       GFP_KERNEL);
+	if (!thermal) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	thermal->config = match->data;
+	thermal->hw = chip;
+	thermal->mode = THERMAL_DEVICE_ENABLED;
+	thermal->dev = &pdev->dev;
+
+	INIT_DELAYED_WORK(&thermal->work, da9062_thermal_poll_on);
+	mutex_init(&thermal->lock);
+
+	thermal->zone = thermal_zone_device_register(thermal->config->name,
+					1, 0, thermal,
+					&da9062_thermal_ops, NULL, pp_tmp,
+					0);
+	if (IS_ERR(thermal->zone)) {
+		dev_err(&pdev->dev, "Cannot register thermal zone device\n");
+		ret = PTR_ERR(thermal->zone);
+		goto err;
+	}
+
+	dev_dbg(&pdev->dev,
+		"TJUNC temperature polling period set at %d ms\n",
+		thermal->zone->passive_delay);
+
+	ret = platform_get_irq_byname(pdev, "THERMAL");
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to get platform IRQ.\n");
+		goto err_zone;
+	}
+	thermal->irq = ret;
+
+	ret = request_threaded_irq(thermal->irq, NULL,
+				   da9062_thermal_irq_handler,
+				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				   "THERMAL", thermal);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Failed to request thermal device IRQ.\n");
+		goto err_zone;
+	}
+
+	platform_set_drvdata(pdev, thermal);
+	return 0;
+
+err_zone:
+	thermal_zone_device_unregister(thermal->zone);
+err:
+	return ret;
+}
+
+static int da9062_thermal_remove(struct platform_device *pdev)
+{
+	struct	da9062_thermal *thermal = platform_get_drvdata(pdev);
+
+	free_irq(thermal->irq, thermal);
+	cancel_delayed_work_sync(&thermal->work);
+	thermal_zone_device_unregister(thermal->zone);
+	return 0;
+}
+
+static struct platform_driver da9062_thermal_driver = {
+	.probe	= da9062_thermal_probe,
+	.remove	= da9062_thermal_remove,
+	.driver	= {
+		.name	= "da9062-thermal",
+		.of_match_table = da9062_compatible_reg_id_table,
+	},
+};
+
+module_platform_driver(da9062_thermal_driver);
+
+MODULE_AUTHOR("Steve Twiss");
+MODULE_DESCRIPTION("Thermal TJUNC device driver for Dialog DA9062 and DA9061");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:da9062-thermal");
-- 
cgit v1.2.3


From 5016bdb796b3726eec043ca0ce3be981f712c756 Mon Sep 17 00:00:00 2001
From: Nate Watterson <nwatters@codeaurora.org>
Date: Fri, 7 Apr 2017 01:36:20 -0400
Subject: iommu/iova: Fix underflow bug in __alloc_and_insert_iova_range

Normally, calling alloc_iova() using an iova_domain with insufficient
pfns remaining between start_pfn and dma_limit will fail and return a
NULL pointer. Unexpectedly, if such a "full" iova_domain contains an
iova with pfn_lo == 0, the alloc_iova() call will instead succeed and
return an iova containing invalid pfns.

This is caused by an underflow bug in __alloc_and_insert_iova_range()
that occurs after walking the "full" iova tree when the search ends
at the iova with pfn_lo == 0 and limit_pfn is then adjusted to be just
below that (-1). This (now huge) limit_pfn gives the impression that a
vast amount of space is available between it and start_pfn and thus
a new iova is allocated with the invalid pfn_hi value, 0xFFF.... .

To rememdy this, a check is introduced to ensure that adjustments to
limit_pfn will not underflow.

This issue has been observed in the wild, and is easily reproduced with
the following sample code.

	struct iova_domain *iovad = kzalloc(sizeof(*iovad), GFP_KERNEL);
	struct iova *rsvd_iova, *good_iova, *bad_iova;
	unsigned long limit_pfn = 3;
	unsigned long start_pfn = 1;
	unsigned long va_size = 2;

	init_iova_domain(iovad, SZ_4K, start_pfn, limit_pfn);
	rsvd_iova = reserve_iova(iovad, 0, 0);
	good_iova = alloc_iova(iovad, va_size, limit_pfn, true);
	bad_iova = alloc_iova(iovad, va_size, limit_pfn, true);

Prior to the patch, this yielded:
	*rsvd_iova == {0, 0}   /* Expected */
	*good_iova == {2, 3}   /* Expected */
	*bad_iova  == {-2, -1} /* Oh no... */

After the patch, bad_iova is NULL as expected since inadequate
space remains between limit_pfn and start_pfn after allocating
good_iova.

Signed-off-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index b7268a14184f..f6533e0198f6 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -138,7 +138,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 				break;	/* found a free slot */
 		}
 adjust_limit_pfn:
-		limit_pfn = curr_iova->pfn_lo - 1;
+		limit_pfn = curr_iova->pfn_lo ? (curr_iova->pfn_lo - 1) : 0;
 move_left:
 		prev = curr;
 		curr = rb_prev(curr);
-- 
cgit v1.2.3


From fd33f3eca6bfb0bba9b928ba28a6c6c16f7243ad Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Fri, 31 Mar 2017 12:14:17 +0200
Subject: MAINTAINERS: Add maintainers for the meson clock driver

Suggested-by: Michael Turquette <mturquette@baylibre.com>
Cc: Kevin Hilman <khilman@baylibre.com>,
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
---
 MAINTAINERS | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c265a5fe4848..8cb237925b06 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1069,6 +1069,16 @@ F: 	drivers/pinctrl/meson/
 F:	drivers/mmc/host/meson*
 N:	meson
 
+ARM/Amlogic Meson SoC CLOCK FRAMEWORK
+M:	Neil Armstrong <narmstrong@baylibre.com>
+M:	Jerome Brunet <jbrunet@baylibre.com>
+L:	linux-amlogic@lists.infradead.org
+S:	Maintained
+F:	drivers/clk/meson/
+F:	include/dt-bindings/clock/meson*
+F:	include/dt-bindings/clock/gxbb*
+F:	Documentation/devicetree/bindings/clock/amlogic*
+
 ARM/Annapurna Labs ALPINE ARCHITECTURE
 M:	Tsahee Zidenberg <tsahee@annapurnalabs.com>
 M:	Antoine Tenart <antoine.tenart@free-electrons.com>
-- 
cgit v1.2.3


From a70c6e06ed7c8c84612c7e3dada9d6895b65c70a Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 28 Mar 2017 10:47:22 +0200
Subject: clk: meson: gxbb: protect against holes in the onecell_data array

The clock controller is getting more complex and it might be possible, in
the future, to have holes in the clk_hw_onecell_data array. Just make sure
we skip those holes if it ever happens.

Acked-by: Michael Turquette <mturquette@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 drivers/clk/meson/gxbb.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index 75197664a7ee..28812ea41a60 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -1388,6 +1388,10 @@ static int gxbb_clkc_probe(struct platform_device *pdev)
 	 * register all clks
 	 */
 	for (clkid = 0; clkid < clkc_data->hw_onecell_data->num; clkid++) {
+		/* array might be sparse */
+		if (!clkc_data->hw_onecell_data->hws[clkid])
+			continue;
+
 		ret = devm_clk_hw_register(dev,
 					clkc_data->hw_onecell_data->hws[clkid]);
 		if (ret)
-- 
cgit v1.2.3


From 59e85335dda9826284a1f0f160ea8f10b59e0568 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 14 Feb 2017 00:13:55 +0100
Subject: clk: meson: add audio clock divider support

The audio divider needs a specific clock divider driver.
With am mpll parent clock, which is able to provide a fairly precise rate,
the generic divider tends to select low value of the divider. In such case
the quality of the clock is very poor. For the same final rate, maximizing
the audio clock divider value and selecting the corresponding mpll rate
gives better results. This is what this driver aims to acheive. So far, so
good.

Cc: Hendrik v. Raven <hendrik@consetetur.de>
Acked-by: Michael Turquette <mturquette@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 drivers/clk/meson/Makefile            |   2 +-
 drivers/clk/meson/clk-audio-divider.c | 144 ++++++++++++++++++++++++++++++++++
 drivers/clk/meson/clkc.h              |  10 +++
 3 files changed, 155 insertions(+), 1 deletion(-)
 create mode 100644 drivers/clk/meson/clk-audio-divider.c

diff --git a/drivers/clk/meson/Makefile b/drivers/clk/meson/Makefile
index 349583405b7c..83b6d9d65aa1 100644
--- a/drivers/clk/meson/Makefile
+++ b/drivers/clk/meson/Makefile
@@ -2,6 +2,6 @@
 # Makefile for Meson specific clk
 #
 
-obj-$(CONFIG_COMMON_CLK_AMLOGIC) += clk-pll.o clk-cpu.o clk-mpll.o
+obj-$(CONFIG_COMMON_CLK_AMLOGIC) += clk-pll.o clk-cpu.o clk-mpll.o clk-audio-divider.o
 obj-$(CONFIG_COMMON_CLK_MESON8B) += meson8b.o
 obj-$(CONFIG_COMMON_CLK_GXBB)	 += gxbb.o gxbb-aoclk.o
diff --git a/drivers/clk/meson/clk-audio-divider.c b/drivers/clk/meson/clk-audio-divider.c
new file mode 100644
index 000000000000..6c07db06642d
--- /dev/null
+++ b/drivers/clk/meson/clk-audio-divider.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2017 AmLogic, Inc.
+ * Author: Jerome Brunet <jbrunet@baylibre.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * i2s master clock divider: The algorithm of the generic clk-divider used with
+ * a very precise clock parent such as the mpll tends to select a low divider
+ * factor. This gives poor results with this particular divider, especially with
+ * high frequencies (> 100 MHz)
+ *
+ * This driver try to select the maximum possible divider with the rate the
+ * upstream clock can provide.
+ */
+
+#include <linux/clk-provider.h>
+#include "clkc.h"
+
+#define to_meson_clk_audio_divider(_hw) container_of(_hw, \
+				struct meson_clk_audio_divider, hw)
+
+static int _div_round(unsigned long parent_rate, unsigned long rate,
+		      unsigned long flags)
+{
+	if (flags & CLK_DIVIDER_ROUND_CLOSEST)
+		return DIV_ROUND_CLOSEST_ULL((u64)parent_rate, rate);
+
+	return DIV_ROUND_UP_ULL((u64)parent_rate, rate);
+}
+
+static int _get_val(unsigned long parent_rate, unsigned long rate)
+{
+	return DIV_ROUND_UP_ULL((u64)parent_rate, rate) - 1;
+}
+
+static int _valid_divider(struct clk_hw *hw, int divider)
+{
+	struct meson_clk_audio_divider *adiv =
+		to_meson_clk_audio_divider(hw);
+	int max_divider;
+	u8 width;
+
+	width = adiv->div.width;
+	max_divider = 1 << width;
+
+	return clamp(divider, 1, max_divider);
+}
+
+static unsigned long audio_divider_recalc_rate(struct clk_hw *hw,
+					       unsigned long parent_rate)
+{
+	struct meson_clk_audio_divider *adiv =
+		to_meson_clk_audio_divider(hw);
+	struct parm *p;
+	unsigned long reg, divider;
+
+	p = &adiv->div;
+	reg = readl(adiv->base + p->reg_off);
+	divider = PARM_GET(p->width, p->shift, reg) + 1;
+
+	return DIV_ROUND_UP_ULL((u64)parent_rate, divider);
+}
+
+static long audio_divider_round_rate(struct clk_hw *hw,
+				     unsigned long rate,
+				     unsigned long *parent_rate)
+{
+	struct meson_clk_audio_divider *adiv =
+		to_meson_clk_audio_divider(hw);
+	unsigned long max_prate;
+	int divider;
+
+	if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) {
+		divider = _div_round(*parent_rate, rate, adiv->flags);
+		divider = _valid_divider(hw, divider);
+		return DIV_ROUND_UP_ULL((u64)*parent_rate, divider);
+	}
+
+	/* Get the maximum parent rate */
+	max_prate = clk_hw_round_rate(clk_hw_get_parent(hw), ULONG_MAX);
+
+	/* Get the corresponding rounded down divider */
+	divider = max_prate / rate;
+	divider = _valid_divider(hw, divider);
+
+	/* Get actual rate of the parent */
+	*parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw),
+					 divider * rate);
+
+	return DIV_ROUND_UP_ULL((u64)*parent_rate, divider);
+}
+
+static int audio_divider_set_rate(struct clk_hw *hw,
+				  unsigned long rate,
+				  unsigned long parent_rate)
+{
+	struct meson_clk_audio_divider *adiv =
+		to_meson_clk_audio_divider(hw);
+	struct parm *p;
+	unsigned long reg, flags = 0;
+	int val;
+
+	val = _get_val(parent_rate, rate);
+
+	if (adiv->lock)
+		spin_lock_irqsave(adiv->lock, flags);
+	else
+		__acquire(adiv->lock);
+
+	p = &adiv->div;
+	reg = readl(adiv->base + p->reg_off);
+	reg = PARM_SET(p->width, p->shift, reg, val);
+	writel(reg, adiv->base + p->reg_off);
+
+	if (adiv->lock)
+		spin_unlock_irqrestore(adiv->lock, flags);
+	else
+		__release(adiv->lock);
+
+	return 0;
+}
+
+const struct clk_ops meson_clk_audio_divider_ro_ops = {
+	.recalc_rate	= audio_divider_recalc_rate,
+	.round_rate	= audio_divider_round_rate,
+};
+
+const struct clk_ops meson_clk_audio_divider_ops = {
+	.recalc_rate	= audio_divider_recalc_rate,
+	.round_rate	= audio_divider_round_rate,
+	.set_rate	= audio_divider_set_rate,
+};
diff --git a/drivers/clk/meson/clkc.h b/drivers/clk/meson/clkc.h
index b0c9999d03de..d6feafe8bd6c 100644
--- a/drivers/clk/meson/clkc.h
+++ b/drivers/clk/meson/clkc.h
@@ -121,6 +121,14 @@ struct meson_clk_mpll {
 	spinlock_t *lock;
 };
 
+struct meson_clk_audio_divider {
+	struct clk_hw hw;
+	void __iomem *base;
+	struct parm div;
+	u8 flags;
+	spinlock_t *lock;
+};
+
 #define MESON_GATE(_name, _reg, _bit)					\
 struct clk_gate _name = { 						\
 	.reg = (void __iomem *) _reg, 					\
@@ -141,5 +149,7 @@ extern const struct clk_ops meson_clk_pll_ops;
 extern const struct clk_ops meson_clk_cpu_ops;
 extern const struct clk_ops meson_clk_mpll_ro_ops;
 extern const struct clk_ops meson_clk_mpll_ops;
+extern const struct clk_ops meson_clk_audio_divider_ro_ops;
+extern const struct clk_ops meson_clk_audio_divider_ops;
 
 #endif /* __CLKC_H */
-- 
cgit v1.2.3


From 4087bd4b21702dce65c3e5179308dead2e96e06d Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 24 Jan 2017 18:35:23 +0100
Subject: clk: meson: gxbb: add cts_amclk

Add the i2s master clock also referred as cts_amclk

Acked-by: Michael Turquette <mturquette@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 drivers/clk/meson/gxbb.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/clk/meson/gxbb.h |  5 +++-
 2 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index 28812ea41a60..1c20edbfc812 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -845,6 +845,51 @@ static struct clk_mux gxbb_mali = {
 	},
 };
 
+static struct clk_mux gxbb_cts_amclk_sel = {
+	.reg = (void *) HHI_AUD_CLK_CNTL,
+	.mask = 0x3,
+	.shift = 9,
+	/* Default parent unknown (register reset value: 0) */
+	.table = (u32[]){ 1, 2, 3 },
+	.lock = &clk_lock,
+		.hw.init = &(struct clk_init_data){
+		.name = "cts_amclk_sel",
+		.ops = &clk_mux_ops,
+		.parent_names = (const char *[]){ "mpll0", "mpll1", "mpll2" },
+		.num_parents = 3,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct meson_clk_audio_divider gxbb_cts_amclk_div = {
+	.div = {
+		.reg_off = HHI_AUD_CLK_CNTL,
+		.shift   = 0,
+		.width   = 8,
+	},
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "cts_amclk_div",
+		.ops = &meson_clk_audio_divider_ops,
+		.parent_names = (const char *[]){ "cts_amclk_sel" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT | CLK_DIVIDER_ROUND_CLOSEST,
+	},
+};
+
+static struct clk_gate gxbb_cts_amclk = {
+	.reg = (void *) HHI_AUD_CLK_CNTL,
+	.bit_idx = 8,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "cts_amclk",
+		.ops = &clk_gate_ops,
+		.parent_names = (const char *[]){ "cts_amclk_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
 /* Everything Else (EE) domain gates */
 static MESON_GATE(gxbb_ddr, HHI_GCLK_MPEG0, 0);
 static MESON_GATE(gxbb_dos, HHI_GCLK_MPEG0, 1);
@@ -1045,6 +1090,9 @@ static struct clk_hw_onecell_data gxbb_hw_onecell_data = {
 		[CLKID_MALI_1_DIV]	    = &gxbb_mali_1_div.hw,
 		[CLKID_MALI_1]		    = &gxbb_mali_1.hw,
 		[CLKID_MALI]		    = &gxbb_mali.hw,
+		[CLKID_CTS_AMCLK]	    = &gxbb_cts_amclk.hw,
+		[CLKID_CTS_AMCLK_SEL]	    = &gxbb_cts_amclk_sel.hw,
+		[CLKID_CTS_AMCLK_DIV]	    = &gxbb_cts_amclk_div.hw,
 	},
 	.num = NR_CLKS,
 };
@@ -1158,6 +1206,9 @@ static struct clk_hw_onecell_data gxl_hw_onecell_data = {
 		[CLKID_MALI_1_DIV]	    = &gxbb_mali_1_div.hw,
 		[CLKID_MALI_1]		    = &gxbb_mali_1.hw,
 		[CLKID_MALI]		    = &gxbb_mali.hw,
+		[CLKID_CTS_AMCLK]	    = &gxbb_cts_amclk.hw,
+		[CLKID_CTS_AMCLK_SEL]	    = &gxbb_cts_amclk_sel.hw,
+		[CLKID_CTS_AMCLK_DIV]	    = &gxbb_cts_amclk_div.hw,
 	},
 	.num = NR_CLKS,
 };
@@ -1270,6 +1321,7 @@ static struct clk_gate *const gxbb_clk_gates[] = {
 	&gxbb_sar_adc_clk,
 	&gxbb_mali_0,
 	&gxbb_mali_1,
+	&gxbb_cts_amclk,
 };
 
 static struct clk_mux *const gxbb_clk_muxes[] = {
@@ -1278,6 +1330,7 @@ static struct clk_mux *const gxbb_clk_muxes[] = {
 	&gxbb_mali_0_sel,
 	&gxbb_mali_1_sel,
 	&gxbb_mali,
+	&gxbb_cts_amclk_sel,
 };
 
 static struct clk_divider *const gxbb_clk_dividers[] = {
@@ -1287,6 +1340,10 @@ static struct clk_divider *const gxbb_clk_dividers[] = {
 	&gxbb_mali_1_div,
 };
 
+static struct meson_clk_audio_divider *const gxbb_audio_dividers[] = {
+	&gxbb_cts_amclk_div,
+};
+
 struct clkc_data {
 	struct clk_gate *const *clk_gates;
 	unsigned int clk_gates_count;
@@ -1298,6 +1355,8 @@ struct clkc_data {
 	unsigned int clk_muxes_count;
 	struct clk_divider *const *clk_dividers;
 	unsigned int clk_dividers_count;
+	struct meson_clk_audio_divider *const *clk_audio_dividers;
+	unsigned int clk_audio_dividers_count;
 	struct meson_clk_cpu *cpu_clk;
 	struct clk_hw_onecell_data *hw_onecell_data;
 };
@@ -1313,6 +1372,8 @@ static const struct clkc_data gxbb_clkc_data = {
 	.clk_muxes_count = ARRAY_SIZE(gxbb_clk_muxes),
 	.clk_dividers = gxbb_clk_dividers,
 	.clk_dividers_count = ARRAY_SIZE(gxbb_clk_dividers),
+	.clk_audio_dividers = gxbb_audio_dividers,
+	.clk_audio_dividers_count = ARRAY_SIZE(gxbb_audio_dividers),
 	.cpu_clk = &gxbb_cpu_clk,
 	.hw_onecell_data = &gxbb_hw_onecell_data,
 };
@@ -1328,6 +1389,8 @@ static const struct clkc_data gxl_clkc_data = {
 	.clk_muxes_count = ARRAY_SIZE(gxbb_clk_muxes),
 	.clk_dividers = gxbb_clk_dividers,
 	.clk_dividers_count = ARRAY_SIZE(gxbb_clk_dividers),
+	.clk_audio_dividers = gxbb_audio_dividers,
+	.clk_audio_dividers_count = ARRAY_SIZE(gxbb_audio_dividers),
 	.cpu_clk = &gxbb_cpu_clk,
 	.hw_onecell_data = &gxl_hw_onecell_data,
 };
@@ -1384,6 +1447,10 @@ static int gxbb_clkc_probe(struct platform_device *pdev)
 		clkc_data->clk_dividers[i]->reg = clk_base +
 			(u64)clkc_data->clk_dividers[i]->reg;
 
+	/* Populate base address for the audio dividers */
+	for (i = 0; i < clkc_data->clk_audio_dividers_count; i++)
+		clkc_data->clk_audio_dividers[i]->base = clk_base;
+
 	/*
 	 * register all clks
 	 */
diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h
index 9d949244b104..b3cd11fd3aab 100644
--- a/drivers/clk/meson/gxbb.h
+++ b/drivers/clk/meson/gxbb.h
@@ -277,8 +277,11 @@
 #define CLKID_MALI_1_DIV	 104
 /* CLKID_MALI_1	*/
 /* CLKID_MALI	*/
+#define CLKID_CTS_AMCLK		  107
+#define CLKID_CTS_AMCLK_SEL	  108
+#define CLKID_CTS_AMCLK_DIV	  109
 
-#define NR_CLKS			  107
+#define NR_CLKS			  110
 
 /* include the CLKIDs that have been made part of the stable DT binding */
 #include <dt-bindings/clock/gxbb-clkc.h>
-- 
cgit v1.2.3


From 3c277c247eabeb0c869c06580c11efe12094a32f Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Mon, 20 Feb 2017 18:02:34 +0100
Subject: clk: meson: gxbb: add cts_mclk_i958

Add the spdif master clock also referred as cts_mclk_i958

Acked-by: Michael Turquette <mturquette@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 drivers/clk/meson/gxbb.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/clk/meson/gxbb.h |  5 ++++-
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index 1c20edbfc812..72492cc63915 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -890,6 +890,49 @@ static struct clk_gate gxbb_cts_amclk = {
 	},
 };
 
+static struct clk_mux gxbb_cts_mclk_i958_sel = {
+	.reg = (void *)HHI_AUD_CLK_CNTL2,
+	.mask = 0x3,
+	.shift = 25,
+	/* Default parent unknown (register reset value: 0) */
+	.table = (u32[]){ 1, 2, 3 },
+	.lock = &clk_lock,
+		.hw.init = &(struct clk_init_data){
+		.name = "cts_mclk_i958_sel",
+		.ops = &clk_mux_ops,
+		.parent_names = (const char *[]){ "mpll0", "mpll1", "mpll2" },
+		.num_parents = 3,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_divider gxbb_cts_mclk_i958_div = {
+	.reg = (void *)HHI_AUD_CLK_CNTL2,
+	.shift = 16,
+	.width = 8,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "cts_mclk_i958_div",
+		.ops = &clk_divider_ops,
+		.parent_names = (const char *[]){ "cts_mclk_i958_sel" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT | CLK_DIVIDER_ROUND_CLOSEST,
+	},
+};
+
+static struct clk_gate gxbb_cts_mclk_i958 = {
+	.reg = (void *)HHI_AUD_CLK_CNTL2,
+	.bit_idx = 24,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "cts_mclk_i958",
+		.ops = &clk_gate_ops,
+		.parent_names = (const char *[]){ "cts_mclk_i958_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
 /* Everything Else (EE) domain gates */
 static MESON_GATE(gxbb_ddr, HHI_GCLK_MPEG0, 0);
 static MESON_GATE(gxbb_dos, HHI_GCLK_MPEG0, 1);
@@ -1093,6 +1136,9 @@ static struct clk_hw_onecell_data gxbb_hw_onecell_data = {
 		[CLKID_CTS_AMCLK]	    = &gxbb_cts_amclk.hw,
 		[CLKID_CTS_AMCLK_SEL]	    = &gxbb_cts_amclk_sel.hw,
 		[CLKID_CTS_AMCLK_DIV]	    = &gxbb_cts_amclk_div.hw,
+		[CLKID_CTS_MCLK_I958]	    = &gxbb_cts_mclk_i958.hw,
+		[CLKID_CTS_MCLK_I958_SEL]   = &gxbb_cts_mclk_i958_sel.hw,
+		[CLKID_CTS_MCLK_I958_DIV]   = &gxbb_cts_mclk_i958_div.hw,
 	},
 	.num = NR_CLKS,
 };
@@ -1209,6 +1255,9 @@ static struct clk_hw_onecell_data gxl_hw_onecell_data = {
 		[CLKID_CTS_AMCLK]	    = &gxbb_cts_amclk.hw,
 		[CLKID_CTS_AMCLK_SEL]	    = &gxbb_cts_amclk_sel.hw,
 		[CLKID_CTS_AMCLK_DIV]	    = &gxbb_cts_amclk_div.hw,
+		[CLKID_CTS_MCLK_I958]	    = &gxbb_cts_mclk_i958.hw,
+		[CLKID_CTS_MCLK_I958_SEL]   = &gxbb_cts_mclk_i958_sel.hw,
+		[CLKID_CTS_MCLK_I958_DIV]   = &gxbb_cts_mclk_i958_div.hw,
 	},
 	.num = NR_CLKS,
 };
@@ -1322,6 +1371,7 @@ static struct clk_gate *const gxbb_clk_gates[] = {
 	&gxbb_mali_0,
 	&gxbb_mali_1,
 	&gxbb_cts_amclk,
+	&gxbb_cts_mclk_i958,
 };
 
 static struct clk_mux *const gxbb_clk_muxes[] = {
@@ -1331,6 +1381,7 @@ static struct clk_mux *const gxbb_clk_muxes[] = {
 	&gxbb_mali_1_sel,
 	&gxbb_mali,
 	&gxbb_cts_amclk_sel,
+	&gxbb_cts_mclk_i958_sel,
 };
 
 static struct clk_divider *const gxbb_clk_dividers[] = {
@@ -1338,6 +1389,7 @@ static struct clk_divider *const gxbb_clk_dividers[] = {
 	&gxbb_sar_adc_clk_div,
 	&gxbb_mali_0_div,
 	&gxbb_mali_1_div,
+	&gxbb_cts_mclk_i958_div,
 };
 
 static struct meson_clk_audio_divider *const gxbb_audio_dividers[] = {
diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h
index b3cd11fd3aab..ac2718925b68 100644
--- a/drivers/clk/meson/gxbb.h
+++ b/drivers/clk/meson/gxbb.h
@@ -280,8 +280,11 @@
 #define CLKID_CTS_AMCLK		  107
 #define CLKID_CTS_AMCLK_SEL	  108
 #define CLKID_CTS_AMCLK_DIV	  109
+#define CLKID_CTS_MCLK_I958	  110
+#define CLKID_CTS_MCLK_I958_SEL	  111
+#define CLKID_CTS_MCLK_I958_DIV	  112
 
-#define NR_CLKS			  110
+#define NR_CLKS			  113
 
 /* include the CLKIDs that have been made part of the stable DT binding */
 #include <dt-bindings/clock/gxbb-clkc.h>
-- 
cgit v1.2.3


From 7eaa44f6207fb64b1fa4304f83f22486590540e2 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Fri, 3 Mar 2017 12:40:15 +0100
Subject: clk: meson: gxbb: add cts_i958 clock

This adds the cts_i958 clock to control the clock source of the spdif
output block. This mux is not explicitly mentionned in the documentation
but it is critical to the spdif dai. It is used to select whether the clock
source of the spdif output is cts_amclk (when data are taken from i2s
buffer) or the cts_mclk_i958 (when data are taken from the spdif buffer)

Acked-by: Michael Turquette <mturquette@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 drivers/clk/meson/gxbb.c | 21 +++++++++++++++++++++
 drivers/clk/meson/gxbb.h |  3 ++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index 72492cc63915..ad5f027af1a2 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -933,6 +933,24 @@ static struct clk_gate gxbb_cts_mclk_i958 = {
 	},
 };
 
+static struct clk_mux gxbb_cts_i958 = {
+	.reg = (void *)HHI_AUD_CLK_CNTL2,
+	.mask = 0x1,
+	.shift = 27,
+	.lock = &clk_lock,
+		.hw.init = &(struct clk_init_data){
+		.name = "cts_i958",
+		.ops = &clk_mux_ops,
+		.parent_names = (const char *[]){ "cts_amclk", "cts_mclk_i958" },
+		.num_parents = 2,
+		/*
+		 *The parent is specific to origin of the audio data. Let the
+		 * consumer choose the appropriate parent
+		 */
+		.flags = CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+	},
+};
+
 /* Everything Else (EE) domain gates */
 static MESON_GATE(gxbb_ddr, HHI_GCLK_MPEG0, 0);
 static MESON_GATE(gxbb_dos, HHI_GCLK_MPEG0, 1);
@@ -1139,6 +1157,7 @@ static struct clk_hw_onecell_data gxbb_hw_onecell_data = {
 		[CLKID_CTS_MCLK_I958]	    = &gxbb_cts_mclk_i958.hw,
 		[CLKID_CTS_MCLK_I958_SEL]   = &gxbb_cts_mclk_i958_sel.hw,
 		[CLKID_CTS_MCLK_I958_DIV]   = &gxbb_cts_mclk_i958_div.hw,
+		[CLKID_CTS_I958]	    = &gxbb_cts_i958.hw,
 	},
 	.num = NR_CLKS,
 };
@@ -1258,6 +1277,7 @@ static struct clk_hw_onecell_data gxl_hw_onecell_data = {
 		[CLKID_CTS_MCLK_I958]	    = &gxbb_cts_mclk_i958.hw,
 		[CLKID_CTS_MCLK_I958_SEL]   = &gxbb_cts_mclk_i958_sel.hw,
 		[CLKID_CTS_MCLK_I958_DIV]   = &gxbb_cts_mclk_i958_div.hw,
+		[CLKID_CTS_I958]	    = &gxbb_cts_i958.hw,
 	},
 	.num = NR_CLKS,
 };
@@ -1382,6 +1402,7 @@ static struct clk_mux *const gxbb_clk_muxes[] = {
 	&gxbb_mali,
 	&gxbb_cts_amclk_sel,
 	&gxbb_cts_mclk_i958_sel,
+	&gxbb_cts_i958,
 };
 
 static struct clk_divider *const gxbb_clk_dividers[] = {
diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h
index ac2718925b68..1d8d13f5e813 100644
--- a/drivers/clk/meson/gxbb.h
+++ b/drivers/clk/meson/gxbb.h
@@ -283,8 +283,9 @@
 #define CLKID_CTS_MCLK_I958	  110
 #define CLKID_CTS_MCLK_I958_SEL	  111
 #define CLKID_CTS_MCLK_I958_DIV	  112
+#define CLKID_CTS_I958		  113
 
-#define NR_CLKS			  113
+#define NR_CLKS			  114
 
 /* include the CLKIDs that have been made part of the stable DT binding */
 #include <dt-bindings/clock/gxbb-clkc.h>
-- 
cgit v1.2.3


From 9a1c779e6b06855e41099caa6f15b3b584dfa88c Mon Sep 17 00:00:00 2001
From: Liam Beguin <lbeguin@tycoint.com>
Date: Fri, 7 Apr 2017 17:03:24 +0200
Subject: video: ARM CLCD: fix dma allocation size

This patch forces the frambuffer size to be aligned on kernel pages.

During the board startup, the splash screed did appear;
the "ts_test" program or our application were not able to start.

The following error message was reported:
error: failed to map framebuffer device to memory.
LinuxFB: driver cannot connect

The issue was discovered, on the LPC32xx platform, during the migration
of the LCD definition from the board file to the device tree.

Signed-off-by: Liam Beguin <lbeguin@tycoint.com>
Signed-off-by: Sylvain Lemieux <slemieux@tycoint.com>
Cc: Vladimir Zapolskiy <vz@mleia.com>
Cc: Russell King <linux@armlinux.org.uk>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/amba-clcd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c
index 0fab92c62828..ffc2c33c6cef 100644
--- a/drivers/video/fbdev/amba-clcd.c
+++ b/drivers/video/fbdev/amba-clcd.c
@@ -881,8 +881,8 @@ static int clcdfb_of_dma_setup(struct clcd_fb *fb)
 	if (err)
 		return err;
 
-	framesize = fb->panel->mode.xres * fb->panel->mode.yres *
-			fb->panel->bpp / 8;
+	framesize = PAGE_ALIGN(fb->panel->mode.xres * fb->panel->mode.yres *
+			fb->panel->bpp / 8);
 	fb->fb.screen_base = dma_alloc_coherent(&fb->dev->dev, framesize,
 			&dma, GFP_KERNEL);
 	if (!fb->fb.screen_base)
-- 
cgit v1.2.3


From 5a93db427ab170c9793d76abf3e4be1ebd09375f Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 7 Apr 2017 17:03:24 +0200
Subject: xen, fbfront: add support for specifying size via xenstore

Today xen-fbfront supports specifying the display size via module
parameters only. Add support for specifying the size via Xenstore in
order to enable doing this easily via the domain's Xen configuration.

Add an error message in case the configured display size conflicts
with video memory size.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/xen-fbfront.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c
index d0115a7af0a9..1f892d7235db 100644
--- a/drivers/video/fbdev/xen-fbfront.c
+++ b/drivers/video/fbdev/xen-fbfront.c
@@ -18,6 +18,8 @@
  * frame buffer.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/console.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -380,10 +382,18 @@ static int xenfb_probe(struct xenbus_device *dev,
 			video[KPARAM_MEM] = val;
 	}
 
+	video[KPARAM_WIDTH] = xenbus_read_unsigned(dev->otherend, "width",
+						   video[KPARAM_WIDTH]);
+	video[KPARAM_HEIGHT] = xenbus_read_unsigned(dev->otherend, "height",
+						    video[KPARAM_HEIGHT]);
+
 	/* If requested res does not fit in available memory, use default */
 	fb_size = video[KPARAM_MEM] * 1024 * 1024;
 	if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH / 8
 	    > fb_size) {
+		pr_warn("display parameters %d,%d,%d invalid, use defaults\n",
+			video[KPARAM_MEM], video[KPARAM_WIDTH],
+			video[KPARAM_HEIGHT]);
 		video[KPARAM_WIDTH] = XENFB_WIDTH;
 		video[KPARAM_HEIGHT] = XENFB_HEIGHT;
 		fb_size = XENFB_DEFAULT_FB_LEN;
-- 
cgit v1.2.3


From 0cb51f6532639254eca4aba979b503f6eb467bb4 Mon Sep 17 00:00:00 2001
From: Pushkar Jambhlekar <pushkar.iit@gmail.com>
Date: Fri, 7 Apr 2017 17:03:24 +0200
Subject: drivers/video/fbdev: Fixing coding guidelines in acornfb.c

Fixing coding guidelines error reported by 'checkpatch.pl'

Signed-off-by: Pushkar Jambhlekar <pushkar.iit@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/acornfb.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/video/fbdev/acornfb.c b/drivers/video/fbdev/acornfb.c
index fb75b7e5a19a..0c325b4da61d 100644
--- a/drivers/video/fbdev/acornfb.c
+++ b/drivers/video/fbdev/acornfb.c
@@ -101,7 +101,7 @@ extern unsigned int vram_size;	/* set by setup.c */
 #ifdef HAS_VIDC20
 #include <mach/acornfb.h>
 
-#define MAX_SIZE	2*1024*1024
+#define MAX_SIZE	(2*1024*1024)
 
 /* VIDC20 has a different set of rules from the VIDC:
  *  hcr  : must be multiple of 4
@@ -162,7 +162,7 @@ static void acornfb_set_timing(struct fb_info *info)
 	if (memcmp(&current_vidc, &vidc, sizeof(vidc))) {
 		current_vidc = vidc;
 
-		vidc_writel(VIDC20_CTRL| vidc.control);
+		vidc_writel(VIDC20_CTRL | vidc.control);
 		vidc_writel(0xd0000000 | vidc.pll_ctl);
 		vidc_writel(0x80000000 | vidc.h_cycle);
 		vidc_writel(0x81000000 | vidc.h_sync_width);
@@ -297,7 +297,7 @@ acornfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 		pal.p = 0;
 		vidc_writel(0x10000000);
 		for (i = 0; i < 256; i += 1) {
-			pal.vidc20.red   = current_par.palette[ i       & 31].vidc20.red;
+			pal.vidc20.red   = current_par.palette[i       & 31].vidc20.red;
 			pal.vidc20.green = current_par.palette[(i >> 1) & 31].vidc20.green;
 			pal.vidc20.blue  = current_par.palette[(i >> 2) & 31].vidc20.blue;
 			vidc_writel(pal.p);
@@ -1043,8 +1043,7 @@ static int acornfb_probe(struct platform_device *dev)
 		base = dma_alloc_wc(current_par.dev, size, &handle,
 				    GFP_KERNEL);
 		if (base == NULL) {
-			printk(KERN_ERR "acornfb: unable to allocate screen "
-			       "memory\n");
+			printk(KERN_ERR "acornfb: unable to allocate screen memory\n");
 			return -ENOMEM;
 		}
 
@@ -1103,8 +1102,7 @@ static int acornfb_probe(struct platform_device *dev)
 	v_sync = h_sync / (fb_info.var.yres + fb_info.var.upper_margin +
 		 fb_info.var.lower_margin + fb_info.var.vsync_len);
 
-	printk(KERN_INFO "Acornfb: %dkB %cRAM, %s, using %dx%d, "
-		"%d.%03dkHz, %dHz\n",
+	printk(KERN_INFO "Acornfb: %dkB %cRAM, %s, using %dx%d, %d.%03dkHz, %dHz\n",
 		fb_info.fix.smem_len / 1024,
 		current_par.using_vram ? 'V' : 'D',
 		VIDC_NAME, fb_info.var.xres, fb_info.var.yres,
-- 
cgit v1.2.3


From 88e4ac68ea9a09e105c86070ebfa01ca482ca4c2 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sat, 1 Apr 2017 15:02:24 +0200
Subject: clk: meson: mpll: fix division by zero in rate_from_params

According to the public datasheet all register bits in HHI_MPLL_CNTL7,
HHI_MPLL_CNTL8 and HHI_MPLL_CNTL9 default to zero. On all GX SoCs these
seem to be initialized by the bootloader to some default value.
However, on my Meson8 board they are not initialized, leading to a
division by zero in rate_from_params as the math is:
(parent_rate * SDM_DEN) / ((SDM_DEN * 0) + 0)

According to the datasheet, the minimum n2 value is 4. The rate provided
by the clock when n2 is less than this minimum is unpredictable. In such
case, we report an error.

Although the rate_from_params function was only introduced recently the
original bug has been there for much longer. It was only exposed
recently when the MPLL clocks were added to the Meson8b clock driver.

Fixes: 1c50da4f27 ("clk: meson: add mpll support")
Reviewed-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 drivers/clk/meson/clk-mpll.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/clk/meson/clk-mpll.c b/drivers/clk/meson/clk-mpll.c
index 540dabe5adad..d9462b505dcc 100644
--- a/drivers/clk/meson/clk-mpll.c
+++ b/drivers/clk/meson/clk-mpll.c
@@ -65,18 +65,21 @@
 #include "clkc.h"
 
 #define SDM_DEN 16384
-#define SDM_MIN 1
-#define SDM_MAX 16383
 #define N2_MIN	4
 #define N2_MAX	511
 
 #define to_meson_clk_mpll(_hw) container_of(_hw, struct meson_clk_mpll, hw)
 
-static unsigned long rate_from_params(unsigned long parent_rate,
+static long rate_from_params(unsigned long parent_rate,
 				      unsigned long sdm,
 				      unsigned long n2)
 {
-	return (parent_rate * SDM_DEN) / ((SDM_DEN * n2) + sdm);
+	unsigned long divisor = (SDM_DEN * n2) + sdm;
+
+	if (n2 < N2_MIN)
+		return -EINVAL;
+
+	return (parent_rate * SDM_DEN) / divisor;
 }
 
 static void params_from_rate(unsigned long requested_rate,
@@ -89,17 +92,13 @@ static void params_from_rate(unsigned long requested_rate,
 
 	if (div < N2_MIN) {
 		*n2 = N2_MIN;
-		*sdm = SDM_MIN;
+		*sdm = 0;
 	} else if (div > N2_MAX) {
 		*n2 = N2_MAX;
-		*sdm = SDM_MAX;
+		*sdm = SDM_DEN - 1;
 	} else {
 		*n2 = div;
 		*sdm = DIV_ROUND_UP(rem * SDM_DEN, requested_rate);
-		if (*sdm < SDM_MIN)
-			*sdm = SDM_MIN;
-		else if (*sdm > SDM_MAX)
-			*sdm = SDM_MAX;
 	}
 }
 
@@ -109,6 +108,7 @@ static unsigned long mpll_recalc_rate(struct clk_hw *hw,
 	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
 	struct parm *p;
 	unsigned long reg, sdm, n2;
+	long rate;
 
 	p = &mpll->sdm;
 	reg = readl(mpll->base + p->reg_off);
@@ -118,7 +118,11 @@ static unsigned long mpll_recalc_rate(struct clk_hw *hw,
 	reg = readl(mpll->base + p->reg_off);
 	n2 = PARM_GET(p->width, p->shift, reg);
 
-	return rate_from_params(parent_rate, sdm, n2);
+	rate = rate_from_params(parent_rate, sdm, n2);
+	if (rate < 0)
+		return 0;
+
+	return rate;
 }
 
 static long mpll_round_rate(struct clk_hw *hw,
-- 
cgit v1.2.3


From b609338b26f5653aa211fc7af83477e2df6e3f0b Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sat, 1 Apr 2017 15:02:25 +0200
Subject: clk: meson: mpll: use 64bit math in rate_from_params

On Meson8b the MPLL parent clock (fixed_pll) has a rate of 2550MHz.
Multiplying this with SDM_DEN results in a value greater than 32bits.
This is not a problem on the 64bit Meson GX SoCs, but it may result in
undefined behavior on the older 32bit Meson8b SoC.

While rate_from_params was only introduced recently to make the math
reusable from _round_rate and _recalc_rate the original bug exists much
longer.

Fixes: 1c50da4f27 ("clk: meson: add mpll support")
Reviewed-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
[as discussed on the ml, use DIV_ROUND_UP_ULL]
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 drivers/clk/meson/clk-mpll.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/meson/clk-mpll.c b/drivers/clk/meson/clk-mpll.c
index d9462b505dcc..39eab69fe51a 100644
--- a/drivers/clk/meson/clk-mpll.c
+++ b/drivers/clk/meson/clk-mpll.c
@@ -79,7 +79,7 @@ static long rate_from_params(unsigned long parent_rate,
 	if (n2 < N2_MIN)
 		return -EINVAL;
 
-	return (parent_rate * SDM_DEN) / divisor;
+	return DIV_ROUND_UP_ULL((u64)parent_rate * SDM_DEN, divisor);
 }
 
 static void params_from_rate(unsigned long requested_rate,
-- 
cgit v1.2.3


From 37cabc74e57d15d683a8e49159bc8dcd5a50516a Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Sat, 8 Apr 2017 00:19:03 +0800
Subject: clk: sunxi-ng: fix PRCM CCU ir clk parent

The first parent of ir clk in PRCM CCU is wrongly written as "osc32K"
instead of "osc32k".

Change it to "osc32k".

Fixes: cdb8b80b6093 ("clk: sunxi-ng: add support for PRCM CCUs")

Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu-sun8i-r.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-r.c b/drivers/clk/sunxi-ng/ccu-sun8i-r.c
index 0d027d53dbdf..119f47b568ea 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-r.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-r.c
@@ -81,7 +81,7 @@ static SUNXI_CCU_GATE(apb0_i2c_clk,	"apb0-i2c",	"apb0",
 static SUNXI_CCU_GATE(apb0_twd_clk,	"apb0-twd",	"apb0",
 		      0x28, BIT(7), 0);
 
-static const char * const r_mod0_default_parents[] = { "osc32K", "osc24M" };
+static const char * const r_mod0_default_parents[] = { "osc32k", "osc24M" };
 static SUNXI_CCU_MP_WITH_MUX_GATE(ir_clk, "ir",
 				  r_mod0_default_parents, 0x54,
 				  0, 4,		/* M */
-- 
cgit v1.2.3


From 266061b7637b189ac82ee3a75116ff2dcb031e0b Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Sat, 8 Apr 2017 00:19:04 +0800
Subject: clk: sunxi-ng: fix PRCM CCU CLK_NUMBER value

The CLK_NUMBER value of PRCM CCU is wrongly set to (CLK_APB0_PWD + 1),
which prevented the IR mod clock from being set up.

Change it to (CLK_IR + 1) in order to correctly get IR mod set up.

Fixes: cdb8b80b6093 ("clk: sunxi-ng: add support for PRCM CCUs")

Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu-sun8i-r.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-r.h b/drivers/clk/sunxi-ng/ccu-sun8i-r.h
index eaa431fd1d8f..a7a407f12b56 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-r.h
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-r.h
@@ -22,6 +22,6 @@
 #define CLK_AHB0	1
 #define CLK_APB0	2
 
-#define CLK_NUMBER	(CLK_APB0_TWD + 1)
+#define CLK_NUMBER	(CLK_IR + 1)
 
 #endif /* _CCU_SUN8I_R_H */
-- 
cgit v1.2.3


From 05510f2b4819398a25ef432d72efa9e95419768a Mon Sep 17 00:00:00 2001
From: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Date: Tue, 7 Mar 2017 14:19:56 +0100
Subject: MIPS: Enable GENERIC_CPU_AUTOPROBE

Add missing macros and methods that are required by
CONFIG_GENERIC_CPU_AUTOPROBE: MAX_CPU_FEATURES, cpu_have_feature(),
cpu_feature().

Also set a default elf platform as currently it is not set for most MIPS
platforms resulting in incorrectly specified modalias values in cpu
autoprobe ("cpu:type:(null):feature:...").

Export 'elf_hwcap' symbol so that it can be accessed from modules that
use module_cpu_feature_match()

Signed-off-by: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15395/

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig                  |  1 +
 arch/mips/include/asm/cpufeature.h | 26 ++++++++++++++++++++++++++
 arch/mips/kernel/cpu-probe.c       |  7 +++++++
 3 files changed, 34 insertions(+)
 create mode 100644 arch/mips/include/asm/cpufeature.h

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index e0bb576410bb..68af16b72e9c 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -46,6 +46,7 @@ config MIPS
 	select ARCH_DISCARD_MEMBLOCK
 	select GENERIC_SMP_IDLE_THREAD
 	select BUILDTIME_EXTABLE_SORT
+	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
 	select GENERIC_CMOS_UPDATE
diff --git a/arch/mips/include/asm/cpufeature.h b/arch/mips/include/asm/cpufeature.h
new file mode 100644
index 000000000000..c63ec05313c1
--- /dev/null
+++ b/arch/mips/include/asm/cpufeature.h
@@ -0,0 +1,26 @@
+/*
+ * CPU feature definitions for module loading, used by
+ * module_cpu_feature_match(), see uapi/asm/hwcap.h for MIPS CPU features.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ASM_CPUFEATURE_H
+#define __ASM_CPUFEATURE_H
+
+#include <uapi/asm/hwcap.h>
+#include <asm/elf.h>
+
+#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap))
+
+#define cpu_feature(x)		ilog2(HWCAP_ ## x)
+
+static inline bool cpu_have_feature(unsigned int num)
+{
+	return elf_hwcap & (1UL << num);
+}
+
+#endif /* __ASM_CPUFEATURE_H */
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 12422fd4af23..e57e6850f4dd 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -34,6 +34,7 @@
 
 /* Hardware capabilities */
 unsigned int elf_hwcap __read_mostly;
+EXPORT_SYMBOL_GPL(elf_hwcap);
 
 /*
  * Get the FPU Implementation/Revision.
@@ -1946,6 +1947,12 @@ void cpu_probe(void)
 	struct cpuinfo_mips *c = &current_cpu_data;
 	unsigned int cpu = smp_processor_id();
 
+	/*
+	 * Set a default elf platform, cpu probe may later
+	 * overwrite it with a more precise value
+	 */
+	set_elf_platform(cpu, "mips");
+
 	c->processor_id = PRID_IMP_UNKNOWN;
 	c->fpu_id	= FPIR_IMP_NONE;
 	c->cputype	= CPU_UNKNOWN;
-- 
cgit v1.2.3


From bfbfa9d61cf29f3579107892c7347c02d891dfec Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 14 Mar 2017 14:21:40 -0700
Subject: MIPS: uasm: Add support for LHU.

The follow-on BPF JIT patches use the LHU instruction, so add it.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Steven J. Hill <steven.hill@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15743/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/uasm.h | 1 +
 arch/mips/mm/uasm-mips.c     | 1 +
 arch/mips/mm/uasm.c          | 3 ++-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index e9a9e2ade1d2..d91ed5b506ed 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -138,6 +138,7 @@ Ip_u2s3u1(_lb);
 Ip_u2s3u1(_ld);
 Ip_u3u1u2(_ldx);
 Ip_u2s3u1(_lh);
+Ip_u2s3u1(_lhu);
 Ip_u2s3u1(_ll);
 Ip_u2s3u1(_lld);
 Ip_u1s2(_lui);
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 763d3f1edb8a..2277499fe6ae 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -103,6 +103,7 @@ static struct insn insn_table[] = {
 	{ insn_ld,  M(ld_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_ldx, M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD },
 	{ insn_lh,  M(lh_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+	{ insn_lhu,  M(lhu_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 #ifndef CONFIG_CPU_MIPSR6
 	{ insn_lld,  M(lld_op, 0, 0, 0, 0, 0),	RS | RT | SIMM },
 	{ insn_ll,  M(ll_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index a82970442b8a..7f400c8d4645 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -61,7 +61,7 @@ enum opcode {
 	insn_sllv, insn_slt, insn_sltiu, insn_sltu, insn_sra, insn_srl,
 	insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall, insn_tlbp,
 	insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh, insn_xor,
-	insn_xori, insn_yield, insn_lddir, insn_ldpte,
+	insn_xori, insn_yield, insn_lddir, insn_ldpte, insn_lhu,
 };
 
 struct insn {
@@ -297,6 +297,7 @@ I_u1(_jr)
 I_u2s3u1(_lb)
 I_u2s3u1(_ld)
 I_u2s3u1(_lh)
+I_u2s3u1(_lhu)
 I_u2s3u1(_ll)
 I_u2s3u1(_lld)
 I_u1s2(_lui)
-- 
cgit v1.2.3


From 4ad701532a758202a422a8588f4d09c058c9a5dc Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 14 Mar 2017 14:21:41 -0700
Subject: MIPS: BPF: Add JIT support for SKF_AD_HATYPE.

This let's us pass some additional "modprobe test-bpf" tests with JIT
enabled.

Reuse the code for SKF_AD_IFINDEX, but substitute the offset and size
of the "type" field.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Steven J. Hill <steven.hill@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15744/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 49a2e2226fee..880e329310ee 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -365,6 +365,12 @@ static inline void emit_half_load(unsigned int reg, unsigned int base,
 	emit_instr(ctx, lh, reg, offset, base);
 }
 
+static inline void emit_half_load_unsigned(unsigned int reg, unsigned int base,
+					   unsigned int offset, struct jit_ctx *ctx)
+{
+	emit_instr(ctx, lhu, reg, offset, base);
+}
+
 static inline void emit_mul(unsigned int dst, unsigned int src1,
 			    unsigned int src2, struct jit_ctx *ctx)
 {
@@ -1112,6 +1118,8 @@ jmp_cmp:
 			break;
 		case BPF_ANC | SKF_AD_IFINDEX:
 			/* A = skb->dev->ifindex */
+		case BPF_ANC | SKF_AD_HATYPE:
+			/* A = skb->dev->type */
 			ctx->flags |= SEEN_SKB | SEEN_A;
 			off = offsetof(struct sk_buff, dev);
 			/* Load *dev pointer */
@@ -1120,10 +1128,15 @@ jmp_cmp:
 			emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
 				   b_imm(prog->len, ctx), ctx);
 			emit_reg_move(r_ret, r_zero, ctx);
-			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
-						  ifindex) != 4);
-			off = offsetof(struct net_device, ifindex);
-			emit_load(r_A, r_s0, off, ctx);
+			if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
+				BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+				off = offsetof(struct net_device, ifindex);
+				emit_load(r_A, r_s0, off, ctx);
+			} else { /* (code == (BPF_ANC | SKF_AD_HATYPE) */
+				BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
+				off = offsetof(struct net_device, type);
+				emit_half_load_unsigned(r_A, r_s0, off, ctx);
+			}
 			break;
 		case BPF_ANC | SKF_AD_MARK:
 			ctx->flags |= SEEN_SKB | SEEN_A;
-- 
cgit v1.2.3


From 64b2dd3a8be3ec4e08fd5a3dab0620d9c6c398fe Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 14 Mar 2017 14:21:42 -0700
Subject: MIPS: BPF: Use unsigned access for unsigned SKB fields.

The SKB vlan_tci and queue_mapping fields are unsigned, don't sign
extend these in the BPF JIT.  In the vlan_tci case, the value gets
masked so the change is not needed for correctness, but do it anyway
for agreement with the types defined in struct sk_buff.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Steven J. Hill <steven.hill@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15746/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 880e329310ee..a68cd36a892f 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1156,7 +1156,7 @@ jmp_cmp:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  vlan_tci) != 2);
 			off = offsetof(struct sk_buff, vlan_tci);
-			emit_half_load(r_s0, r_skb, off, ctx);
+			emit_half_load_unsigned(r_s0, r_skb, off, ctx);
 			if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
 				emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT, ctx);
 			} else {
@@ -1183,7 +1183,7 @@ jmp_cmp:
 			BUILD_BUG_ON(offsetof(struct sk_buff,
 					      queue_mapping) > 0xff);
 			off = offsetof(struct sk_buff, queue_mapping);
-			emit_half_load(r_A, r_skb, off, ctx);
+			emit_half_load_unsigned(r_A, r_skb, off, ctx);
 			break;
 		default:
 			pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__,
-- 
cgit v1.2.3


From 1ef0910cfd681f0bd0b81f8809935b2006e9cfb9 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 14 Mar 2017 14:21:43 -0700
Subject: MIPS: BPF: Quit clobbering callee saved registers in JIT code.

If bpf_needs_clear_a() returns true, only actually clear it if it is
ever used.  If it is not used, we don't save and restore it, so the
clearing has the nasty side effect of clobbering caller state.

Also, don't emit stack pointer adjustment instructions if the
adjustment amount is zero.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Steven J. Hill <steven.hill@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15745/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index a68cd36a892f..44b925005dd3 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -532,7 +532,8 @@ static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
 	u32 sflags, tmp_flags;
 
 	/* Adjust the stack pointer */
-	emit_stack_offset(-align_sp(offset), ctx);
+	if (offset)
+		emit_stack_offset(-align_sp(offset), ctx);
 
 	tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
 	/* sflags is essentially a bitmap */
@@ -584,7 +585,8 @@ static void restore_bpf_jit_regs(struct jit_ctx *ctx,
 		emit_load_stack_reg(r_ra, r_sp, real_off, ctx);
 
 	/* Restore the sp and discard the scrach memory */
-	emit_stack_offset(align_sp(offset), ctx);
+	if (offset)
+		emit_stack_offset(align_sp(offset), ctx);
 }
 
 static unsigned int get_stack_depth(struct jit_ctx *ctx)
@@ -631,8 +633,14 @@ static void build_prologue(struct jit_ctx *ctx)
 	if (ctx->flags & SEEN_X)
 		emit_jit_reg_move(r_X, r_zero, ctx);
 
-	/* Do not leak kernel data to userspace */
-	if (bpf_needs_clear_a(&ctx->skf->insns[0]))
+	/*
+	 * Do not leak kernel data to userspace, we only need to clear
+	 * r_A if it is ever used.  In fact if it is never used, we
+	 * will not save/restore it, so clearing it in this case would
+	 * corrupt the state of the caller.
+	 */
+	if (bpf_needs_clear_a(&ctx->skf->insns[0]) &&
+	    (ctx->flags & SEEN_A))
 		emit_jit_reg_move(r_A, r_zero, ctx);
 }
 
-- 
cgit v1.2.3


From a81507c79f4ae9a0f9fb1054b59b62a090620dd9 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 14 Mar 2017 14:21:44 -0700
Subject: MIPS: BPF: Fix multiple problems in JIT skb access helpers.

o Socket data is unsigned, so use unsigned accessors instructions.

 o Fix path result pointer generation arithmetic.

 o Fix half-word byte swapping code for unsigned semantics.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Steven J. Hill <steven.hill@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15747/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/net/bpf_jit_asm.S | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S
index 5d2e0c8d29c0..88a2075305d1 100644
--- a/arch/mips/net/bpf_jit_asm.S
+++ b/arch/mips/net/bpf_jit_asm.S
@@ -90,18 +90,14 @@ FEXPORT(sk_load_half_positive)
 	is_offset_in_header(2, half)
 	/* Offset within header boundaries */
 	PTR_ADDU t1, $r_skb_data, offset
-	.set	reorder
-	lh	$r_A, 0(t1)
-	.set	noreorder
+	lhu	$r_A, 0(t1)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 # if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
-	wsbh	t0, $r_A
-	seh	$r_A, t0
+	wsbh	$r_A, $r_A
 # else
-	sll	t0, $r_A, 24
-	andi	t1, $r_A, 0xff00
-	sra	t0, t0, 16
-	srl	t1, t1, 8
+	sll	t0, $r_A, 8
+	srl	t1, $r_A, 8
+	andi	t0, t0, 0xff00
 	or	$r_A, t0, t1
 # endif
 #endif
@@ -115,7 +111,7 @@ FEXPORT(sk_load_byte_positive)
 	is_offset_in_header(1, byte)
 	/* Offset within header boundaries */
 	PTR_ADDU t1, $r_skb_data, offset
-	lb	$r_A, 0(t1)
+	lbu	$r_A, 0(t1)
 	jr	$r_ra
 	 move	$r_ret, zero
 	END(sk_load_byte)
@@ -139,6 +135,11 @@ FEXPORT(sk_load_byte_positive)
  * (void *to) is returned in r_s0
  *
  */
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define DS_OFFSET(SIZE) (4 * SZREG)
+#else
+#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE))
+#endif
 #define bpf_slow_path_common(SIZE)				\
 	/* Quick check. Are we within reasonable boundaries? */ \
 	LONG_ADDIU	$r_s1, $r_skb_len, -SIZE;		\
@@ -150,7 +151,7 @@ FEXPORT(sk_load_byte_positive)
 	PTR_LA		t0, skb_copy_bits;			\
 	PTR_S		$r_ra, (5 * SZREG)($r_sp);		\
 	/* Assign low slot to a2 */				\
-	move		a2, $r_sp;				\
+	PTR_ADDIU	a2, $r_sp, DS_OFFSET(SIZE);		\
 	jalr		t0;					\
 	/* Reset our destination slot (DS but it's ok) */	\
 	 INT_S		zero, (4 * SZREG)($r_sp);		\
-- 
cgit v1.2.3


From dfa32261fa0ed1821c7d5dbb9e93eddfe311a0d9 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Fri, 17 Feb 2017 11:45:55 -0800
Subject: MIPS: Octeon: Remove vestiges of CONFIG_CAVIUM_OCTEON_2ND_KERNEL

This config option never really worked, and has bit-rotted to the
point of being completely useless.  Remove it completely.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Steven J. Hill <steven.hill@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15314/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/Kconfig  |  9 ---------
 arch/mips/cavium-octeon/Platform |  4 ----
 arch/mips/cavium-octeon/setup.c  | 12 +-----------
 3 files changed, 1 insertion(+), 24 deletions(-)

diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index c370426a7322..5c0b56203bae 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -25,15 +25,6 @@ endif # CPU_CAVIUM_OCTEON
 
 if CAVIUM_OCTEON_SOC
 
-config CAVIUM_OCTEON_2ND_KERNEL
-	bool "Build the kernel to be used as a 2nd kernel on the same chip"
-	default "n"
-	help
-	  This option configures this kernel to be linked at a different
-	  address and use the 2nd uart for output. This allows a kernel built
-	  with this option to be run at the same time as one built without this
-	  option.
-
 config CAVIUM_OCTEON_LOCK_L2
 	bool "Lock often used kernel code in the L2"
 	default "y"
diff --git a/arch/mips/cavium-octeon/Platform b/arch/mips/cavium-octeon/Platform
index 8a301cb12d68..45be853700e6 100644
--- a/arch/mips/cavium-octeon/Platform
+++ b/arch/mips/cavium-octeon/Platform
@@ -4,8 +4,4 @@
 platform-$(CONFIG_CAVIUM_OCTEON_SOC)	+= cavium-octeon/
 cflags-$(CONFIG_CAVIUM_OCTEON_SOC)	+=				\
 		-I$(srctree)/arch/mips/include/asm/mach-cavium-octeon
-ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
-load-$(CONFIG_CAVIUM_OCTEON_SOC)	+= 0xffffffff84100000
-else
 load-$(CONFIG_CAVIUM_OCTEON_SOC)	+= 0xffffffff81100000
-endif
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index d9dbeb0b165b..a8034d0dcade 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -374,14 +374,8 @@ void octeon_write_lcd(const char *s)
  */
 int octeon_get_boot_uart(void)
 {
-	int uart;
-#ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
-	uart = 1;
-#else
-	uart = (octeon_boot_desc_ptr->flags & OCTEON_BL_FLAG_CONSOLE_UART1) ?
+	return (octeon_boot_desc_ptr->flags & OCTEON_BL_FLAG_CONSOLE_UART1) ?
 		1 : 0;
-#endif
-	return uart;
 }
 
 /**
@@ -901,14 +895,10 @@ void __init prom_init(void)
 	}
 
 	if (strstr(arcs_cmdline, "console=") == NULL) {
-#ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
-		strcat(arcs_cmdline, " console=ttyS0,115200");
-#else
 		if (octeon_uart == 1)
 			strcat(arcs_cmdline, " console=ttyS1,115200");
 		else
 			strcat(arcs_cmdline, " console=ttyS0,115200");
-#endif
 	}
 
 	mips_hpt_frequency = octeon_get_clock_rate();
-- 
cgit v1.2.3


From 3377e227af441aff710726437adc20efc359fd9c Mon Sep 17 00:00:00 2001
From: Alex Belits <alex.belits@cavium.com>
Date: Thu, 16 Feb 2017 17:27:34 -0800
Subject: MIPS: Add 48-bit VA space (and 4-level page tables) for 4K pages.

Some users must have 4K pages while needing a 48-bit VA space size.
The cleanest way do do this is to go to a 4-level page table for this
case.  Each page table level using order-0 pages adds 9 bits to the
VA size (at 4K pages, so for four levels we get 9 * 4 + 12 == 48-bits.

For the 4K page size case only we add support functions for the PUD
level of the page table tree, also the TLB exception handlers get an
extra level of tree walk.

[david.daney@cavium.com: Forward port to v4.10.]
[david.daney@cavium.com: Forward port to v4.11.]

Signed-off-by: Alex Belits <alex.belits@cavium.com>
Signed-off-by: David Daney <david.daney@cavium.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Alex Belits <alex.belits@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15312/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig                  | 13 +++---
 arch/mips/include/asm/pgalloc.h    | 26 +++++++++++
 arch/mips/include/asm/pgtable-64.h | 88 +++++++++++++++++++++++++++++++++++---
 arch/mips/mm/init.c                |  3 ++
 arch/mips/mm/pgtable-64.c          | 33 ++++++++++++--
 arch/mips/mm/tlbex.c               | 22 ++++++++++
 6 files changed, 172 insertions(+), 13 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 68af16b72e9c..f4dd2c322d4b 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2121,10 +2121,13 @@ config MIPS_VA_BITS_48
 	bool "48 bits virtual memory"
 	depends on 64BIT
 	help
-	  Support a maximum at least 48 bits of application virtual memory.
-	  Default is 40 bits or less, depending on the CPU.
-	  This option result in a small memory overhead for page tables.
-	  This option is only supported with 16k and 64k page sizes.
+	  Support a maximum at least 48 bits of application virtual
+	  memory.  Default is 40 bits or less, depending on the CPU.
+	  For page sizes 16k and above, this option results in a small
+	  memory overhead for page tables.  For 4k page size, a fourth
+	  level of page tables is added which imposes both a memory
+	  overhead as well as slower TLB fault handling.
+
 	  If unsure, say N.
 
 choice
@@ -2134,7 +2137,6 @@ choice
 config PAGE_SIZE_4KB
 	bool "4kB"
 	depends on !CPU_LOONGSON2 && !CPU_LOONGSON3
-	depends on !MIPS_VA_BITS_48
 	help
 	 This option select the standard 4kB Linux page size.  On some
 	 R3000-family processors this is the only available page size.  Using
@@ -2983,6 +2985,7 @@ config HAVE_LATENCYTOP_SUPPORT
 
 config PGTABLE_LEVELS
 	int
+	default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48
 	default 3 if 64BIT && !PAGE_SIZE_64KB
 	default 2
 
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index a8705f6c8180..a1bdb1ea5234 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -110,6 +110,32 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #endif
 
+#ifndef __PAGETABLE_PUD_FOLDED
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	pud_t *pud;
+
+	pud = (pud_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PUD_ORDER);
+	if (pud)
+		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
+	return pud;
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	free_pages((unsigned long)pud, PUD_ORDER);
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+	set_pgd(pgd, __pgd((unsigned long)pud));
+}
+
+#define __pud_free_tlb(tlb, x, addr)	pud_free((tlb)->mm, x)
+
+#endif /* __PAGETABLE_PUD_FOLDED */
+
 #define check_pgt_cache()	do { } while (0)
 
 extern void pagetable_init(void);
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 130a2a6c1531..67fe6dc5211c 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -20,7 +20,7 @@
 #define __ARCH_USE_5LEVEL_HACK
 #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48)
 #include <asm-generic/pgtable-nopmd.h>
-#else
+#elif !(defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_MIPS_VA_BITS_48))
 #include <asm-generic/pgtable-nopud.h>
 #endif
 
@@ -54,9 +54,18 @@
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
 
+# ifdef __PAGETABLE_PUD_FOLDED
+# define PGDIR_SHIFT	(PMD_SHIFT + (PAGE_SHIFT + PMD_ORDER - 3))
+# endif
+#endif
 
-#define PGDIR_SHIFT	(PMD_SHIFT + (PAGE_SHIFT + PMD_ORDER - 3))
+#ifndef __PAGETABLE_PUD_FOLDED
+#define PUD_SHIFT	(PMD_SHIFT + (PAGE_SHIFT + PMD_ORDER - 3))
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+#define PGDIR_SHIFT	(PUD_SHIFT + (PAGE_SHIFT + PUD_ORDER - 3))
 #endif
+
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
@@ -79,8 +88,13 @@
  * of virtual address space.
  */
 #ifdef CONFIG_PAGE_SIZE_4KB
-#define PGD_ORDER		1
-#define PUD_ORDER		aieeee_attempt_to_allocate_pud
+# ifdef CONFIG_MIPS_VA_BITS_48
+#  define PGD_ORDER		0
+#  define PUD_ORDER		0
+# else
+#  define PGD_ORDER		1
+#  define PUD_ORDER		aieeee_attempt_to_allocate_pud
+# endif
 #define PMD_ORDER		0
 #define PTE_ORDER		0
 #endif
@@ -118,6 +132,9 @@
 #endif
 
 #define PTRS_PER_PGD	((PAGE_SIZE << PGD_ORDER) / sizeof(pgd_t))
+#ifndef __PAGETABLE_PUD_FOLDED
+#define PTRS_PER_PUD	((PAGE_SIZE << PUD_ORDER) / sizeof(pud_t))
+#endif
 #ifndef __PAGETABLE_PMD_FOLDED
 #define PTRS_PER_PMD	((PAGE_SIZE << PMD_ORDER) / sizeof(pmd_t))
 #endif
@@ -134,7 +151,7 @@
 #define VMALLOC_START		(MAP_BASE + (2 * PAGE_SIZE))
 #define VMALLOC_END	\
 	(MAP_BASE + \
-	 min(PTRS_PER_PGD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, \
+	 min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, \
 	     (1UL << cpu_vmbits)) - (1UL << 32))
 
 #if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \
@@ -150,12 +167,72 @@
 #define pmd_ERROR(e) \
 	printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
 #endif
+#ifndef __PAGETABLE_PUD_FOLDED
+#define pud_ERROR(e) \
+	printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+#endif
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
 
 extern pte_t invalid_pte_table[PTRS_PER_PTE];
 extern pte_t empty_bad_page_table[PTRS_PER_PTE];
 
+#ifndef __PAGETABLE_PUD_FOLDED
+/*
+ * For 4-level pagetables we defines these ourselves, for 3-level the
+ * definitions are below, for 2-level the
+ * definitions are supplied by <asm-generic/pgtable-nopmd.h>.
+ */
+typedef struct { unsigned long pud; } pud_t;
+#define pud_val(x)	((x).pud)
+#define __pud(x)	((pud_t) { (x) })
+
+extern pud_t invalid_pud_table[PTRS_PER_PUD];
+
+/*
+ * Empty pgd entries point to the invalid_pud_table.
+ */
+static inline int pgd_none(pgd_t pgd)
+{
+	return pgd_val(pgd) == (unsigned long)invalid_pud_table;
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+	if (unlikely(pgd_val(pgd) & ~PAGE_MASK))
+		return 1;
+
+	return 0;
+}
+
+static inline int pgd_present(pgd_t pgd)
+{
+	return pgd_val(pgd) != (unsigned long)invalid_pud_table;
+}
+
+static inline void pgd_clear(pgd_t *pgdp)
+{
+	pgd_val(*pgdp) = (unsigned long)invalid_pud_table;
+}
+
+#define pud_index(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+{
+	return pgd_val(pgd);
+}
+
+static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+{
+	return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
+}
+
+static inline void set_pgd(pgd_t *pgd, pgd_t pgdval)
+{
+	*pgd = pgdval;
+}
+
+#endif
 
 #ifndef __PAGETABLE_PMD_FOLDED
 /*
@@ -281,6 +358,7 @@ static inline pmd_t *pmd_offset(pud_t * pud, unsigned long address)
  * Initialize a new pgd / pmd table with invalid pointers.
  */
 extern void pgd_init(unsigned long page);
+extern void pud_init(unsigned long page, unsigned long pagetable);
 extern void pmd_init(unsigned long page, unsigned long pagetable);
 
 /*
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index aa75849c36bc..37aa931501bf 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -537,6 +537,9 @@ unsigned long pgd_current[NR_CPUS];
  * it in the linker script.
  */
 pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
+#ifndef __PAGETABLE_PUD_FOLDED
+pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
+#endif
 #ifndef __PAGETABLE_PMD_FOLDED
 pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
 EXPORT_SYMBOL_GPL(invalid_pmd_table);
diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
index 0ae7b28b4db5..6fd6e96fdebb 100644
--- a/arch/mips/mm/pgtable-64.c
+++ b/arch/mips/mm/pgtable-64.c
@@ -19,10 +19,12 @@ void pgd_init(unsigned long page)
 	unsigned long *p, *end;
 	unsigned long entry;
 
-#ifdef __PAGETABLE_PMD_FOLDED
-	entry = (unsigned long)invalid_pte_table;
-#else
+#if !defined(__PAGETABLE_PUD_FOLDED)
+	entry = (unsigned long)invalid_pud_table;
+#elif !defined(__PAGETABLE_PMD_FOLDED)
 	entry = (unsigned long)invalid_pmd_table;
+#else
+	entry = (unsigned long)invalid_pte_table;
 #endif
 
 	p = (unsigned long *) page;
@@ -64,6 +66,28 @@ void pmd_init(unsigned long addr, unsigned long pagetable)
 EXPORT_SYMBOL_GPL(pmd_init);
 #endif
 
+#ifndef __PAGETABLE_PUD_FOLDED
+void pud_init(unsigned long addr, unsigned long pagetable)
+{
+	unsigned long *p, *end;
+
+	p = (unsigned long *)addr;
+	end = p + PTRS_PER_PUD;
+
+	do {
+		p[0] = pagetable;
+		p[1] = pagetable;
+		p[2] = pagetable;
+		p[3] = pagetable;
+		p[4] = pagetable;
+		p += 8;
+		p[-3] = pagetable;
+		p[-2] = pagetable;
+		p[-1] = pagetable;
+	} while (p != end);
+}
+#endif
+
 pmd_t mk_pmd(struct page *page, pgprot_t prot)
 {
 	pmd_t pmd;
@@ -87,6 +111,9 @@ void __init pagetable_init(void)
 
 	/* Initialize the entire pgd.  */
 	pgd_init((unsigned long)swapper_pg_dir);
+#ifndef __PAGETABLE_PUD_FOLDED
+	pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table);
+#endif
 #ifndef __PAGETABLE_PMD_FOLDED
 	pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
 #endif
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 4f642e07c2b1..ed1c5297547a 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -865,6 +865,13 @@ void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
 
 	uasm_i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3);
 	uasm_i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */
+#ifndef __PAGETABLE_PUD_FOLDED
+	uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+	uasm_i_ld(p, ptr, 0, ptr); /* get pud pointer */
+	uasm_i_dsrl_safe(p, tmp, tmp, PUD_SHIFT - 3); /* get pud offset in bytes */
+	uasm_i_andi(p, tmp, tmp, (PTRS_PER_PUD - 1) << 3);
+	uasm_i_daddu(p, ptr, ptr, tmp); /* add in pud offset */
+#endif
 #ifndef __PAGETABLE_PMD_FOLDED
 	uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
 	uasm_i_ld(p, ptr, 0, ptr); /* get pmd pointer */
@@ -1184,6 +1191,21 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
 		uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */
 	}
 
+#ifndef __PAGETABLE_PUD_FOLDED
+	/* get pud offset in bytes */
+	uasm_i_dsrl_safe(p, scratch, tmp, PUD_SHIFT - 3);
+	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PUD - 1) << 3);
+
+	if (use_lwx_insns()) {
+		UASM_i_LWX(p, ptr, scratch, ptr);
+	} else {
+		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */
+		UASM_i_LW(p, ptr, 0, ptr);
+	}
+	/* ptr contains a pointer to PMD entry */
+	/* tmp contains the address */
+#endif
+
 #ifndef __PAGETABLE_PMD_FOLDED
 	/* get pmd offset in bytes */
 	uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3);
-- 
cgit v1.2.3


From 15f6847923a87040ebe962e34eea48711c5d0582 Mon Sep 17 00:00:00 2001
From: "Steven J. Hill" <Steven.Hill@cavium.com>
Date: Thu, 9 Mar 2017 08:14:15 -0600
Subject: MIPS: Octeon: Remove unused L2C types and macros.

Remove all unused bitfields and macros. Convert the remaining
bitfields to use __BITFIELD_FIELD instead of #ifdef.

[ralf@linux-mips.org: Add inclusions of <uapi/asm/bitfield.h> as necessary.]

Signed-off-by: Steven J. Hill <steven.hill@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15403/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/executive/cvmx-l2c.c     |  139 +-
 arch/mips/cavium-octeon/executive/octeon-model.c |   21 +-
 arch/mips/include/asm/octeon/cvmx-l2c-defs.h     | 3193 +---------------------
 arch/mips/include/asm/octeon/cvmx-l2c.h          |   59 +-
 arch/mips/include/asm/octeon/cvmx-l2d-defs.h     |  526 ----
 arch/mips/include/asm/octeon/cvmx-l2t-defs.h     |  286 +-
 arch/mips/include/asm/octeon/cvmx.h              |    3 +-
 7 files changed, 305 insertions(+), 3922 deletions(-)
 delete mode 100644 arch/mips/include/asm/octeon/cvmx-l2d-defs.h

diff --git a/arch/mips/cavium-octeon/executive/cvmx-l2c.c b/arch/mips/cavium-octeon/executive/cvmx-l2c.c
index 89b5273299ab..f091c9b70603 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-l2c.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-l2c.c
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2010 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -239,6 +239,7 @@ uint64_t cvmx_l2c_read_perf(uint32_t counter)
 		else {
 			uint64_t counter = 0;
 			int tad;
+
 			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
 				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC0(tad));
 			return counter;
@@ -249,6 +250,7 @@ uint64_t cvmx_l2c_read_perf(uint32_t counter)
 		else {
 			uint64_t counter = 0;
 			int tad;
+
 			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
 				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC1(tad));
 			return counter;
@@ -259,6 +261,7 @@ uint64_t cvmx_l2c_read_perf(uint32_t counter)
 		else {
 			uint64_t counter = 0;
 			int tad;
+
 			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
 				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC2(tad));
 			return counter;
@@ -270,6 +273,7 @@ uint64_t cvmx_l2c_read_perf(uint32_t counter)
 		else {
 			uint64_t counter = 0;
 			int tad;
+
 			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
 				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC3(tad));
 			return counter;
@@ -301,7 +305,7 @@ static void fault_in(uint64_t addr, int len)
 	 */
 	CVMX_DCACHE_INVALIDATE;
 	while (len > 0) {
-		ACCESS_ONCE(*ptr);
+		READ_ONCE(*ptr);
 		len -= CVMX_CACHE_LINE_SIZE;
 		ptr += CVMX_CACHE_LINE_SIZE;
 	}
@@ -375,7 +379,9 @@ int cvmx_l2c_lock_line(uint64_t addr)
 		if (((union cvmx_l2c_cfg)(cvmx_read_csr(CVMX_L2C_CFG))).s.idxalias) {
 			int alias_shift = CVMX_L2C_IDX_ADDR_SHIFT + 2 * CVMX_L2_SET_BITS - 1;
 			uint64_t addr_tmp = addr ^ (addr & ((1 << alias_shift) - 1)) >> CVMX_L2_SET_BITS;
+
 			lckbase.s.lck_base = addr_tmp >> 7;
+
 		} else {
 			lckbase.s.lck_base = addr >> 7;
 		}
@@ -435,6 +441,7 @@ void cvmx_l2c_flush(void)
 		/* These may look like constants, but they aren't... */
 		int assoc_shift = CVMX_L2C_TAG_ADDR_ALIAS_SHIFT;
 		int set_shift = CVMX_L2C_IDX_ADDR_SHIFT;
+
 		for (set = 0; set < n_set; set++) {
 			for (assoc = 0; assoc < n_assoc; assoc++) {
 				address = CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
@@ -519,89 +526,49 @@ int cvmx_l2c_unlock_mem_region(uint64_t start, uint64_t len)
 union __cvmx_l2c_tag {
 	uint64_t u64;
 	struct cvmx_l2c_tag_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved:40;
-		uint64_t V:1;		/* Line valid */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t L:1;		/* Line locked */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t addr:20;	/* Phys mem addr (33..14) */
-#else
-		uint64_t addr:20;	/* Phys mem addr (33..14) */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t L:1;		/* Line locked */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t V:1;		/* Line valid */
-		uint64_t reserved:40;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved:40,
+		__BITFIELD_FIELD(uint64_t V:1,		/* Line valid */
+		__BITFIELD_FIELD(uint64_t D:1,		/* Line dirty */
+		__BITFIELD_FIELD(uint64_t L:1,		/* Line locked */
+		__BITFIELD_FIELD(uint64_t U:1,		/* Use, LRU eviction */
+		__BITFIELD_FIELD(uint64_t addr:20,	/* Phys addr (33..14) */
+		;))))))
 	} cn50xx;
 	struct cvmx_l2c_tag_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved:41;
-		uint64_t V:1;		/* Line valid */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t L:1;		/* Line locked */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t addr:19;	/* Phys mem addr (33..15) */
-#else
-		uint64_t addr:19;	/* Phys mem addr (33..15) */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t L:1;		/* Line locked */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t V:1;		/* Line valid */
-		uint64_t reserved:41;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved:41,
+		__BITFIELD_FIELD(uint64_t V:1,		/* Line valid */
+		__BITFIELD_FIELD(uint64_t D:1,		/* Line dirty */
+		__BITFIELD_FIELD(uint64_t L:1,		/* Line locked */
+		__BITFIELD_FIELD(uint64_t U:1,		/* Use, LRU eviction */
+		__BITFIELD_FIELD(uint64_t addr:19,	/* Phys addr (33..15) */
+		;))))))
 	} cn30xx;
 	struct cvmx_l2c_tag_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved:42;
-		uint64_t V:1;		/* Line valid */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t L:1;		/* Line locked */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t addr:18;	/* Phys mem addr (33..16) */
-#else
-		uint64_t addr:18;	/* Phys mem addr (33..16) */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t L:1;		/* Line locked */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t V:1;		/* Line valid */
-		uint64_t reserved:42;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved:42,
+		__BITFIELD_FIELD(uint64_t V:1,		/* Line valid */
+		__BITFIELD_FIELD(uint64_t D:1,		/* Line dirty */
+		__BITFIELD_FIELD(uint64_t L:1,		/* Line locked */
+		__BITFIELD_FIELD(uint64_t U:1,		/* Use, LRU eviction */
+		__BITFIELD_FIELD(uint64_t addr:18,	/* Phys addr (33..16) */
+		;))))))
 	} cn31xx;
 	struct cvmx_l2c_tag_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved:43;
-		uint64_t V:1;		/* Line valid */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t L:1;		/* Line locked */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t addr:17;	/* Phys mem addr (33..17) */
-#else
-		uint64_t addr:17;	/* Phys mem addr (33..17) */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t L:1;		/* Line locked */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t V:1;		/* Line valid */
-		uint64_t reserved:43;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved:43,
+		__BITFIELD_FIELD(uint64_t V:1,		/* Line valid */
+		__BITFIELD_FIELD(uint64_t D:1,		/* Line dirty */
+		__BITFIELD_FIELD(uint64_t L:1,		/* Line locked */
+		__BITFIELD_FIELD(uint64_t U:1,		/* Use, LRU eviction */
+		__BITFIELD_FIELD(uint64_t addr:17,	/* Phys addr (33..17) */
+		;))))))
 	} cn38xx;
 	struct cvmx_l2c_tag_cn58xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved:44;
-		uint64_t V:1;		/* Line valid */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t L:1;		/* Line locked */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t addr:16;	/* Phys mem addr (33..18) */
-#else
-		uint64_t addr:16;	/* Phys mem addr (33..18) */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t L:1;		/* Line locked */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t V:1;		/* Line valid */
-		uint64_t reserved:44;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved:44,
+		__BITFIELD_FIELD(uint64_t V:1,		/* Line valid */
+		__BITFIELD_FIELD(uint64_t D:1,		/* Line dirty */
+		__BITFIELD_FIELD(uint64_t L:1,		/* Line locked */
+		__BITFIELD_FIELD(uint64_t U:1,		/* Use, LRU eviction */
+		__BITFIELD_FIELD(uint64_t addr:16,	/* Phys addr (33..18) */
+		;))))))
 	} cn58xx;
 	struct cvmx_l2c_tag_cn58xx cn56xx;	/* 2048 sets */
 	struct cvmx_l2c_tag_cn31xx cn52xx;	/* 512 sets */
@@ -629,8 +596,8 @@ static union __cvmx_l2c_tag __read_l2_tag(uint64_t assoc, uint64_t index)
 	union __cvmx_l2c_tag tag_val;
 	uint64_t dbg_addr = CVMX_L2C_DBG;
 	unsigned long flags;
-
 	union cvmx_l2c_dbg debug_val;
+
 	debug_val.u64 = 0;
 	/*
 	 * For low core count parts, the core number is always small
@@ -683,8 +650,8 @@ static union __cvmx_l2c_tag __read_l2_tag(uint64_t assoc, uint64_t index)
 union cvmx_l2c_tag cvmx_l2c_get_tag(uint32_t association, uint32_t index)
 {
 	union cvmx_l2c_tag tag;
-	tag.u64 = 0;
 
+	tag.u64 = 0;
 	if ((int)association >= cvmx_l2c_get_num_assoc()) {
 		cvmx_dprintf("ERROR: cvmx_l2c_get_tag association out of range\n");
 		return tag;
@@ -767,10 +734,12 @@ uint32_t cvmx_l2c_address_to_index(uint64_t addr)
 
 	if (OCTEON_IS_MODEL(OCTEON_CN6XXX)) {
 		union cvmx_l2c_ctl l2c_ctl;
+
 		l2c_ctl.u64 = cvmx_read_csr(CVMX_L2C_CTL);
 		indxalias = !l2c_ctl.s.disidxalias;
 	} else {
 		union cvmx_l2c_cfg l2c_cfg;
+
 		l2c_cfg.u64 = cvmx_read_csr(CVMX_L2C_CFG);
 		indxalias = l2c_cfg.s.idxalias;
 	}
@@ -778,6 +747,7 @@ uint32_t cvmx_l2c_address_to_index(uint64_t addr)
 	if (indxalias) {
 		if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
 			uint32_t a_14_12 = (idx / (CVMX_L2C_MEMBANK_SELECT_SIZE/(1<<CVMX_L2C_IDX_ADDR_SHIFT))) & 0x7;
+
 			idx ^= idx / cvmx_l2c_get_num_sets();
 			idx ^= a_14_12;
 		} else {
@@ -801,6 +771,7 @@ int cvmx_l2c_get_cache_size_bytes(void)
 int cvmx_l2c_get_set_bits(void)
 {
 	int l2_set_bits;
+
 	if (OCTEON_IS_MODEL(OCTEON_CN56XX) || OCTEON_IS_MODEL(OCTEON_CN58XX))
 		l2_set_bits = 11;	/* 2048 sets */
 	else if (OCTEON_IS_MODEL(OCTEON_CN38XX) || OCTEON_IS_MODEL(OCTEON_CN63XX))
@@ -828,6 +799,7 @@ int cvmx_l2c_get_num_sets(void)
 int cvmx_l2c_get_num_assoc(void)
 {
 	int l2_assoc;
+
 	if (OCTEON_IS_MODEL(OCTEON_CN56XX) ||
 	    OCTEON_IS_MODEL(OCTEON_CN52XX) ||
 	    OCTEON_IS_MODEL(OCTEON_CN58XX) ||
@@ -869,16 +841,17 @@ int cvmx_l2c_get_num_assoc(void)
 		else if (mio_fus_dat3.s.l2c_crip == 1)
 			l2_assoc = 12;
 	} else {
-		union cvmx_l2d_fus3 val;
-		val.u64 = cvmx_read_csr(CVMX_L2D_FUS3);
+		uint64_t l2d_fus3;
+
+		l2d_fus3 = cvmx_read_csr(CVMX_L2D_FUS3);
 		/*
 		 * Using shifts here, as bit position names are
 		 * different for each model but they all mean the
 		 * same.
 		 */
-		if ((val.u64 >> 35) & 0x1)
+		if ((l2d_fus3 >> 35) & 0x1)
 			l2_assoc = l2_assoc >> 2;
-		else if ((val.u64 >> 34) & 0x1)
+		else if ((l2d_fus3 >> 34) & 0x1)
 			l2_assoc = l2_assoc >> 1;
 	}
 	return l2_assoc;
diff --git a/arch/mips/cavium-octeon/executive/octeon-model.c b/arch/mips/cavium-octeon/executive/octeon-model.c
index d08a2bce653c..341052387b49 100644
--- a/arch/mips/cavium-octeon/executive/octeon-model.c
+++ b/arch/mips/cavium-octeon/executive/octeon-model.c
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2010 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -63,16 +63,15 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 	char pass[4];
 	int clock_mhz;
 	const char *suffix;
-	union cvmx_l2d_fus3 fus3;
 	int num_cores;
 	union cvmx_mio_fus_dat2 fus_dat2;
 	union cvmx_mio_fus_dat3 fus_dat3;
 	char fuse_model[10];
 	uint32_t fuse_data = 0;
+	uint64_t l2d_fus3 = 0;
 
-	fus3.u64 = 0;
 	if (OCTEON_IS_MODEL(OCTEON_CN3XXX) || OCTEON_IS_MODEL(OCTEON_CN5XXX))
-		fus3.u64 = cvmx_read_csr(CVMX_L2D_FUS3);
+		l2d_fus3 = (cvmx_read_csr(CVMX_L2D_FUS3) >> 34) & 0x3;
 	fus_dat2.u64 = cvmx_read_csr(CVMX_MIO_FUS_DAT2);
 	fus_dat3.u64 = cvmx_read_csr(CVMX_MIO_FUS_DAT3);
 	num_cores = cvmx_octeon_num_cores();
@@ -192,7 +191,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 	/* Now figure out the family, the first two digits */
 	switch ((chip_id >> 8) & 0xff) {
 	case 0:		/* CN38XX, CN37XX or CN36XX */
-		if (fus3.cn38xx.crip_512k) {
+		if (l2d_fus3) {
 			/*
 			 * For some unknown reason, the 16 core one is
 			 * called 37 instead of 36.
@@ -223,7 +222,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 		}
 		break;
 	case 1:		/* CN31XX or CN3020 */
-		if ((chip_id & 0x10) || fus3.cn31xx.crip_128k)
+		if ((chip_id & 0x10) || l2d_fus3)
 			family = "30";
 		else
 			family = "31";
@@ -246,7 +245,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 	case 2:		/* CN3010 or CN3005 */
 		family = "30";
 		/* A chip with half cache is an 05 */
-		if (fus3.cn30xx.crip_64k)
+		if (l2d_fus3)
 			core_model = "05";
 		/*
 		 * This series of chips didn't follow the standard
@@ -267,7 +266,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 	case 3:		/* CN58XX */
 		family = "58";
 		/* Special case. 4 core, half cache (CP with half cache) */
-		if ((num_cores == 4) && fus3.cn58xx.crip_1024k && !strncmp(suffix, "CP", 2))
+		if ((num_cores == 4) && l2d_fus3 && !strncmp(suffix, "CP", 2))
 			core_model = "29";
 
 		/* Pass 1 uses different encodings for pass numbers */
@@ -290,7 +289,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 		break;
 	case 4:		/* CN57XX, CN56XX, CN55XX, CN54XX */
 		if (fus_dat2.cn56xx.raid_en) {
-			if (fus3.cn56xx.crip_1024k)
+			if (l2d_fus3)
 				family = "55";
 			else
 				family = "57";
@@ -309,7 +308,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 				if (fus_dat3.cn56xx.bar2_en)
 					suffix = "NSPB2";
 			}
-			if (fus3.cn56xx.crip_1024k)
+			if (l2d_fus3)
 				family = "54";
 			else
 				family = "56";
@@ -319,7 +318,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
 		family = "50";
 		break;
 	case 7:		/* CN52XX */
-		if (fus3.cn52xx.crip_256k)
+		if (l2d_fus3)
 			family = "51";
 		else
 			family = "52";
diff --git a/arch/mips/include/asm/octeon/cvmx-l2c-defs.h b/arch/mips/include/asm/octeon/cvmx-l2c-defs.h
index 10262cb6ff50..d045973ddb33 100644
--- a/arch/mips/include/asm/octeon/cvmx-l2c-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-l2c-defs.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2012 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -28,3140 +28,177 @@
 #ifndef __CVMX_L2C_DEFS_H__
 #define __CVMX_L2C_DEFS_H__
 
-#define CVMX_L2C_BIG_CTL (CVMX_ADD_IO_SEG(0x0001180080800030ull))
-#define CVMX_L2C_BST (CVMX_ADD_IO_SEG(0x00011800808007F8ull))
-#define CVMX_L2C_BST0 (CVMX_ADD_IO_SEG(0x00011800800007F8ull))
-#define CVMX_L2C_BST1 (CVMX_ADD_IO_SEG(0x00011800800007F0ull))
-#define CVMX_L2C_BST2 (CVMX_ADD_IO_SEG(0x00011800800007E8ull))
-#define CVMX_L2C_BST_MEMX(block_id) (CVMX_ADD_IO_SEG(0x0001180080C007F8ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_BST_TDTX(block_id) (CVMX_ADD_IO_SEG(0x0001180080A007F0ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_BST_TTGX(block_id) (CVMX_ADD_IO_SEG(0x0001180080A007F8ull) + ((block_id) & 3) * 0x40000ull)
+#include <uapi/asm/bitfield.h>
+
+#define CVMX_L2C_DBG (CVMX_ADD_IO_SEG(0x0001180080000030ull))
 #define CVMX_L2C_CFG (CVMX_ADD_IO_SEG(0x0001180080000000ull))
-#define CVMX_L2C_COP0_MAPX(offset) (CVMX_ADD_IO_SEG(0x0001180080940000ull) + ((offset) & 16383) * 8)
 #define CVMX_L2C_CTL (CVMX_ADD_IO_SEG(0x0001180080800000ull))
-#define CVMX_L2C_DBG (CVMX_ADD_IO_SEG(0x0001180080000030ull))
-#define CVMX_L2C_DUT (CVMX_ADD_IO_SEG(0x0001180080000050ull))
-#define CVMX_L2C_DUT_MAPX(offset) (CVMX_ADD_IO_SEG(0x0001180080E00000ull) + ((offset) & 8191) * 8)
-#define CVMX_L2C_ERR_TDTX(block_id) (CVMX_ADD_IO_SEG(0x0001180080A007E0ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_ERR_TTGX(block_id) (CVMX_ADD_IO_SEG(0x0001180080A007E8ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_ERR_VBFX(block_id) (CVMX_ADD_IO_SEG(0x0001180080C007F0ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_ERR_XMC (CVMX_ADD_IO_SEG(0x00011800808007D8ull))
-#define CVMX_L2C_GRPWRR0 (CVMX_ADD_IO_SEG(0x00011800800000C8ull))
-#define CVMX_L2C_GRPWRR1 (CVMX_ADD_IO_SEG(0x00011800800000D0ull))
-#define CVMX_L2C_INT_EN (CVMX_ADD_IO_SEG(0x0001180080000100ull))
-#define CVMX_L2C_INT_ENA (CVMX_ADD_IO_SEG(0x0001180080800020ull))
-#define CVMX_L2C_INT_REG (CVMX_ADD_IO_SEG(0x0001180080800018ull))
-#define CVMX_L2C_INT_STAT (CVMX_ADD_IO_SEG(0x00011800800000F8ull))
-#define CVMX_L2C_IOCX_PFC(block_id) (CVMX_ADD_IO_SEG(0x0001180080800420ull))
-#define CVMX_L2C_IORX_PFC(block_id) (CVMX_ADD_IO_SEG(0x0001180080800428ull))
 #define CVMX_L2C_LCKBASE (CVMX_ADD_IO_SEG(0x0001180080000058ull))
 #define CVMX_L2C_LCKOFF (CVMX_ADD_IO_SEG(0x0001180080000060ull))
-#define CVMX_L2C_LFB0 (CVMX_ADD_IO_SEG(0x0001180080000038ull))
-#define CVMX_L2C_LFB1 (CVMX_ADD_IO_SEG(0x0001180080000040ull))
-#define CVMX_L2C_LFB2 (CVMX_ADD_IO_SEG(0x0001180080000048ull))
-#define CVMX_L2C_LFB3 (CVMX_ADD_IO_SEG(0x00011800800000B8ull))
-#define CVMX_L2C_OOB (CVMX_ADD_IO_SEG(0x00011800800000D8ull))
-#define CVMX_L2C_OOB1 (CVMX_ADD_IO_SEG(0x00011800800000E0ull))
-#define CVMX_L2C_OOB2 (CVMX_ADD_IO_SEG(0x00011800800000E8ull))
-#define CVMX_L2C_OOB3 (CVMX_ADD_IO_SEG(0x00011800800000F0ull))
+#define CVMX_L2C_PFCTL (CVMX_ADD_IO_SEG(0x0001180080000090ull))
+#define CVMX_L2C_PFCX(offset) (CVMX_ADD_IO_SEG(0x0001180080000098ull) +	       \
+		((offset) & 3) * 8)
 #define CVMX_L2C_PFC0 CVMX_L2C_PFCX(0)
 #define CVMX_L2C_PFC1 CVMX_L2C_PFCX(1)
 #define CVMX_L2C_PFC2 CVMX_L2C_PFCX(2)
 #define CVMX_L2C_PFC3 CVMX_L2C_PFCX(3)
-#define CVMX_L2C_PFCTL (CVMX_ADD_IO_SEG(0x0001180080000090ull))
-#define CVMX_L2C_PFCX(offset) (CVMX_ADD_IO_SEG(0x0001180080000098ull) + ((offset) & 3) * 8)
-#define CVMX_L2C_PPGRP (CVMX_ADD_IO_SEG(0x00011800800000C0ull))
-#define CVMX_L2C_QOS_IOBX(offset) (CVMX_ADD_IO_SEG(0x0001180080880200ull) + ((offset) & 1) * 8)
-#define CVMX_L2C_QOS_PPX(offset) (CVMX_ADD_IO_SEG(0x0001180080880000ull) + ((offset) & 31) * 8)
-#define CVMX_L2C_QOS_WGT (CVMX_ADD_IO_SEG(0x0001180080800008ull))
-#define CVMX_L2C_RSCX_PFC(offset) (CVMX_ADD_IO_SEG(0x0001180080800410ull) + ((offset) & 3) * 64)
-#define CVMX_L2C_RSDX_PFC(offset) (CVMX_ADD_IO_SEG(0x0001180080800418ull) + ((offset) & 3) * 64)
 #define CVMX_L2C_SPAR0 (CVMX_ADD_IO_SEG(0x0001180080000068ull))
 #define CVMX_L2C_SPAR1 (CVMX_ADD_IO_SEG(0x0001180080000070ull))
 #define CVMX_L2C_SPAR2 (CVMX_ADD_IO_SEG(0x0001180080000078ull))
 #define CVMX_L2C_SPAR3 (CVMX_ADD_IO_SEG(0x0001180080000080ull))
 #define CVMX_L2C_SPAR4 (CVMX_ADD_IO_SEG(0x0001180080000088ull))
-#define CVMX_L2C_TADX_ECC0(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00018ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_ECC1(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00020ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_IEN(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00000ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_INT(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00028ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_PFC0(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00400ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_PFC1(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00408ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_PFC2(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00410ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_PFC3(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00418ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_PRF(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00008ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_TADX_TAG(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00010ull) + ((block_id) & 3) * 0x40000ull)
-#define CVMX_L2C_VER_ID (CVMX_ADD_IO_SEG(0x00011800808007E0ull))
-#define CVMX_L2C_VER_IOB (CVMX_ADD_IO_SEG(0x00011800808007F0ull))
-#define CVMX_L2C_VER_MSC (CVMX_ADD_IO_SEG(0x00011800808007D0ull))
-#define CVMX_L2C_VER_PP (CVMX_ADD_IO_SEG(0x00011800808007E8ull))
-#define CVMX_L2C_VIRTID_IOBX(offset) (CVMX_ADD_IO_SEG(0x00011800808C0200ull) + ((offset) & 1) * 8)
-#define CVMX_L2C_VIRTID_PPX(offset) (CVMX_ADD_IO_SEG(0x00011800808C0000ull) + ((offset) & 31) * 8)
-#define CVMX_L2C_VRT_CTL (CVMX_ADD_IO_SEG(0x0001180080800010ull))
-#define CVMX_L2C_VRT_MEMX(offset) (CVMX_ADD_IO_SEG(0x0001180080900000ull) + ((offset) & 1023) * 8)
-#define CVMX_L2C_WPAR_IOBX(offset) (CVMX_ADD_IO_SEG(0x0001180080840200ull) + ((offset) & 1) * 8)
-#define CVMX_L2C_WPAR_PPX(offset) (CVMX_ADD_IO_SEG(0x0001180080840000ull) + ((offset) & 31) * 8)
-#define CVMX_L2C_XMCX_PFC(offset) (CVMX_ADD_IO_SEG(0x0001180080800400ull) + ((offset) & 3) * 64)
-#define CVMX_L2C_XMC_CMD (CVMX_ADD_IO_SEG(0x0001180080800028ull))
-#define CVMX_L2C_XMDX_PFC(offset) (CVMX_ADD_IO_SEG(0x0001180080800408ull) + ((offset) & 3) * 64)
-
-union cvmx_l2c_big_ctl {
-	uint64_t u64;
-	struct cvmx_l2c_big_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t maxdram:4;
-		uint64_t reserved_1_3:3;
-		uint64_t disable:1;
-#else
-		uint64_t disable:1;
-		uint64_t reserved_1_3:3;
-		uint64_t maxdram:4;
-		uint64_t reserved_8_63:56;
-#endif
-	} s;
-	struct cvmx_l2c_big_ctl_s cn61xx;
-	struct cvmx_l2c_big_ctl_s cn63xx;
-	struct cvmx_l2c_big_ctl_s cn66xx;
-	struct cvmx_l2c_big_ctl_s cn68xx;
-	struct cvmx_l2c_big_ctl_s cn68xxp1;
-	struct cvmx_l2c_big_ctl_s cnf71xx;
-};
-
-union cvmx_l2c_bst {
-	uint64_t u64;
-	struct cvmx_l2c_bst_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dutfl:32;
-		uint64_t rbffl:4;
-		uint64_t xbffl:4;
-		uint64_t tdpfl:4;
-		uint64_t ioccmdfl:4;
-		uint64_t iocdatfl:4;
-		uint64_t dutresfl:4;
-		uint64_t vrtfl:4;
-		uint64_t tdffl:4;
-#else
-		uint64_t tdffl:4;
-		uint64_t vrtfl:4;
-		uint64_t dutresfl:4;
-		uint64_t iocdatfl:4;
-		uint64_t ioccmdfl:4;
-		uint64_t tdpfl:4;
-		uint64_t xbffl:4;
-		uint64_t rbffl:4;
-		uint64_t dutfl:32;
-#endif
-	} s;
-	struct cvmx_l2c_bst_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_36_63:28;
-		uint64_t dutfl:4;
-		uint64_t reserved_17_31:15;
-		uint64_t ioccmdfl:1;
-		uint64_t reserved_13_15:3;
-		uint64_t iocdatfl:1;
-		uint64_t reserved_9_11:3;
-		uint64_t dutresfl:1;
-		uint64_t reserved_5_7:3;
-		uint64_t vrtfl:1;
-		uint64_t reserved_1_3:3;
-		uint64_t tdffl:1;
-#else
-		uint64_t tdffl:1;
-		uint64_t reserved_1_3:3;
-		uint64_t vrtfl:1;
-		uint64_t reserved_5_7:3;
-		uint64_t dutresfl:1;
-		uint64_t reserved_9_11:3;
-		uint64_t iocdatfl:1;
-		uint64_t reserved_13_15:3;
-		uint64_t ioccmdfl:1;
-		uint64_t reserved_17_31:15;
-		uint64_t dutfl:4;
-		uint64_t reserved_36_63:28;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_bst_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_38_63:26;
-		uint64_t dutfl:6;
-		uint64_t reserved_17_31:15;
-		uint64_t ioccmdfl:1;
-		uint64_t reserved_13_15:3;
-		uint64_t iocdatfl:1;
-		uint64_t reserved_9_11:3;
-		uint64_t dutresfl:1;
-		uint64_t reserved_5_7:3;
-		uint64_t vrtfl:1;
-		uint64_t reserved_1_3:3;
-		uint64_t tdffl:1;
-#else
-		uint64_t tdffl:1;
-		uint64_t reserved_1_3:3;
-		uint64_t vrtfl:1;
-		uint64_t reserved_5_7:3;
-		uint64_t dutresfl:1;
-		uint64_t reserved_9_11:3;
-		uint64_t iocdatfl:1;
-		uint64_t reserved_13_15:3;
-		uint64_t ioccmdfl:1;
-		uint64_t reserved_17_31:15;
-		uint64_t dutfl:6;
-		uint64_t reserved_38_63:26;
-#endif
-	} cn63xx;
-	struct cvmx_l2c_bst_cn63xx cn63xxp1;
-	struct cvmx_l2c_bst_cn66xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_42_63:22;
-		uint64_t dutfl:10;
-		uint64_t reserved_17_31:15;
-		uint64_t ioccmdfl:1;
-		uint64_t reserved_13_15:3;
-		uint64_t iocdatfl:1;
-		uint64_t reserved_9_11:3;
-		uint64_t dutresfl:1;
-		uint64_t reserved_5_7:3;
-		uint64_t vrtfl:1;
-		uint64_t reserved_1_3:3;
-		uint64_t tdffl:1;
-#else
-		uint64_t tdffl:1;
-		uint64_t reserved_1_3:3;
-		uint64_t vrtfl:1;
-		uint64_t reserved_5_7:3;
-		uint64_t dutresfl:1;
-		uint64_t reserved_9_11:3;
-		uint64_t iocdatfl:1;
-		uint64_t reserved_13_15:3;
-		uint64_t ioccmdfl:1;
-		uint64_t reserved_17_31:15;
-		uint64_t dutfl:10;
-		uint64_t reserved_42_63:22;
-#endif
-	} cn66xx;
-	struct cvmx_l2c_bst_s cn68xx;
-	struct cvmx_l2c_bst_s cn68xxp1;
-	struct cvmx_l2c_bst_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_bst0 {
-	uint64_t u64;
-	struct cvmx_l2c_bst0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t dtbnk:1;
-		uint64_t wlb_msk:4;
-		uint64_t dtcnt:13;
-		uint64_t dt:1;
-		uint64_t stin_msk:1;
-		uint64_t wlb_dat:4;
-#else
-		uint64_t wlb_dat:4;
-		uint64_t stin_msk:1;
-		uint64_t dt:1;
-		uint64_t dtcnt:13;
-		uint64_t wlb_msk:4;
-		uint64_t dtbnk:1;
-		uint64_t reserved_24_63:40;
-#endif
-	} s;
-	struct cvmx_l2c_bst0_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_23_63:41;
-		uint64_t wlb_msk:4;
-		uint64_t reserved_15_18:4;
-		uint64_t dtcnt:9;
-		uint64_t dt:1;
-		uint64_t reserved_4_4:1;
-		uint64_t wlb_dat:4;
-#else
-		uint64_t wlb_dat:4;
-		uint64_t reserved_4_4:1;
-		uint64_t dt:1;
-		uint64_t dtcnt:9;
-		uint64_t reserved_15_18:4;
-		uint64_t wlb_msk:4;
-		uint64_t reserved_23_63:41;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_bst0_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_23_63:41;
-		uint64_t wlb_msk:4;
-		uint64_t reserved_16_18:3;
-		uint64_t dtcnt:10;
-		uint64_t dt:1;
-		uint64_t stin_msk:1;
-		uint64_t wlb_dat:4;
-#else
-		uint64_t wlb_dat:4;
-		uint64_t stin_msk:1;
-		uint64_t dt:1;
-		uint64_t dtcnt:10;
-		uint64_t reserved_16_18:3;
-		uint64_t wlb_msk:4;
-		uint64_t reserved_23_63:41;
-#endif
-	} cn31xx;
-	struct cvmx_l2c_bst0_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_19_63:45;
-		uint64_t dtcnt:13;
-		uint64_t dt:1;
-		uint64_t stin_msk:1;
-		uint64_t wlb_dat:4;
-#else
-		uint64_t wlb_dat:4;
-		uint64_t stin_msk:1;
-		uint64_t dt:1;
-		uint64_t dtcnt:13;
-		uint64_t reserved_19_63:45;
-#endif
-	} cn38xx;
-	struct cvmx_l2c_bst0_cn38xx cn38xxp2;
-	struct cvmx_l2c_bst0_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t dtbnk:1;
-		uint64_t wlb_msk:4;
-		uint64_t reserved_16_18:3;
-		uint64_t dtcnt:10;
-		uint64_t dt:1;
-		uint64_t stin_msk:1;
-		uint64_t wlb_dat:4;
-#else
-		uint64_t wlb_dat:4;
-		uint64_t stin_msk:1;
-		uint64_t dt:1;
-		uint64_t dtcnt:10;
-		uint64_t reserved_16_18:3;
-		uint64_t wlb_msk:4;
-		uint64_t dtbnk:1;
-		uint64_t reserved_24_63:40;
-#endif
-	} cn50xx;
-	struct cvmx_l2c_bst0_cn50xx cn52xx;
-	struct cvmx_l2c_bst0_cn50xx cn52xxp1;
-	struct cvmx_l2c_bst0_s cn56xx;
-	struct cvmx_l2c_bst0_s cn56xxp1;
-	struct cvmx_l2c_bst0_s cn58xx;
-	struct cvmx_l2c_bst0_s cn58xxp1;
-};
-
-union cvmx_l2c_bst1 {
-	uint64_t u64;
-	struct cvmx_l2c_bst1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t l2t:9;
-#else
-		uint64_t l2t:9;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_l2c_bst1_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t vwdf:4;
-		uint64_t lrf:2;
-		uint64_t vab_vwcf:1;
-		uint64_t reserved_5_8:4;
-		uint64_t l2t:5;
-#else
-		uint64_t l2t:5;
-		uint64_t reserved_5_8:4;
-		uint64_t vab_vwcf:1;
-		uint64_t lrf:2;
-		uint64_t vwdf:4;
-		uint64_t reserved_16_63:48;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_bst1_cn30xx cn31xx;
-	struct cvmx_l2c_bst1_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t vwdf:4;
-		uint64_t lrf:2;
-		uint64_t vab_vwcf:1;
-		uint64_t l2t:9;
-#else
-		uint64_t l2t:9;
-		uint64_t vab_vwcf:1;
-		uint64_t lrf:2;
-		uint64_t vwdf:4;
-		uint64_t reserved_16_63:48;
-#endif
-	} cn38xx;
-	struct cvmx_l2c_bst1_cn38xx cn38xxp2;
-	struct cvmx_l2c_bst1_cn38xx cn50xx;
-	struct cvmx_l2c_bst1_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_19_63:45;
-		uint64_t plc2:1;
-		uint64_t plc1:1;
-		uint64_t plc0:1;
-		uint64_t vwdf:4;
-		uint64_t reserved_11_11:1;
-		uint64_t ilc:1;
-		uint64_t vab_vwcf:1;
-		uint64_t l2t:9;
-#else
-		uint64_t l2t:9;
-		uint64_t vab_vwcf:1;
-		uint64_t ilc:1;
-		uint64_t reserved_11_11:1;
-		uint64_t vwdf:4;
-		uint64_t plc0:1;
-		uint64_t plc1:1;
-		uint64_t plc2:1;
-		uint64_t reserved_19_63:45;
-#endif
-	} cn52xx;
-	struct cvmx_l2c_bst1_cn52xx cn52xxp1;
-	struct cvmx_l2c_bst1_cn56xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t plc2:1;
-		uint64_t plc1:1;
-		uint64_t plc0:1;
-		uint64_t ilc:1;
-		uint64_t vwdf1:4;
-		uint64_t vwdf0:4;
-		uint64_t vab_vwcf1:1;
-		uint64_t reserved_10_10:1;
-		uint64_t vab_vwcf0:1;
-		uint64_t l2t:9;
-#else
-		uint64_t l2t:9;
-		uint64_t vab_vwcf0:1;
-		uint64_t reserved_10_10:1;
-		uint64_t vab_vwcf1:1;
-		uint64_t vwdf0:4;
-		uint64_t vwdf1:4;
-		uint64_t ilc:1;
-		uint64_t plc0:1;
-		uint64_t plc1:1;
-		uint64_t plc2:1;
-		uint64_t reserved_24_63:40;
-#endif
-	} cn56xx;
-	struct cvmx_l2c_bst1_cn56xx cn56xxp1;
-	struct cvmx_l2c_bst1_cn38xx cn58xx;
-	struct cvmx_l2c_bst1_cn38xx cn58xxp1;
-};
-
-union cvmx_l2c_bst2 {
-	uint64_t u64;
-	struct cvmx_l2c_bst2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t mrb:4;
-		uint64_t reserved_4_11:8;
-		uint64_t ipcbst:1;
-		uint64_t picbst:1;
-		uint64_t xrdmsk:1;
-		uint64_t xrddat:1;
-#else
-		uint64_t xrddat:1;
-		uint64_t xrdmsk:1;
-		uint64_t picbst:1;
-		uint64_t ipcbst:1;
-		uint64_t reserved_4_11:8;
-		uint64_t mrb:4;
-		uint64_t reserved_16_63:48;
-#endif
-	} s;
-	struct cvmx_l2c_bst2_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t mrb:4;
-		uint64_t rmdf:4;
-		uint64_t reserved_4_7:4;
-		uint64_t ipcbst:1;
-		uint64_t reserved_2_2:1;
-		uint64_t xrdmsk:1;
-		uint64_t xrddat:1;
-#else
-		uint64_t xrddat:1;
-		uint64_t xrdmsk:1;
-		uint64_t reserved_2_2:1;
-		uint64_t ipcbst:1;
-		uint64_t reserved_4_7:4;
-		uint64_t rmdf:4;
-		uint64_t mrb:4;
-		uint64_t reserved_16_63:48;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_bst2_cn30xx cn31xx;
-	struct cvmx_l2c_bst2_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t mrb:4;
-		uint64_t rmdf:4;
-		uint64_t rhdf:4;
-		uint64_t ipcbst:1;
-		uint64_t picbst:1;
-		uint64_t xrdmsk:1;
-		uint64_t xrddat:1;
-#else
-		uint64_t xrddat:1;
-		uint64_t xrdmsk:1;
-		uint64_t picbst:1;
-		uint64_t ipcbst:1;
-		uint64_t rhdf:4;
-		uint64_t rmdf:4;
-		uint64_t mrb:4;
-		uint64_t reserved_16_63:48;
-#endif
-	} cn38xx;
-	struct cvmx_l2c_bst2_cn38xx cn38xxp2;
-	struct cvmx_l2c_bst2_cn30xx cn50xx;
-	struct cvmx_l2c_bst2_cn30xx cn52xx;
-	struct cvmx_l2c_bst2_cn30xx cn52xxp1;
-	struct cvmx_l2c_bst2_cn56xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t mrb:4;
-		uint64_t rmdb:4;
-		uint64_t rhdb:4;
-		uint64_t ipcbst:1;
-		uint64_t picbst:1;
-		uint64_t xrdmsk:1;
-		uint64_t xrddat:1;
-#else
-		uint64_t xrddat:1;
-		uint64_t xrdmsk:1;
-		uint64_t picbst:1;
-		uint64_t ipcbst:1;
-		uint64_t rhdb:4;
-		uint64_t rmdb:4;
-		uint64_t mrb:4;
-		uint64_t reserved_16_63:48;
-#endif
-	} cn56xx;
-	struct cvmx_l2c_bst2_cn56xx cn56xxp1;
-	struct cvmx_l2c_bst2_cn56xx cn58xx;
-	struct cvmx_l2c_bst2_cn56xx cn58xxp1;
-};
-
-union cvmx_l2c_bst_memx {
-	uint64_t u64;
-	struct cvmx_l2c_bst_memx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t start_bist:1;
-		uint64_t clear_bist:1;
-		uint64_t reserved_5_61:57;
-		uint64_t rdffl:1;
-		uint64_t vbffl:4;
-#else
-		uint64_t vbffl:4;
-		uint64_t rdffl:1;
-		uint64_t reserved_5_61:57;
-		uint64_t clear_bist:1;
-		uint64_t start_bist:1;
-#endif
-	} s;
-	struct cvmx_l2c_bst_memx_s cn61xx;
-	struct cvmx_l2c_bst_memx_s cn63xx;
-	struct cvmx_l2c_bst_memx_s cn63xxp1;
-	struct cvmx_l2c_bst_memx_s cn66xx;
-	struct cvmx_l2c_bst_memx_s cn68xx;
-	struct cvmx_l2c_bst_memx_s cn68xxp1;
-	struct cvmx_l2c_bst_memx_s cnf71xx;
-};
-
-union cvmx_l2c_bst_tdtx {
-	uint64_t u64;
-	struct cvmx_l2c_bst_tdtx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t fbfrspfl:8;
-		uint64_t sbffl:8;
-		uint64_t fbffl:8;
-		uint64_t l2dfl:8;
-#else
-		uint64_t l2dfl:8;
-		uint64_t fbffl:8;
-		uint64_t sbffl:8;
-		uint64_t fbfrspfl:8;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_bst_tdtx_s cn61xx;
-	struct cvmx_l2c_bst_tdtx_s cn63xx;
-	struct cvmx_l2c_bst_tdtx_cn63xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t sbffl:8;
-		uint64_t fbffl:8;
-		uint64_t l2dfl:8;
-#else
-		uint64_t l2dfl:8;
-		uint64_t fbffl:8;
-		uint64_t sbffl:8;
-		uint64_t reserved_24_63:40;
-#endif
-	} cn63xxp1;
-	struct cvmx_l2c_bst_tdtx_s cn66xx;
-	struct cvmx_l2c_bst_tdtx_s cn68xx;
-	struct cvmx_l2c_bst_tdtx_s cn68xxp1;
-	struct cvmx_l2c_bst_tdtx_s cnf71xx;
-};
+#define CVMX_L2C_TADX_PFCX(offset, block_id)				       \
+		(CVMX_ADD_IO_SEG(0x0001180080A00400ull) + (((offset) & 3) +    \
+		((block_id) & 7) * 0x8000ull) * 8)
+#define CVMX_L2C_TADX_PFC0(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00400ull) + \
+		((block_id) & 3) * 0x40000ull)
+#define CVMX_L2C_TADX_PFC1(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00408ull) + \
+		((block_id) & 3) * 0x40000ull)
+#define CVMX_L2C_TADX_PFC2(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00410ull) + \
+		((block_id) & 3) * 0x40000ull)
+#define CVMX_L2C_TADX_PFC3(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00418ull) + \
+		((block_id) & 3) * 0x40000ull)
+#define CVMX_L2C_TADX_PRF(offset) (CVMX_ADD_IO_SEG(0x0001180080A00008ull)    + \
+		((offset) & 7) * 0x40000ull)
+#define CVMX_L2C_TADX_TAG(block_id) (CVMX_ADD_IO_SEG(0x0001180080A00010ull)  + \
+		((block_id) & 3) * 0x40000ull)
+#define CVMX_L2C_WPAR_IOBX(offset) (CVMX_ADD_IO_SEG(0x0001180080840200ull)   + \
+		((offset) & 1) * 8)
+#define CVMX_L2C_WPAR_PPX(offset) (CVMX_ADD_IO_SEG(0x0001180080840000ull)    + \
+		((offset) & 31) * 8)
+#define CVMX_L2D_FUS3 (CVMX_ADD_IO_SEG(0x00011800800007B8ull))
 
-union cvmx_l2c_bst_ttgx {
-	uint64_t u64;
-	struct cvmx_l2c_bst_ttgx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_17_63:47;
-		uint64_t lrufl:1;
-		uint64_t tagfl:16;
-#else
-		uint64_t tagfl:16;
-		uint64_t lrufl:1;
-		uint64_t reserved_17_63:47;
-#endif
-	} s;
-	struct cvmx_l2c_bst_ttgx_s cn61xx;
-	struct cvmx_l2c_bst_ttgx_s cn63xx;
-	struct cvmx_l2c_bst_ttgx_s cn63xxp1;
-	struct cvmx_l2c_bst_ttgx_s cn66xx;
-	struct cvmx_l2c_bst_ttgx_s cn68xx;
-	struct cvmx_l2c_bst_ttgx_s cn68xxp1;
-	struct cvmx_l2c_bst_ttgx_s cnf71xx;
-};
 
 union cvmx_l2c_cfg {
 	uint64_t u64;
 	struct cvmx_l2c_cfg_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t bstrun:1;
-		uint64_t lbist:1;
-		uint64_t xor_bank:1;
-		uint64_t dpres1:1;
-		uint64_t dpres0:1;
-		uint64_t dfill_dis:1;
-		uint64_t fpexp:4;
-		uint64_t fpempty:1;
-		uint64_t fpen:1;
-		uint64_t idxalias:1;
-		uint64_t mwf_crd:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t lrf_arb_mode:1;
-#else
-		uint64_t lrf_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t mwf_crd:4;
-		uint64_t idxalias:1;
-		uint64_t fpen:1;
-		uint64_t fpempty:1;
-		uint64_t fpexp:4;
-		uint64_t dfill_dis:1;
-		uint64_t dpres0:1;
-		uint64_t dpres1:1;
-		uint64_t xor_bank:1;
-		uint64_t lbist:1;
-		uint64_t bstrun:1;
-		uint64_t reserved_20_63:44;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_20_63:44,
+		__BITFIELD_FIELD(uint64_t bstrun:1,
+		__BITFIELD_FIELD(uint64_t lbist:1,
+		__BITFIELD_FIELD(uint64_t xor_bank:1,
+		__BITFIELD_FIELD(uint64_t dpres1:1,
+		__BITFIELD_FIELD(uint64_t dpres0:1,
+		__BITFIELD_FIELD(uint64_t dfill_dis:1,
+		__BITFIELD_FIELD(uint64_t fpexp:4,
+		__BITFIELD_FIELD(uint64_t fpempty:1,
+		__BITFIELD_FIELD(uint64_t fpen:1,
+		__BITFIELD_FIELD(uint64_t idxalias:1,
+		__BITFIELD_FIELD(uint64_t mwf_crd:4,
+		__BITFIELD_FIELD(uint64_t rsp_arb_mode:1,
+		__BITFIELD_FIELD(uint64_t rfb_arb_mode:1,
+		__BITFIELD_FIELD(uint64_t lrf_arb_mode:1,
+		;)))))))))))))))
 	} s;
-	struct cvmx_l2c_cfg_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t fpexp:4;
-		uint64_t fpempty:1;
-		uint64_t fpen:1;
-		uint64_t idxalias:1;
-		uint64_t mwf_crd:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t lrf_arb_mode:1;
-#else
-		uint64_t lrf_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t mwf_crd:4;
-		uint64_t idxalias:1;
-		uint64_t fpen:1;
-		uint64_t fpempty:1;
-		uint64_t fpexp:4;
-		uint64_t reserved_14_63:50;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_cfg_cn30xx cn31xx;
-	struct cvmx_l2c_cfg_cn30xx cn38xx;
-	struct cvmx_l2c_cfg_cn30xx cn38xxp2;
-	struct cvmx_l2c_cfg_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t bstrun:1;
-		uint64_t lbist:1;
-		uint64_t reserved_14_17:4;
-		uint64_t fpexp:4;
-		uint64_t fpempty:1;
-		uint64_t fpen:1;
-		uint64_t idxalias:1;
-		uint64_t mwf_crd:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t lrf_arb_mode:1;
-#else
-		uint64_t lrf_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t mwf_crd:4;
-		uint64_t idxalias:1;
-		uint64_t fpen:1;
-		uint64_t fpempty:1;
-		uint64_t fpexp:4;
-		uint64_t reserved_14_17:4;
-		uint64_t lbist:1;
-		uint64_t bstrun:1;
-		uint64_t reserved_20_63:44;
-#endif
-	} cn50xx;
-	struct cvmx_l2c_cfg_cn50xx cn52xx;
-	struct cvmx_l2c_cfg_cn50xx cn52xxp1;
-	struct cvmx_l2c_cfg_s cn56xx;
-	struct cvmx_l2c_cfg_s cn56xxp1;
-	struct cvmx_l2c_cfg_cn58xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t bstrun:1;
-		uint64_t lbist:1;
-		uint64_t reserved_15_17:3;
-		uint64_t dfill_dis:1;
-		uint64_t fpexp:4;
-		uint64_t fpempty:1;
-		uint64_t fpen:1;
-		uint64_t idxalias:1;
-		uint64_t mwf_crd:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t lrf_arb_mode:1;
-#else
-		uint64_t lrf_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t mwf_crd:4;
-		uint64_t idxalias:1;
-		uint64_t fpen:1;
-		uint64_t fpempty:1;
-		uint64_t fpexp:4;
-		uint64_t dfill_dis:1;
-		uint64_t reserved_15_17:3;
-		uint64_t lbist:1;
-		uint64_t bstrun:1;
-		uint64_t reserved_20_63:44;
-#endif
-	} cn58xx;
-	struct cvmx_l2c_cfg_cn58xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_15_63:49;
-		uint64_t dfill_dis:1;
-		uint64_t fpexp:4;
-		uint64_t fpempty:1;
-		uint64_t fpen:1;
-		uint64_t idxalias:1;
-		uint64_t mwf_crd:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t lrf_arb_mode:1;
-#else
-		uint64_t lrf_arb_mode:1;
-		uint64_t rfb_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t mwf_crd:4;
-		uint64_t idxalias:1;
-		uint64_t fpen:1;
-		uint64_t fpempty:1;
-		uint64_t fpexp:4;
-		uint64_t dfill_dis:1;
-		uint64_t reserved_15_63:49;
-#endif
-	} cn58xxp1;
-};
-
-union cvmx_l2c_cop0_mapx {
-	uint64_t u64;
-	struct cvmx_l2c_cop0_mapx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
-	} s;
-	struct cvmx_l2c_cop0_mapx_s cn61xx;
-	struct cvmx_l2c_cop0_mapx_s cn63xx;
-	struct cvmx_l2c_cop0_mapx_s cn63xxp1;
-	struct cvmx_l2c_cop0_mapx_s cn66xx;
-	struct cvmx_l2c_cop0_mapx_s cn68xx;
-	struct cvmx_l2c_cop0_mapx_s cn68xxp1;
-	struct cvmx_l2c_cop0_mapx_s cnf71xx;
 };
 
 union cvmx_l2c_ctl {
 	uint64_t u64;
 	struct cvmx_l2c_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_30_63:34;
-		uint64_t sepcmt:1;
-		uint64_t rdf_fast:1;
-		uint64_t disstgl2i:1;
-		uint64_t l2dfsbe:1;
-		uint64_t l2dfdbe:1;
-		uint64_t discclk:1;
-		uint64_t maxvab:4;
-		uint64_t maxlfb:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t ef_ena:1;
-		uint64_t ef_cnt:7;
-		uint64_t vab_thresh:4;
-		uint64_t disecc:1;
-		uint64_t disidxalias:1;
-#else
-		uint64_t disidxalias:1;
-		uint64_t disecc:1;
-		uint64_t vab_thresh:4;
-		uint64_t ef_cnt:7;
-		uint64_t ef_ena:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t maxlfb:4;
-		uint64_t maxvab:4;
-		uint64_t discclk:1;
-		uint64_t l2dfdbe:1;
-		uint64_t l2dfsbe:1;
-		uint64_t disstgl2i:1;
-		uint64_t rdf_fast:1;
-		uint64_t sepcmt:1;
-		uint64_t reserved_30_63:34;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_30_63:34,
+		__BITFIELD_FIELD(uint64_t sepcmt:1,
+		__BITFIELD_FIELD(uint64_t rdf_fast:1,
+		__BITFIELD_FIELD(uint64_t disstgl2i:1,
+		__BITFIELD_FIELD(uint64_t l2dfsbe:1,
+		__BITFIELD_FIELD(uint64_t l2dfdbe:1,
+		__BITFIELD_FIELD(uint64_t discclk:1,
+		__BITFIELD_FIELD(uint64_t maxvab:4,
+		__BITFIELD_FIELD(uint64_t maxlfb:4,
+		__BITFIELD_FIELD(uint64_t rsp_arb_mode:1,
+		__BITFIELD_FIELD(uint64_t xmc_arb_mode:1,
+		__BITFIELD_FIELD(uint64_t ef_ena:1,
+		__BITFIELD_FIELD(uint64_t ef_cnt:7,
+		__BITFIELD_FIELD(uint64_t vab_thresh:4,
+		__BITFIELD_FIELD(uint64_t disecc:1,
+		__BITFIELD_FIELD(uint64_t disidxalias:1,
+		;))))))))))))))))
 	} s;
-	struct cvmx_l2c_ctl_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_29_63:35;
-		uint64_t rdf_fast:1;
-		uint64_t disstgl2i:1;
-		uint64_t l2dfsbe:1;
-		uint64_t l2dfdbe:1;
-		uint64_t discclk:1;
-		uint64_t maxvab:4;
-		uint64_t maxlfb:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t ef_ena:1;
-		uint64_t ef_cnt:7;
-		uint64_t vab_thresh:4;
-		uint64_t disecc:1;
-		uint64_t disidxalias:1;
-#else
-		uint64_t disidxalias:1;
-		uint64_t disecc:1;
-		uint64_t vab_thresh:4;
-		uint64_t ef_cnt:7;
-		uint64_t ef_ena:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t maxlfb:4;
-		uint64_t maxvab:4;
-		uint64_t discclk:1;
-		uint64_t l2dfdbe:1;
-		uint64_t l2dfsbe:1;
-		uint64_t disstgl2i:1;
-		uint64_t rdf_fast:1;
-		uint64_t reserved_29_63:35;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_ctl_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_28_63:36;
-		uint64_t disstgl2i:1;
-		uint64_t l2dfsbe:1;
-		uint64_t l2dfdbe:1;
-		uint64_t discclk:1;
-		uint64_t maxvab:4;
-		uint64_t maxlfb:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t ef_ena:1;
-		uint64_t ef_cnt:7;
-		uint64_t vab_thresh:4;
-		uint64_t disecc:1;
-		uint64_t disidxalias:1;
-#else
-		uint64_t disidxalias:1;
-		uint64_t disecc:1;
-		uint64_t vab_thresh:4;
-		uint64_t ef_cnt:7;
-		uint64_t ef_ena:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t maxlfb:4;
-		uint64_t maxvab:4;
-		uint64_t discclk:1;
-		uint64_t l2dfdbe:1;
-		uint64_t l2dfsbe:1;
-		uint64_t disstgl2i:1;
-		uint64_t reserved_28_63:36;
-#endif
-	} cn63xx;
-	struct cvmx_l2c_ctl_cn63xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_25_63:39;
-		uint64_t discclk:1;
-		uint64_t maxvab:4;
-		uint64_t maxlfb:4;
-		uint64_t rsp_arb_mode:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t ef_ena:1;
-		uint64_t ef_cnt:7;
-		uint64_t vab_thresh:4;
-		uint64_t disecc:1;
-		uint64_t disidxalias:1;
-#else
-		uint64_t disidxalias:1;
-		uint64_t disecc:1;
-		uint64_t vab_thresh:4;
-		uint64_t ef_cnt:7;
-		uint64_t ef_ena:1;
-		uint64_t xmc_arb_mode:1;
-		uint64_t rsp_arb_mode:1;
-		uint64_t maxlfb:4;
-		uint64_t maxvab:4;
-		uint64_t discclk:1;
-		uint64_t reserved_25_63:39;
-#endif
-	} cn63xxp1;
-	struct cvmx_l2c_ctl_cn61xx cn66xx;
-	struct cvmx_l2c_ctl_s cn68xx;
-	struct cvmx_l2c_ctl_cn63xx cn68xxp1;
-	struct cvmx_l2c_ctl_cn61xx cnf71xx;
 };
 
 union cvmx_l2c_dbg {
 	uint64_t u64;
 	struct cvmx_l2c_dbg_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_15_63:49;
-		uint64_t lfb_enum:4;
-		uint64_t lfb_dmp:1;
-		uint64_t ppnum:4;
-		uint64_t set:3;
-		uint64_t finv:1;
-		uint64_t l2d:1;
-		uint64_t l2t:1;
-#else
-		uint64_t l2t:1;
-		uint64_t l2d:1;
-		uint64_t finv:1;
-		uint64_t set:3;
-		uint64_t ppnum:4;
-		uint64_t lfb_dmp:1;
-		uint64_t lfb_enum:4;
-		uint64_t reserved_15_63:49;
-#endif
-	} s;
-	struct cvmx_l2c_dbg_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_13_63:51;
-		uint64_t lfb_enum:2;
-		uint64_t lfb_dmp:1;
-		uint64_t reserved_7_9:3;
-		uint64_t ppnum:1;
-		uint64_t reserved_5_5:1;
-		uint64_t set:2;
-		uint64_t finv:1;
-		uint64_t l2d:1;
-		uint64_t l2t:1;
-#else
-		uint64_t l2t:1;
-		uint64_t l2d:1;
-		uint64_t finv:1;
-		uint64_t set:2;
-		uint64_t reserved_5_5:1;
-		uint64_t ppnum:1;
-		uint64_t reserved_7_9:3;
-		uint64_t lfb_dmp:1;
-		uint64_t lfb_enum:2;
-		uint64_t reserved_13_63:51;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_dbg_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t lfb_enum:3;
-		uint64_t lfb_dmp:1;
-		uint64_t reserved_7_9:3;
-		uint64_t ppnum:1;
-		uint64_t reserved_5_5:1;
-		uint64_t set:2;
-		uint64_t finv:1;
-		uint64_t l2d:1;
-		uint64_t l2t:1;
-#else
-		uint64_t l2t:1;
-		uint64_t l2d:1;
-		uint64_t finv:1;
-		uint64_t set:2;
-		uint64_t reserved_5_5:1;
-		uint64_t ppnum:1;
-		uint64_t reserved_7_9:3;
-		uint64_t lfb_dmp:1;
-		uint64_t lfb_enum:3;
-		uint64_t reserved_14_63:50;
-#endif
-	} cn31xx;
-	struct cvmx_l2c_dbg_s cn38xx;
-	struct cvmx_l2c_dbg_s cn38xxp2;
-	struct cvmx_l2c_dbg_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t lfb_enum:3;
-		uint64_t lfb_dmp:1;
-		uint64_t reserved_7_9:3;
-		uint64_t ppnum:1;
-		uint64_t set:3;
-		uint64_t finv:1;
-		uint64_t l2d:1;
-		uint64_t l2t:1;
-#else
-		uint64_t l2t:1;
-		uint64_t l2d:1;
-		uint64_t finv:1;
-		uint64_t set:3;
-		uint64_t ppnum:1;
-		uint64_t reserved_7_9:3;
-		uint64_t lfb_dmp:1;
-		uint64_t lfb_enum:3;
-		uint64_t reserved_14_63:50;
-#endif
-	} cn50xx;
-	struct cvmx_l2c_dbg_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t lfb_enum:3;
-		uint64_t lfb_dmp:1;
-		uint64_t reserved_8_9:2;
-		uint64_t ppnum:2;
-		uint64_t set:3;
-		uint64_t finv:1;
-		uint64_t l2d:1;
-		uint64_t l2t:1;
-#else
-		uint64_t l2t:1;
-		uint64_t l2d:1;
-		uint64_t finv:1;
-		uint64_t set:3;
-		uint64_t ppnum:2;
-		uint64_t reserved_8_9:2;
-		uint64_t lfb_dmp:1;
-		uint64_t lfb_enum:3;
-		uint64_t reserved_14_63:50;
-#endif
-	} cn52xx;
-	struct cvmx_l2c_dbg_cn52xx cn52xxp1;
-	struct cvmx_l2c_dbg_s cn56xx;
-	struct cvmx_l2c_dbg_s cn56xxp1;
-	struct cvmx_l2c_dbg_s cn58xx;
-	struct cvmx_l2c_dbg_s cn58xxp1;
-};
-
-union cvmx_l2c_dut {
-	uint64_t u64;
-	struct cvmx_l2c_dut_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t dtena:1;
-		uint64_t reserved_30_30:1;
-		uint64_t dt_vld:1;
-		uint64_t dt_tag:29;
-#else
-		uint64_t dt_tag:29;
-		uint64_t dt_vld:1;
-		uint64_t reserved_30_30:1;
-		uint64_t dtena:1;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_dut_s cn30xx;
-	struct cvmx_l2c_dut_s cn31xx;
-	struct cvmx_l2c_dut_s cn38xx;
-	struct cvmx_l2c_dut_s cn38xxp2;
-	struct cvmx_l2c_dut_s cn50xx;
-	struct cvmx_l2c_dut_s cn52xx;
-	struct cvmx_l2c_dut_s cn52xxp1;
-	struct cvmx_l2c_dut_s cn56xx;
-	struct cvmx_l2c_dut_s cn56xxp1;
-	struct cvmx_l2c_dut_s cn58xx;
-	struct cvmx_l2c_dut_s cn58xxp1;
-};
-
-union cvmx_l2c_dut_mapx {
-	uint64_t u64;
-	struct cvmx_l2c_dut_mapx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_38_63:26;
-		uint64_t tag:28;
-		uint64_t reserved_1_9:9;
-		uint64_t valid:1;
-#else
-		uint64_t valid:1;
-		uint64_t reserved_1_9:9;
-		uint64_t tag:28;
-		uint64_t reserved_38_63:26;
-#endif
-	} s;
-	struct cvmx_l2c_dut_mapx_s cn61xx;
-	struct cvmx_l2c_dut_mapx_s cn63xx;
-	struct cvmx_l2c_dut_mapx_s cn63xxp1;
-	struct cvmx_l2c_dut_mapx_s cn66xx;
-	struct cvmx_l2c_dut_mapx_s cn68xx;
-	struct cvmx_l2c_dut_mapx_s cn68xxp1;
-	struct cvmx_l2c_dut_mapx_s cnf71xx;
-};
-
-union cvmx_l2c_err_tdtx {
-	uint64_t u64;
-	struct cvmx_l2c_err_tdtx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dbe:1;
-		uint64_t sbe:1;
-		uint64_t vdbe:1;
-		uint64_t vsbe:1;
-		uint64_t syn:10;
-		uint64_t reserved_22_49:28;
-		uint64_t wayidx:18;
-		uint64_t reserved_2_3:2;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_3:2;
-		uint64_t wayidx:18;
-		uint64_t reserved_22_49:28;
-		uint64_t syn:10;
-		uint64_t vsbe:1;
-		uint64_t vdbe:1;
-		uint64_t sbe:1;
-		uint64_t dbe:1;
-#endif
-	} s;
-	struct cvmx_l2c_err_tdtx_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dbe:1;
-		uint64_t sbe:1;
-		uint64_t vdbe:1;
-		uint64_t vsbe:1;
-		uint64_t syn:10;
-		uint64_t reserved_20_49:30;
-		uint64_t wayidx:16;
-		uint64_t reserved_2_3:2;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_3:2;
-		uint64_t wayidx:16;
-		uint64_t reserved_20_49:30;
-		uint64_t syn:10;
-		uint64_t vsbe:1;
-		uint64_t vdbe:1;
-		uint64_t sbe:1;
-		uint64_t dbe:1;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_err_tdtx_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dbe:1;
-		uint64_t sbe:1;
-		uint64_t vdbe:1;
-		uint64_t vsbe:1;
-		uint64_t syn:10;
-		uint64_t reserved_21_49:29;
-		uint64_t wayidx:17;
-		uint64_t reserved_2_3:2;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_3:2;
-		uint64_t wayidx:17;
-		uint64_t reserved_21_49:29;
-		uint64_t syn:10;
-		uint64_t vsbe:1;
-		uint64_t vdbe:1;
-		uint64_t sbe:1;
-		uint64_t dbe:1;
-#endif
-	} cn63xx;
-	struct cvmx_l2c_err_tdtx_cn63xx cn63xxp1;
-	struct cvmx_l2c_err_tdtx_cn63xx cn66xx;
-	struct cvmx_l2c_err_tdtx_s cn68xx;
-	struct cvmx_l2c_err_tdtx_s cn68xxp1;
-	struct cvmx_l2c_err_tdtx_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_err_ttgx {
-	uint64_t u64;
-	struct cvmx_l2c_err_ttgx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dbe:1;
-		uint64_t sbe:1;
-		uint64_t noway:1;
-		uint64_t reserved_56_60:5;
-		uint64_t syn:6;
-		uint64_t reserved_22_49:28;
-		uint64_t wayidx:15;
-		uint64_t reserved_2_6:5;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_6:5;
-		uint64_t wayidx:15;
-		uint64_t reserved_22_49:28;
-		uint64_t syn:6;
-		uint64_t reserved_56_60:5;
-		uint64_t noway:1;
-		uint64_t sbe:1;
-		uint64_t dbe:1;
-#endif
-	} s;
-	struct cvmx_l2c_err_ttgx_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dbe:1;
-		uint64_t sbe:1;
-		uint64_t noway:1;
-		uint64_t reserved_56_60:5;
-		uint64_t syn:6;
-		uint64_t reserved_20_49:30;
-		uint64_t wayidx:13;
-		uint64_t reserved_2_6:5;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_6:5;
-		uint64_t wayidx:13;
-		uint64_t reserved_20_49:30;
-		uint64_t syn:6;
-		uint64_t reserved_56_60:5;
-		uint64_t noway:1;
-		uint64_t sbe:1;
-		uint64_t dbe:1;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_err_ttgx_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t dbe:1;
-		uint64_t sbe:1;
-		uint64_t noway:1;
-		uint64_t reserved_56_60:5;
-		uint64_t syn:6;
-		uint64_t reserved_21_49:29;
-		uint64_t wayidx:14;
-		uint64_t reserved_2_6:5;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_6:5;
-		uint64_t wayidx:14;
-		uint64_t reserved_21_49:29;
-		uint64_t syn:6;
-		uint64_t reserved_56_60:5;
-		uint64_t noway:1;
-		uint64_t sbe:1;
-		uint64_t dbe:1;
-#endif
-	} cn63xx;
-	struct cvmx_l2c_err_ttgx_cn63xx cn63xxp1;
-	struct cvmx_l2c_err_ttgx_cn63xx cn66xx;
-	struct cvmx_l2c_err_ttgx_s cn68xx;
-	struct cvmx_l2c_err_ttgx_s cn68xxp1;
-	struct cvmx_l2c_err_ttgx_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_err_vbfx {
-	uint64_t u64;
-	struct cvmx_l2c_err_vbfx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t vdbe:1;
-		uint64_t vsbe:1;
-		uint64_t vsyn:10;
-		uint64_t reserved_2_49:48;
-		uint64_t type:2;
-#else
-		uint64_t type:2;
-		uint64_t reserved_2_49:48;
-		uint64_t vsyn:10;
-		uint64_t vsbe:1;
-		uint64_t vdbe:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} s;
-	struct cvmx_l2c_err_vbfx_s cn61xx;
-	struct cvmx_l2c_err_vbfx_s cn63xx;
-	struct cvmx_l2c_err_vbfx_s cn63xxp1;
-	struct cvmx_l2c_err_vbfx_s cn66xx;
-	struct cvmx_l2c_err_vbfx_s cn68xx;
-	struct cvmx_l2c_err_vbfx_s cn68xxp1;
-	struct cvmx_l2c_err_vbfx_s cnf71xx;
-};
-
-union cvmx_l2c_err_xmc {
-	uint64_t u64;
-	struct cvmx_l2c_err_xmc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t cmd:6;
-		uint64_t reserved_54_57:4;
-		uint64_t sid:6;
-		uint64_t reserved_38_47:10;
-		uint64_t addr:38;
-#else
-		uint64_t addr:38;
-		uint64_t reserved_38_47:10;
-		uint64_t sid:6;
-		uint64_t reserved_54_57:4;
-		uint64_t cmd:6;
-#endif
-	} s;
-	struct cvmx_l2c_err_xmc_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t cmd:6;
-		uint64_t reserved_52_57:6;
-		uint64_t sid:4;
-		uint64_t reserved_38_47:10;
-		uint64_t addr:38;
-#else
-		uint64_t addr:38;
-		uint64_t reserved_38_47:10;
-		uint64_t sid:4;
-		uint64_t reserved_52_57:6;
-		uint64_t cmd:6;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_err_xmc_cn61xx cn63xx;
-	struct cvmx_l2c_err_xmc_cn61xx cn63xxp1;
-	struct cvmx_l2c_err_xmc_cn66xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t cmd:6;
-		uint64_t reserved_53_57:5;
-		uint64_t sid:5;
-		uint64_t reserved_38_47:10;
-		uint64_t addr:38;
-#else
-		uint64_t addr:38;
-		uint64_t reserved_38_47:10;
-		uint64_t sid:5;
-		uint64_t reserved_53_57:5;
-		uint64_t cmd:6;
-#endif
-	} cn66xx;
-	struct cvmx_l2c_err_xmc_s cn68xx;
-	struct cvmx_l2c_err_xmc_s cn68xxp1;
-	struct cvmx_l2c_err_xmc_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_grpwrr0 {
-	uint64_t u64;
-	struct cvmx_l2c_grpwrr0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t plc1rmsk:32;
-		uint64_t plc0rmsk:32;
-#else
-		uint64_t plc0rmsk:32;
-		uint64_t plc1rmsk:32;
-#endif
-	} s;
-	struct cvmx_l2c_grpwrr0_s cn52xx;
-	struct cvmx_l2c_grpwrr0_s cn52xxp1;
-	struct cvmx_l2c_grpwrr0_s cn56xx;
-	struct cvmx_l2c_grpwrr0_s cn56xxp1;
-};
-
-union cvmx_l2c_grpwrr1 {
-	uint64_t u64;
-	struct cvmx_l2c_grpwrr1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t ilcrmsk:32;
-		uint64_t plc2rmsk:32;
-#else
-		uint64_t plc2rmsk:32;
-		uint64_t ilcrmsk:32;
-#endif
-	} s;
-	struct cvmx_l2c_grpwrr1_s cn52xx;
-	struct cvmx_l2c_grpwrr1_s cn52xxp1;
-	struct cvmx_l2c_grpwrr1_s cn56xx;
-	struct cvmx_l2c_grpwrr1_s cn56xxp1;
-};
-
-union cvmx_l2c_int_en {
-	uint64_t u64;
-	struct cvmx_l2c_int_en_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t lck2ena:1;
-		uint64_t lckena:1;
-		uint64_t l2ddeden:1;
-		uint64_t l2dsecen:1;
-		uint64_t l2tdeden:1;
-		uint64_t l2tsecen:1;
-		uint64_t oob3en:1;
-		uint64_t oob2en:1;
-		uint64_t oob1en:1;
-#else
-		uint64_t oob1en:1;
-		uint64_t oob2en:1;
-		uint64_t oob3en:1;
-		uint64_t l2tsecen:1;
-		uint64_t l2tdeden:1;
-		uint64_t l2dsecen:1;
-		uint64_t l2ddeden:1;
-		uint64_t lckena:1;
-		uint64_t lck2ena:1;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_l2c_int_en_s cn52xx;
-	struct cvmx_l2c_int_en_s cn52xxp1;
-	struct cvmx_l2c_int_en_s cn56xx;
-	struct cvmx_l2c_int_en_s cn56xxp1;
-};
-
-union cvmx_l2c_int_ena {
-	uint64_t u64;
-	struct cvmx_l2c_int_ena_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t bigrd:1;
-		uint64_t bigwr:1;
-		uint64_t vrtpe:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtwr:1;
-		uint64_t holewr:1;
-		uint64_t holerd:1;
-#else
-		uint64_t holerd:1;
-		uint64_t holewr:1;
-		uint64_t vrtwr:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtpe:1;
-		uint64_t bigwr:1;
-		uint64_t bigrd:1;
-		uint64_t reserved_8_63:56;
-#endif
-	} s;
-	struct cvmx_l2c_int_ena_s cn61xx;
-	struct cvmx_l2c_int_ena_s cn63xx;
-	struct cvmx_l2c_int_ena_cn63xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_6_63:58;
-		uint64_t vrtpe:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtwr:1;
-		uint64_t holewr:1;
-		uint64_t holerd:1;
-#else
-		uint64_t holerd:1;
-		uint64_t holewr:1;
-		uint64_t vrtwr:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtpe:1;
-		uint64_t reserved_6_63:58;
-#endif
-	} cn63xxp1;
-	struct cvmx_l2c_int_ena_s cn66xx;
-	struct cvmx_l2c_int_ena_s cn68xx;
-	struct cvmx_l2c_int_ena_s cn68xxp1;
-	struct cvmx_l2c_int_ena_s cnf71xx;
-};
-
-union cvmx_l2c_int_reg {
-	uint64_t u64;
-	struct cvmx_l2c_int_reg_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t tad3:1;
-		uint64_t tad2:1;
-		uint64_t tad1:1;
-		uint64_t tad0:1;
-		uint64_t reserved_8_15:8;
-		uint64_t bigrd:1;
-		uint64_t bigwr:1;
-		uint64_t vrtpe:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtwr:1;
-		uint64_t holewr:1;
-		uint64_t holerd:1;
-#else
-		uint64_t holerd:1;
-		uint64_t holewr:1;
-		uint64_t vrtwr:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtpe:1;
-		uint64_t bigwr:1;
-		uint64_t bigrd:1;
-		uint64_t reserved_8_15:8;
-		uint64_t tad0:1;
-		uint64_t tad1:1;
-		uint64_t tad2:1;
-		uint64_t tad3:1;
-		uint64_t reserved_20_63:44;
-#endif
-	} s;
-	struct cvmx_l2c_int_reg_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_17_63:47;
-		uint64_t tad0:1;
-		uint64_t reserved_8_15:8;
-		uint64_t bigrd:1;
-		uint64_t bigwr:1;
-		uint64_t vrtpe:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtwr:1;
-		uint64_t holewr:1;
-		uint64_t holerd:1;
-#else
-		uint64_t holerd:1;
-		uint64_t holewr:1;
-		uint64_t vrtwr:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtpe:1;
-		uint64_t bigwr:1;
-		uint64_t bigrd:1;
-		uint64_t reserved_8_15:8;
-		uint64_t tad0:1;
-		uint64_t reserved_17_63:47;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_int_reg_cn61xx cn63xx;
-	struct cvmx_l2c_int_reg_cn63xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_17_63:47;
-		uint64_t tad0:1;
-		uint64_t reserved_6_15:10;
-		uint64_t vrtpe:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtwr:1;
-		uint64_t holewr:1;
-		uint64_t holerd:1;
-#else
-		uint64_t holerd:1;
-		uint64_t holewr:1;
-		uint64_t vrtwr:1;
-		uint64_t vrtidrng:1;
-		uint64_t vrtadrng:1;
-		uint64_t vrtpe:1;
-		uint64_t reserved_6_15:10;
-		uint64_t tad0:1;
-		uint64_t reserved_17_63:47;
-#endif
-	} cn63xxp1;
-	struct cvmx_l2c_int_reg_cn61xx cn66xx;
-	struct cvmx_l2c_int_reg_s cn68xx;
-	struct cvmx_l2c_int_reg_s cn68xxp1;
-	struct cvmx_l2c_int_reg_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_int_stat {
-	uint64_t u64;
-	struct cvmx_l2c_int_stat_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t lck2:1;
-		uint64_t lck:1;
-		uint64_t l2dded:1;
-		uint64_t l2dsec:1;
-		uint64_t l2tded:1;
-		uint64_t l2tsec:1;
-		uint64_t oob3:1;
-		uint64_t oob2:1;
-		uint64_t oob1:1;
-#else
-		uint64_t oob1:1;
-		uint64_t oob2:1;
-		uint64_t oob3:1;
-		uint64_t l2tsec:1;
-		uint64_t l2tded:1;
-		uint64_t l2dsec:1;
-		uint64_t l2dded:1;
-		uint64_t lck:1;
-		uint64_t lck2:1;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_l2c_int_stat_s cn52xx;
-	struct cvmx_l2c_int_stat_s cn52xxp1;
-	struct cvmx_l2c_int_stat_s cn56xx;
-	struct cvmx_l2c_int_stat_s cn56xxp1;
-};
-
-union cvmx_l2c_iocx_pfc {
-	uint64_t u64;
-	struct cvmx_l2c_iocx_pfc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_iocx_pfc_s cn61xx;
-	struct cvmx_l2c_iocx_pfc_s cn63xx;
-	struct cvmx_l2c_iocx_pfc_s cn63xxp1;
-	struct cvmx_l2c_iocx_pfc_s cn66xx;
-	struct cvmx_l2c_iocx_pfc_s cn68xx;
-	struct cvmx_l2c_iocx_pfc_s cn68xxp1;
-	struct cvmx_l2c_iocx_pfc_s cnf71xx;
-};
-
-union cvmx_l2c_iorx_pfc {
-	uint64_t u64;
-	struct cvmx_l2c_iorx_pfc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_iorx_pfc_s cn61xx;
-	struct cvmx_l2c_iorx_pfc_s cn63xx;
-	struct cvmx_l2c_iorx_pfc_s cn63xxp1;
-	struct cvmx_l2c_iorx_pfc_s cn66xx;
-	struct cvmx_l2c_iorx_pfc_s cn68xx;
-	struct cvmx_l2c_iorx_pfc_s cn68xxp1;
-	struct cvmx_l2c_iorx_pfc_s cnf71xx;
-};
-
-union cvmx_l2c_lckbase {
-	uint64_t u64;
-	struct cvmx_l2c_lckbase_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_31_63:33;
-		uint64_t lck_base:27;
-		uint64_t reserved_1_3:3;
-		uint64_t lck_ena:1;
-#else
-		uint64_t lck_ena:1;
-		uint64_t reserved_1_3:3;
-		uint64_t lck_base:27;
-		uint64_t reserved_31_63:33;
-#endif
-	} s;
-	struct cvmx_l2c_lckbase_s cn30xx;
-	struct cvmx_l2c_lckbase_s cn31xx;
-	struct cvmx_l2c_lckbase_s cn38xx;
-	struct cvmx_l2c_lckbase_s cn38xxp2;
-	struct cvmx_l2c_lckbase_s cn50xx;
-	struct cvmx_l2c_lckbase_s cn52xx;
-	struct cvmx_l2c_lckbase_s cn52xxp1;
-	struct cvmx_l2c_lckbase_s cn56xx;
-	struct cvmx_l2c_lckbase_s cn56xxp1;
-	struct cvmx_l2c_lckbase_s cn58xx;
-	struct cvmx_l2c_lckbase_s cn58xxp1;
-};
-
-union cvmx_l2c_lckoff {
-	uint64_t u64;
-	struct cvmx_l2c_lckoff_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_10_63:54;
-		uint64_t lck_offset:10;
-#else
-		uint64_t lck_offset:10;
-		uint64_t reserved_10_63:54;
-#endif
-	} s;
-	struct cvmx_l2c_lckoff_s cn30xx;
-	struct cvmx_l2c_lckoff_s cn31xx;
-	struct cvmx_l2c_lckoff_s cn38xx;
-	struct cvmx_l2c_lckoff_s cn38xxp2;
-	struct cvmx_l2c_lckoff_s cn50xx;
-	struct cvmx_l2c_lckoff_s cn52xx;
-	struct cvmx_l2c_lckoff_s cn52xxp1;
-	struct cvmx_l2c_lckoff_s cn56xx;
-	struct cvmx_l2c_lckoff_s cn56xxp1;
-	struct cvmx_l2c_lckoff_s cn58xx;
-	struct cvmx_l2c_lckoff_s cn58xxp1;
-};
-
-union cvmx_l2c_lfb0 {
-	uint64_t u64;
-	struct cvmx_l2c_lfb0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t stcpnd:1;
-		uint64_t stpnd:1;
-		uint64_t stinv:1;
-		uint64_t stcfl:1;
-		uint64_t vam:1;
-		uint64_t inxt:4;
-		uint64_t itl:1;
-		uint64_t ihd:1;
-		uint64_t set:3;
-		uint64_t vabnum:4;
-		uint64_t sid:9;
-		uint64_t cmd:4;
-		uint64_t vld:1;
-#else
-		uint64_t vld:1;
-		uint64_t cmd:4;
-		uint64_t sid:9;
-		uint64_t vabnum:4;
-		uint64_t set:3;
-		uint64_t ihd:1;
-		uint64_t itl:1;
-		uint64_t inxt:4;
-		uint64_t vam:1;
-		uint64_t stcfl:1;
-		uint64_t stinv:1;
-		uint64_t stpnd:1;
-		uint64_t stcpnd:1;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_lfb0_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t stcpnd:1;
-		uint64_t stpnd:1;
-		uint64_t stinv:1;
-		uint64_t stcfl:1;
-		uint64_t vam:1;
-		uint64_t reserved_25_26:2;
-		uint64_t inxt:2;
-		uint64_t itl:1;
-		uint64_t ihd:1;
-		uint64_t reserved_20_20:1;
-		uint64_t set:2;
-		uint64_t reserved_16_17:2;
-		uint64_t vabnum:2;
-		uint64_t sid:9;
-		uint64_t cmd:4;
-		uint64_t vld:1;
-#else
-		uint64_t vld:1;
-		uint64_t cmd:4;
-		uint64_t sid:9;
-		uint64_t vabnum:2;
-		uint64_t reserved_16_17:2;
-		uint64_t set:2;
-		uint64_t reserved_20_20:1;
-		uint64_t ihd:1;
-		uint64_t itl:1;
-		uint64_t inxt:2;
-		uint64_t reserved_25_26:2;
-		uint64_t vam:1;
-		uint64_t stcfl:1;
-		uint64_t stinv:1;
-		uint64_t stpnd:1;
-		uint64_t stcpnd:1;
-		uint64_t reserved_32_63:32;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_lfb0_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t stcpnd:1;
-		uint64_t stpnd:1;
-		uint64_t stinv:1;
-		uint64_t stcfl:1;
-		uint64_t vam:1;
-		uint64_t reserved_26_26:1;
-		uint64_t inxt:3;
-		uint64_t itl:1;
-		uint64_t ihd:1;
-		uint64_t reserved_20_20:1;
-		uint64_t set:2;
-		uint64_t reserved_17_17:1;
-		uint64_t vabnum:3;
-		uint64_t sid:9;
-		uint64_t cmd:4;
-		uint64_t vld:1;
-#else
-		uint64_t vld:1;
-		uint64_t cmd:4;
-		uint64_t sid:9;
-		uint64_t vabnum:3;
-		uint64_t reserved_17_17:1;
-		uint64_t set:2;
-		uint64_t reserved_20_20:1;
-		uint64_t ihd:1;
-		uint64_t itl:1;
-		uint64_t inxt:3;
-		uint64_t reserved_26_26:1;
-		uint64_t vam:1;
-		uint64_t stcfl:1;
-		uint64_t stinv:1;
-		uint64_t stpnd:1;
-		uint64_t stcpnd:1;
-		uint64_t reserved_32_63:32;
-#endif
-	} cn31xx;
-	struct cvmx_l2c_lfb0_s cn38xx;
-	struct cvmx_l2c_lfb0_s cn38xxp2;
-	struct cvmx_l2c_lfb0_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t stcpnd:1;
-		uint64_t stpnd:1;
-		uint64_t stinv:1;
-		uint64_t stcfl:1;
-		uint64_t vam:1;
-		uint64_t reserved_26_26:1;
-		uint64_t inxt:3;
-		uint64_t itl:1;
-		uint64_t ihd:1;
-		uint64_t set:3;
-		uint64_t reserved_17_17:1;
-		uint64_t vabnum:3;
-		uint64_t sid:9;
-		uint64_t cmd:4;
-		uint64_t vld:1;
-#else
-		uint64_t vld:1;
-		uint64_t cmd:4;
-		uint64_t sid:9;
-		uint64_t vabnum:3;
-		uint64_t reserved_17_17:1;
-		uint64_t set:3;
-		uint64_t ihd:1;
-		uint64_t itl:1;
-		uint64_t inxt:3;
-		uint64_t reserved_26_26:1;
-		uint64_t vam:1;
-		uint64_t stcfl:1;
-		uint64_t stinv:1;
-		uint64_t stpnd:1;
-		uint64_t stcpnd:1;
-		uint64_t reserved_32_63:32;
-#endif
-	} cn50xx;
-	struct cvmx_l2c_lfb0_cn50xx cn52xx;
-	struct cvmx_l2c_lfb0_cn50xx cn52xxp1;
-	struct cvmx_l2c_lfb0_s cn56xx;
-	struct cvmx_l2c_lfb0_s cn56xxp1;
-	struct cvmx_l2c_lfb0_s cn58xx;
-	struct cvmx_l2c_lfb0_s cn58xxp1;
-};
-
-union cvmx_l2c_lfb1 {
-	uint64_t u64;
-	struct cvmx_l2c_lfb1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_19_63:45;
-		uint64_t dsgoing:1;
-		uint64_t bid:2;
-		uint64_t wtrsp:1;
-		uint64_t wtdw:1;
-		uint64_t wtdq:1;
-		uint64_t wtwhp:1;
-		uint64_t wtwhf:1;
-		uint64_t wtwrm:1;
-		uint64_t wtstm:1;
-		uint64_t wtrda:1;
-		uint64_t wtstdt:1;
-		uint64_t wtstrsp:1;
-		uint64_t wtstrsc:1;
-		uint64_t wtvtm:1;
-		uint64_t wtmfl:1;
-		uint64_t prbrty:1;
-		uint64_t wtprb:1;
-		uint64_t vld:1;
-#else
-		uint64_t vld:1;
-		uint64_t wtprb:1;
-		uint64_t prbrty:1;
-		uint64_t wtmfl:1;
-		uint64_t wtvtm:1;
-		uint64_t wtstrsc:1;
-		uint64_t wtstrsp:1;
-		uint64_t wtstdt:1;
-		uint64_t wtrda:1;
-		uint64_t wtstm:1;
-		uint64_t wtwrm:1;
-		uint64_t wtwhf:1;
-		uint64_t wtwhp:1;
-		uint64_t wtdq:1;
-		uint64_t wtdw:1;
-		uint64_t wtrsp:1;
-		uint64_t bid:2;
-		uint64_t dsgoing:1;
-		uint64_t reserved_19_63:45;
-#endif
-	} s;
-	struct cvmx_l2c_lfb1_s cn30xx;
-	struct cvmx_l2c_lfb1_s cn31xx;
-	struct cvmx_l2c_lfb1_s cn38xx;
-	struct cvmx_l2c_lfb1_s cn38xxp2;
-	struct cvmx_l2c_lfb1_s cn50xx;
-	struct cvmx_l2c_lfb1_s cn52xx;
-	struct cvmx_l2c_lfb1_s cn52xxp1;
-	struct cvmx_l2c_lfb1_s cn56xx;
-	struct cvmx_l2c_lfb1_s cn56xxp1;
-	struct cvmx_l2c_lfb1_s cn58xx;
-	struct cvmx_l2c_lfb1_s cn58xxp1;
-};
-
-union cvmx_l2c_lfb2 {
-	uint64_t u64;
-	struct cvmx_l2c_lfb2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_0_63:64;
-#else
-		uint64_t reserved_0_63:64;
-#endif
-	} s;
-	struct cvmx_l2c_lfb2_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_27_63:37;
-		uint64_t lfb_tag:19;
-		uint64_t lfb_idx:8;
-#else
-		uint64_t lfb_idx:8;
-		uint64_t lfb_tag:19;
-		uint64_t reserved_27_63:37;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_lfb2_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_27_63:37;
-		uint64_t lfb_tag:17;
-		uint64_t lfb_idx:10;
-#else
-		uint64_t lfb_idx:10;
-		uint64_t lfb_tag:17;
-		uint64_t reserved_27_63:37;
-#endif
-	} cn31xx;
-	struct cvmx_l2c_lfb2_cn31xx cn38xx;
-	struct cvmx_l2c_lfb2_cn31xx cn38xxp2;
-	struct cvmx_l2c_lfb2_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_27_63:37;
-		uint64_t lfb_tag:20;
-		uint64_t lfb_idx:7;
-#else
-		uint64_t lfb_idx:7;
-		uint64_t lfb_tag:20;
-		uint64_t reserved_27_63:37;
-#endif
-	} cn50xx;
-	struct cvmx_l2c_lfb2_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_27_63:37;
-		uint64_t lfb_tag:18;
-		uint64_t lfb_idx:9;
-#else
-		uint64_t lfb_idx:9;
-		uint64_t lfb_tag:18;
-		uint64_t reserved_27_63:37;
-#endif
-	} cn52xx;
-	struct cvmx_l2c_lfb2_cn52xx cn52xxp1;
-	struct cvmx_l2c_lfb2_cn56xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_27_63:37;
-		uint64_t lfb_tag:16;
-		uint64_t lfb_idx:11;
-#else
-		uint64_t lfb_idx:11;
-		uint64_t lfb_tag:16;
-		uint64_t reserved_27_63:37;
-#endif
-	} cn56xx;
-	struct cvmx_l2c_lfb2_cn56xx cn56xxp1;
-	struct cvmx_l2c_lfb2_cn56xx cn58xx;
-	struct cvmx_l2c_lfb2_cn56xx cn58xxp1;
-};
-
-union cvmx_l2c_lfb3 {
-	uint64_t u64;
-	struct cvmx_l2c_lfb3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_5_63:59;
-		uint64_t stpartdis:1;
-		uint64_t lfb_hwm:4;
-#else
-		uint64_t lfb_hwm:4;
-		uint64_t stpartdis:1;
-		uint64_t reserved_5_63:59;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_15_63:49,
+		__BITFIELD_FIELD(uint64_t lfb_enum:4,
+		__BITFIELD_FIELD(uint64_t lfb_dmp:1,
+		__BITFIELD_FIELD(uint64_t ppnum:4,
+		__BITFIELD_FIELD(uint64_t set:3,
+		__BITFIELD_FIELD(uint64_t finv:1,
+		__BITFIELD_FIELD(uint64_t l2d:1,
+		__BITFIELD_FIELD(uint64_t l2t:1,
+		;))))))))
 	} s;
-	struct cvmx_l2c_lfb3_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_5_63:59;
-		uint64_t stpartdis:1;
-		uint64_t reserved_2_3:2;
-		uint64_t lfb_hwm:2;
-#else
-		uint64_t lfb_hwm:2;
-		uint64_t reserved_2_3:2;
-		uint64_t stpartdis:1;
-		uint64_t reserved_5_63:59;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_lfb3_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_5_63:59;
-		uint64_t stpartdis:1;
-		uint64_t reserved_3_3:1;
-		uint64_t lfb_hwm:3;
-#else
-		uint64_t lfb_hwm:3;
-		uint64_t reserved_3_3:1;
-		uint64_t stpartdis:1;
-		uint64_t reserved_5_63:59;
-#endif
-	} cn31xx;
-	struct cvmx_l2c_lfb3_s cn38xx;
-	struct cvmx_l2c_lfb3_s cn38xxp2;
-	struct cvmx_l2c_lfb3_cn31xx cn50xx;
-	struct cvmx_l2c_lfb3_cn31xx cn52xx;
-	struct cvmx_l2c_lfb3_cn31xx cn52xxp1;
-	struct cvmx_l2c_lfb3_s cn56xx;
-	struct cvmx_l2c_lfb3_s cn56xxp1;
-	struct cvmx_l2c_lfb3_s cn58xx;
-	struct cvmx_l2c_lfb3_s cn58xxp1;
-};
-
-union cvmx_l2c_oob {
-	uint64_t u64;
-	struct cvmx_l2c_oob_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_2_63:62;
-		uint64_t dwbena:1;
-		uint64_t stena:1;
-#else
-		uint64_t stena:1;
-		uint64_t dwbena:1;
-		uint64_t reserved_2_63:62;
-#endif
-	} s;
-	struct cvmx_l2c_oob_s cn52xx;
-	struct cvmx_l2c_oob_s cn52xxp1;
-	struct cvmx_l2c_oob_s cn56xx;
-	struct cvmx_l2c_oob_s cn56xxp1;
-};
-
-union cvmx_l2c_oob1 {
-	uint64_t u64;
-	struct cvmx_l2c_oob1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t fadr:27;
-		uint64_t fsrc:1;
-		uint64_t reserved_34_35:2;
-		uint64_t sadr:14;
-		uint64_t reserved_14_19:6;
-		uint64_t size:14;
-#else
-		uint64_t size:14;
-		uint64_t reserved_14_19:6;
-		uint64_t sadr:14;
-		uint64_t reserved_34_35:2;
-		uint64_t fsrc:1;
-		uint64_t fadr:27;
-#endif
-	} s;
-	struct cvmx_l2c_oob1_s cn52xx;
-	struct cvmx_l2c_oob1_s cn52xxp1;
-	struct cvmx_l2c_oob1_s cn56xx;
-	struct cvmx_l2c_oob1_s cn56xxp1;
-};
-
-union cvmx_l2c_oob2 {
-	uint64_t u64;
-	struct cvmx_l2c_oob2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t fadr:27;
-		uint64_t fsrc:1;
-		uint64_t reserved_34_35:2;
-		uint64_t sadr:14;
-		uint64_t reserved_14_19:6;
-		uint64_t size:14;
-#else
-		uint64_t size:14;
-		uint64_t reserved_14_19:6;
-		uint64_t sadr:14;
-		uint64_t reserved_34_35:2;
-		uint64_t fsrc:1;
-		uint64_t fadr:27;
-#endif
-	} s;
-	struct cvmx_l2c_oob2_s cn52xx;
-	struct cvmx_l2c_oob2_s cn52xxp1;
-	struct cvmx_l2c_oob2_s cn56xx;
-	struct cvmx_l2c_oob2_s cn56xxp1;
-};
-
-union cvmx_l2c_oob3 {
-	uint64_t u64;
-	struct cvmx_l2c_oob3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t fadr:27;
-		uint64_t fsrc:1;
-		uint64_t reserved_34_35:2;
-		uint64_t sadr:14;
-		uint64_t reserved_14_19:6;
-		uint64_t size:14;
-#else
-		uint64_t size:14;
-		uint64_t reserved_14_19:6;
-		uint64_t sadr:14;
-		uint64_t reserved_34_35:2;
-		uint64_t fsrc:1;
-		uint64_t fadr:27;
-#endif
-	} s;
-	struct cvmx_l2c_oob3_s cn52xx;
-	struct cvmx_l2c_oob3_s cn52xxp1;
-	struct cvmx_l2c_oob3_s cn56xx;
-	struct cvmx_l2c_oob3_s cn56xxp1;
-};
-
-union cvmx_l2c_pfcx {
-	uint64_t u64;
-	struct cvmx_l2c_pfcx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_36_63:28;
-		uint64_t pfcnt0:36;
-#else
-		uint64_t pfcnt0:36;
-		uint64_t reserved_36_63:28;
-#endif
-	} s;
-	struct cvmx_l2c_pfcx_s cn30xx;
-	struct cvmx_l2c_pfcx_s cn31xx;
-	struct cvmx_l2c_pfcx_s cn38xx;
-	struct cvmx_l2c_pfcx_s cn38xxp2;
-	struct cvmx_l2c_pfcx_s cn50xx;
-	struct cvmx_l2c_pfcx_s cn52xx;
-	struct cvmx_l2c_pfcx_s cn52xxp1;
-	struct cvmx_l2c_pfcx_s cn56xx;
-	struct cvmx_l2c_pfcx_s cn56xxp1;
-	struct cvmx_l2c_pfcx_s cn58xx;
-	struct cvmx_l2c_pfcx_s cn58xxp1;
 };
 
 union cvmx_l2c_pfctl {
 	uint64_t u64;
 	struct cvmx_l2c_pfctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_36_63:28;
-		uint64_t cnt3rdclr:1;
-		uint64_t cnt2rdclr:1;
-		uint64_t cnt1rdclr:1;
-		uint64_t cnt0rdclr:1;
-		uint64_t cnt3ena:1;
-		uint64_t cnt3clr:1;
-		uint64_t cnt3sel:6;
-		uint64_t cnt2ena:1;
-		uint64_t cnt2clr:1;
-		uint64_t cnt2sel:6;
-		uint64_t cnt1ena:1;
-		uint64_t cnt1clr:1;
-		uint64_t cnt1sel:6;
-		uint64_t cnt0ena:1;
-		uint64_t cnt0clr:1;
-		uint64_t cnt0sel:6;
-#else
-		uint64_t cnt0sel:6;
-		uint64_t cnt0clr:1;
-		uint64_t cnt0ena:1;
-		uint64_t cnt1sel:6;
-		uint64_t cnt1clr:1;
-		uint64_t cnt1ena:1;
-		uint64_t cnt2sel:6;
-		uint64_t cnt2clr:1;
-		uint64_t cnt2ena:1;
-		uint64_t cnt3sel:6;
-		uint64_t cnt3clr:1;
-		uint64_t cnt3ena:1;
-		uint64_t cnt0rdclr:1;
-		uint64_t cnt1rdclr:1;
-		uint64_t cnt2rdclr:1;
-		uint64_t cnt3rdclr:1;
-		uint64_t reserved_36_63:28;
-#endif
-	} s;
-	struct cvmx_l2c_pfctl_s cn30xx;
-	struct cvmx_l2c_pfctl_s cn31xx;
-	struct cvmx_l2c_pfctl_s cn38xx;
-	struct cvmx_l2c_pfctl_s cn38xxp2;
-	struct cvmx_l2c_pfctl_s cn50xx;
-	struct cvmx_l2c_pfctl_s cn52xx;
-	struct cvmx_l2c_pfctl_s cn52xxp1;
-	struct cvmx_l2c_pfctl_s cn56xx;
-	struct cvmx_l2c_pfctl_s cn56xxp1;
-	struct cvmx_l2c_pfctl_s cn58xx;
-	struct cvmx_l2c_pfctl_s cn58xxp1;
-};
-
-union cvmx_l2c_ppgrp {
-	uint64_t u64;
-	struct cvmx_l2c_ppgrp_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t pp11grp:2;
-		uint64_t pp10grp:2;
-		uint64_t pp9grp:2;
-		uint64_t pp8grp:2;
-		uint64_t pp7grp:2;
-		uint64_t pp6grp:2;
-		uint64_t pp5grp:2;
-		uint64_t pp4grp:2;
-		uint64_t pp3grp:2;
-		uint64_t pp2grp:2;
-		uint64_t pp1grp:2;
-		uint64_t pp0grp:2;
-#else
-		uint64_t pp0grp:2;
-		uint64_t pp1grp:2;
-		uint64_t pp2grp:2;
-		uint64_t pp3grp:2;
-		uint64_t pp4grp:2;
-		uint64_t pp5grp:2;
-		uint64_t pp6grp:2;
-		uint64_t pp7grp:2;
-		uint64_t pp8grp:2;
-		uint64_t pp9grp:2;
-		uint64_t pp10grp:2;
-		uint64_t pp11grp:2;
-		uint64_t reserved_24_63:40;
-#endif
-	} s;
-	struct cvmx_l2c_ppgrp_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t pp3grp:2;
-		uint64_t pp2grp:2;
-		uint64_t pp1grp:2;
-		uint64_t pp0grp:2;
-#else
-		uint64_t pp0grp:2;
-		uint64_t pp1grp:2;
-		uint64_t pp2grp:2;
-		uint64_t pp3grp:2;
-		uint64_t reserved_8_63:56;
-#endif
-	} cn52xx;
-	struct cvmx_l2c_ppgrp_cn52xx cn52xxp1;
-	struct cvmx_l2c_ppgrp_s cn56xx;
-	struct cvmx_l2c_ppgrp_s cn56xxp1;
-};
-
-union cvmx_l2c_qos_iobx {
-	uint64_t u64;
-	struct cvmx_l2c_qos_iobx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_7_63:57;
-		uint64_t dwblvl:3;
-		uint64_t reserved_3_3:1;
-		uint64_t lvl:3;
-#else
-		uint64_t lvl:3;
-		uint64_t reserved_3_3:1;
-		uint64_t dwblvl:3;
-		uint64_t reserved_7_63:57;
-#endif
-	} s;
-	struct cvmx_l2c_qos_iobx_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_6_63:58;
-		uint64_t dwblvl:2;
-		uint64_t reserved_2_3:2;
-		uint64_t lvl:2;
-#else
-		uint64_t lvl:2;
-		uint64_t reserved_2_3:2;
-		uint64_t dwblvl:2;
-		uint64_t reserved_6_63:58;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_qos_iobx_cn61xx cn63xx;
-	struct cvmx_l2c_qos_iobx_cn61xx cn63xxp1;
-	struct cvmx_l2c_qos_iobx_cn61xx cn66xx;
-	struct cvmx_l2c_qos_iobx_s cn68xx;
-	struct cvmx_l2c_qos_iobx_s cn68xxp1;
-	struct cvmx_l2c_qos_iobx_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_qos_ppx {
-	uint64_t u64;
-	struct cvmx_l2c_qos_ppx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_3_63:61;
-		uint64_t lvl:3;
-#else
-		uint64_t lvl:3;
-		uint64_t reserved_3_63:61;
-#endif
-	} s;
-	struct cvmx_l2c_qos_ppx_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_2_63:62;
-		uint64_t lvl:2;
-#else
-		uint64_t lvl:2;
-		uint64_t reserved_2_63:62;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_qos_ppx_cn61xx cn63xx;
-	struct cvmx_l2c_qos_ppx_cn61xx cn63xxp1;
-	struct cvmx_l2c_qos_ppx_cn61xx cn66xx;
-	struct cvmx_l2c_qos_ppx_s cn68xx;
-	struct cvmx_l2c_qos_ppx_s cn68xxp1;
-	struct cvmx_l2c_qos_ppx_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_qos_wgt {
-	uint64_t u64;
-	struct cvmx_l2c_qos_wgt_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t wgt7:8;
-		uint64_t wgt6:8;
-		uint64_t wgt5:8;
-		uint64_t wgt4:8;
-		uint64_t wgt3:8;
-		uint64_t wgt2:8;
-		uint64_t wgt1:8;
-		uint64_t wgt0:8;
-#else
-		uint64_t wgt0:8;
-		uint64_t wgt1:8;
-		uint64_t wgt2:8;
-		uint64_t wgt3:8;
-		uint64_t wgt4:8;
-		uint64_t wgt5:8;
-		uint64_t wgt6:8;
-		uint64_t wgt7:8;
-#endif
-	} s;
-	struct cvmx_l2c_qos_wgt_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t wgt3:8;
-		uint64_t wgt2:8;
-		uint64_t wgt1:8;
-		uint64_t wgt0:8;
-#else
-		uint64_t wgt0:8;
-		uint64_t wgt1:8;
-		uint64_t wgt2:8;
-		uint64_t wgt3:8;
-		uint64_t reserved_32_63:32;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_qos_wgt_cn61xx cn63xx;
-	struct cvmx_l2c_qos_wgt_cn61xx cn63xxp1;
-	struct cvmx_l2c_qos_wgt_cn61xx cn66xx;
-	struct cvmx_l2c_qos_wgt_s cn68xx;
-	struct cvmx_l2c_qos_wgt_s cn68xxp1;
-	struct cvmx_l2c_qos_wgt_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_rscx_pfc {
-	uint64_t u64;
-	struct cvmx_l2c_rscx_pfc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_rscx_pfc_s cn61xx;
-	struct cvmx_l2c_rscx_pfc_s cn63xx;
-	struct cvmx_l2c_rscx_pfc_s cn63xxp1;
-	struct cvmx_l2c_rscx_pfc_s cn66xx;
-	struct cvmx_l2c_rscx_pfc_s cn68xx;
-	struct cvmx_l2c_rscx_pfc_s cn68xxp1;
-	struct cvmx_l2c_rscx_pfc_s cnf71xx;
-};
-
-union cvmx_l2c_rsdx_pfc {
-	uint64_t u64;
-	struct cvmx_l2c_rsdx_pfc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_rsdx_pfc_s cn61xx;
-	struct cvmx_l2c_rsdx_pfc_s cn63xx;
-	struct cvmx_l2c_rsdx_pfc_s cn63xxp1;
-	struct cvmx_l2c_rsdx_pfc_s cn66xx;
-	struct cvmx_l2c_rsdx_pfc_s cn68xx;
-	struct cvmx_l2c_rsdx_pfc_s cn68xxp1;
-	struct cvmx_l2c_rsdx_pfc_s cnf71xx;
-};
-
-union cvmx_l2c_spar0 {
-	uint64_t u64;
-	struct cvmx_l2c_spar0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t umsk3:8;
-		uint64_t umsk2:8;
-		uint64_t umsk1:8;
-		uint64_t umsk0:8;
-#else
-		uint64_t umsk0:8;
-		uint64_t umsk1:8;
-		uint64_t umsk2:8;
-		uint64_t umsk3:8;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_spar0_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_4_63:60;
-		uint64_t umsk0:4;
-#else
-		uint64_t umsk0:4;
-		uint64_t reserved_4_63:60;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_spar0_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_12_63:52;
-		uint64_t umsk1:4;
-		uint64_t reserved_4_7:4;
-		uint64_t umsk0:4;
-#else
-		uint64_t umsk0:4;
-		uint64_t reserved_4_7:4;
-		uint64_t umsk1:4;
-		uint64_t reserved_12_63:52;
-#endif
-	} cn31xx;
-	struct cvmx_l2c_spar0_s cn38xx;
-	struct cvmx_l2c_spar0_s cn38xxp2;
-	struct cvmx_l2c_spar0_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t umsk1:8;
-		uint64_t umsk0:8;
-#else
-		uint64_t umsk0:8;
-		uint64_t umsk1:8;
-		uint64_t reserved_16_63:48;
-#endif
-	} cn50xx;
-	struct cvmx_l2c_spar0_s cn52xx;
-	struct cvmx_l2c_spar0_s cn52xxp1;
-	struct cvmx_l2c_spar0_s cn56xx;
-	struct cvmx_l2c_spar0_s cn56xxp1;
-	struct cvmx_l2c_spar0_s cn58xx;
-	struct cvmx_l2c_spar0_s cn58xxp1;
-};
-
-union cvmx_l2c_spar1 {
-	uint64_t u64;
-	struct cvmx_l2c_spar1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t umsk7:8;
-		uint64_t umsk6:8;
-		uint64_t umsk5:8;
-		uint64_t umsk4:8;
-#else
-		uint64_t umsk4:8;
-		uint64_t umsk5:8;
-		uint64_t umsk6:8;
-		uint64_t umsk7:8;
-		uint64_t reserved_32_63:32;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_36_63:28,
+		__BITFIELD_FIELD(uint64_t cnt3rdclr:1,
+		__BITFIELD_FIELD(uint64_t cnt2rdclr:1,
+		__BITFIELD_FIELD(uint64_t cnt1rdclr:1,
+		__BITFIELD_FIELD(uint64_t cnt0rdclr:1,
+		__BITFIELD_FIELD(uint64_t cnt3ena:1,
+		__BITFIELD_FIELD(uint64_t cnt3clr:1,
+		__BITFIELD_FIELD(uint64_t cnt3sel:6,
+		__BITFIELD_FIELD(uint64_t cnt2ena:1,
+		__BITFIELD_FIELD(uint64_t cnt2clr:1,
+		__BITFIELD_FIELD(uint64_t cnt2sel:6,
+		__BITFIELD_FIELD(uint64_t cnt1ena:1,
+		__BITFIELD_FIELD(uint64_t cnt1clr:1,
+		__BITFIELD_FIELD(uint64_t cnt1sel:6,
+		__BITFIELD_FIELD(uint64_t cnt0ena:1,
+		__BITFIELD_FIELD(uint64_t cnt0clr:1,
+		__BITFIELD_FIELD(uint64_t cnt0sel:6,
+		;)))))))))))))))))
 	} s;
-	struct cvmx_l2c_spar1_s cn38xx;
-	struct cvmx_l2c_spar1_s cn38xxp2;
-	struct cvmx_l2c_spar1_s cn56xx;
-	struct cvmx_l2c_spar1_s cn56xxp1;
-	struct cvmx_l2c_spar1_s cn58xx;
-	struct cvmx_l2c_spar1_s cn58xxp1;
-};
-
-union cvmx_l2c_spar2 {
-	uint64_t u64;
-	struct cvmx_l2c_spar2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t umsk11:8;
-		uint64_t umsk10:8;
-		uint64_t umsk9:8;
-		uint64_t umsk8:8;
-#else
-		uint64_t umsk8:8;
-		uint64_t umsk9:8;
-		uint64_t umsk10:8;
-		uint64_t umsk11:8;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_spar2_s cn38xx;
-	struct cvmx_l2c_spar2_s cn38xxp2;
-	struct cvmx_l2c_spar2_s cn56xx;
-	struct cvmx_l2c_spar2_s cn56xxp1;
-	struct cvmx_l2c_spar2_s cn58xx;
-	struct cvmx_l2c_spar2_s cn58xxp1;
-};
-
-union cvmx_l2c_spar3 {
-	uint64_t u64;
-	struct cvmx_l2c_spar3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t umsk15:8;
-		uint64_t umsk14:8;
-		uint64_t umsk13:8;
-		uint64_t umsk12:8;
-#else
-		uint64_t umsk12:8;
-		uint64_t umsk13:8;
-		uint64_t umsk14:8;
-		uint64_t umsk15:8;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_spar3_s cn38xx;
-	struct cvmx_l2c_spar3_s cn38xxp2;
-	struct cvmx_l2c_spar3_s cn58xx;
-	struct cvmx_l2c_spar3_s cn58xxp1;
-};
-
-union cvmx_l2c_spar4 {
-	uint64_t u64;
-	struct cvmx_l2c_spar4_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t umskiob:8;
-#else
-		uint64_t umskiob:8;
-		uint64_t reserved_8_63:56;
-#endif
-	} s;
-	struct cvmx_l2c_spar4_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_4_63:60;
-		uint64_t umskiob:4;
-#else
-		uint64_t umskiob:4;
-		uint64_t reserved_4_63:60;
-#endif
-	} cn30xx;
-	struct cvmx_l2c_spar4_cn30xx cn31xx;
-	struct cvmx_l2c_spar4_s cn38xx;
-	struct cvmx_l2c_spar4_s cn38xxp2;
-	struct cvmx_l2c_spar4_s cn50xx;
-	struct cvmx_l2c_spar4_s cn52xx;
-	struct cvmx_l2c_spar4_s cn52xxp1;
-	struct cvmx_l2c_spar4_s cn56xx;
-	struct cvmx_l2c_spar4_s cn56xxp1;
-	struct cvmx_l2c_spar4_s cn58xx;
-	struct cvmx_l2c_spar4_s cn58xxp1;
-};
-
-union cvmx_l2c_tadx_ecc0 {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_ecc0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_58_63:6;
-		uint64_t ow3ecc:10;
-		uint64_t reserved_42_47:6;
-		uint64_t ow2ecc:10;
-		uint64_t reserved_26_31:6;
-		uint64_t ow1ecc:10;
-		uint64_t reserved_10_15:6;
-		uint64_t ow0ecc:10;
-#else
-		uint64_t ow0ecc:10;
-		uint64_t reserved_10_15:6;
-		uint64_t ow1ecc:10;
-		uint64_t reserved_26_31:6;
-		uint64_t ow2ecc:10;
-		uint64_t reserved_42_47:6;
-		uint64_t ow3ecc:10;
-		uint64_t reserved_58_63:6;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_ecc0_s cn61xx;
-	struct cvmx_l2c_tadx_ecc0_s cn63xx;
-	struct cvmx_l2c_tadx_ecc0_s cn63xxp1;
-	struct cvmx_l2c_tadx_ecc0_s cn66xx;
-	struct cvmx_l2c_tadx_ecc0_s cn68xx;
-	struct cvmx_l2c_tadx_ecc0_s cn68xxp1;
-	struct cvmx_l2c_tadx_ecc0_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_ecc1 {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_ecc1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_58_63:6;
-		uint64_t ow7ecc:10;
-		uint64_t reserved_42_47:6;
-		uint64_t ow6ecc:10;
-		uint64_t reserved_26_31:6;
-		uint64_t ow5ecc:10;
-		uint64_t reserved_10_15:6;
-		uint64_t ow4ecc:10;
-#else
-		uint64_t ow4ecc:10;
-		uint64_t reserved_10_15:6;
-		uint64_t ow5ecc:10;
-		uint64_t reserved_26_31:6;
-		uint64_t ow6ecc:10;
-		uint64_t reserved_42_47:6;
-		uint64_t ow7ecc:10;
-		uint64_t reserved_58_63:6;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_ecc1_s cn61xx;
-	struct cvmx_l2c_tadx_ecc1_s cn63xx;
-	struct cvmx_l2c_tadx_ecc1_s cn63xxp1;
-	struct cvmx_l2c_tadx_ecc1_s cn66xx;
-	struct cvmx_l2c_tadx_ecc1_s cn68xx;
-	struct cvmx_l2c_tadx_ecc1_s cn68xxp1;
-	struct cvmx_l2c_tadx_ecc1_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_ien {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_ien_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t wrdislmc:1;
-		uint64_t rddislmc:1;
-		uint64_t noway:1;
-		uint64_t vbfdbe:1;
-		uint64_t vbfsbe:1;
-		uint64_t tagdbe:1;
-		uint64_t tagsbe:1;
-		uint64_t l2ddbe:1;
-		uint64_t l2dsbe:1;
-#else
-		uint64_t l2dsbe:1;
-		uint64_t l2ddbe:1;
-		uint64_t tagsbe:1;
-		uint64_t tagdbe:1;
-		uint64_t vbfsbe:1;
-		uint64_t vbfdbe:1;
-		uint64_t noway:1;
-		uint64_t rddislmc:1;
-		uint64_t wrdislmc:1;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_ien_s cn61xx;
-	struct cvmx_l2c_tadx_ien_s cn63xx;
-	struct cvmx_l2c_tadx_ien_cn63xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_7_63:57;
-		uint64_t noway:1;
-		uint64_t vbfdbe:1;
-		uint64_t vbfsbe:1;
-		uint64_t tagdbe:1;
-		uint64_t tagsbe:1;
-		uint64_t l2ddbe:1;
-		uint64_t l2dsbe:1;
-#else
-		uint64_t l2dsbe:1;
-		uint64_t l2ddbe:1;
-		uint64_t tagsbe:1;
-		uint64_t tagdbe:1;
-		uint64_t vbfsbe:1;
-		uint64_t vbfdbe:1;
-		uint64_t noway:1;
-		uint64_t reserved_7_63:57;
-#endif
-	} cn63xxp1;
-	struct cvmx_l2c_tadx_ien_s cn66xx;
-	struct cvmx_l2c_tadx_ien_s cn68xx;
-	struct cvmx_l2c_tadx_ien_s cn68xxp1;
-	struct cvmx_l2c_tadx_ien_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_int {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_int_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t wrdislmc:1;
-		uint64_t rddislmc:1;
-		uint64_t noway:1;
-		uint64_t vbfdbe:1;
-		uint64_t vbfsbe:1;
-		uint64_t tagdbe:1;
-		uint64_t tagsbe:1;
-		uint64_t l2ddbe:1;
-		uint64_t l2dsbe:1;
-#else
-		uint64_t l2dsbe:1;
-		uint64_t l2ddbe:1;
-		uint64_t tagsbe:1;
-		uint64_t tagdbe:1;
-		uint64_t vbfsbe:1;
-		uint64_t vbfdbe:1;
-		uint64_t noway:1;
-		uint64_t rddislmc:1;
-		uint64_t wrdislmc:1;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_int_s cn61xx;
-	struct cvmx_l2c_tadx_int_s cn63xx;
-	struct cvmx_l2c_tadx_int_s cn66xx;
-	struct cvmx_l2c_tadx_int_s cn68xx;
-	struct cvmx_l2c_tadx_int_s cn68xxp1;
-	struct cvmx_l2c_tadx_int_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_pfc0 {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_pfc0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_pfc0_s cn61xx;
-	struct cvmx_l2c_tadx_pfc0_s cn63xx;
-	struct cvmx_l2c_tadx_pfc0_s cn63xxp1;
-	struct cvmx_l2c_tadx_pfc0_s cn66xx;
-	struct cvmx_l2c_tadx_pfc0_s cn68xx;
-	struct cvmx_l2c_tadx_pfc0_s cn68xxp1;
-	struct cvmx_l2c_tadx_pfc0_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_pfc1 {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_pfc1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_pfc1_s cn61xx;
-	struct cvmx_l2c_tadx_pfc1_s cn63xx;
-	struct cvmx_l2c_tadx_pfc1_s cn63xxp1;
-	struct cvmx_l2c_tadx_pfc1_s cn66xx;
-	struct cvmx_l2c_tadx_pfc1_s cn68xx;
-	struct cvmx_l2c_tadx_pfc1_s cn68xxp1;
-	struct cvmx_l2c_tadx_pfc1_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_pfc2 {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_pfc2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_pfc2_s cn61xx;
-	struct cvmx_l2c_tadx_pfc2_s cn63xx;
-	struct cvmx_l2c_tadx_pfc2_s cn63xxp1;
-	struct cvmx_l2c_tadx_pfc2_s cn66xx;
-	struct cvmx_l2c_tadx_pfc2_s cn68xx;
-	struct cvmx_l2c_tadx_pfc2_s cn68xxp1;
-	struct cvmx_l2c_tadx_pfc2_s cnf71xx;
-};
-
-union cvmx_l2c_tadx_pfc3 {
-	uint64_t u64;
-	struct cvmx_l2c_tadx_pfc3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_pfc3_s cn61xx;
-	struct cvmx_l2c_tadx_pfc3_s cn63xx;
-	struct cvmx_l2c_tadx_pfc3_s cn63xxp1;
-	struct cvmx_l2c_tadx_pfc3_s cn66xx;
-	struct cvmx_l2c_tadx_pfc3_s cn68xx;
-	struct cvmx_l2c_tadx_pfc3_s cn68xxp1;
-	struct cvmx_l2c_tadx_pfc3_s cnf71xx;
 };
 
 union cvmx_l2c_tadx_prf {
 	uint64_t u64;
 	struct cvmx_l2c_tadx_prf_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t cnt3sel:8;
-		uint64_t cnt2sel:8;
-		uint64_t cnt1sel:8;
-		uint64_t cnt0sel:8;
-#else
-		uint64_t cnt0sel:8;
-		uint64_t cnt1sel:8;
-		uint64_t cnt2sel:8;
-		uint64_t cnt3sel:8;
-		uint64_t reserved_32_63:32;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_32_63:32,
+		__BITFIELD_FIELD(uint64_t cnt3sel:8,
+		__BITFIELD_FIELD(uint64_t cnt2sel:8,
+		__BITFIELD_FIELD(uint64_t cnt1sel:8,
+		__BITFIELD_FIELD(uint64_t cnt0sel:8,
+		;)))))
 	} s;
-	struct cvmx_l2c_tadx_prf_s cn61xx;
-	struct cvmx_l2c_tadx_prf_s cn63xx;
-	struct cvmx_l2c_tadx_prf_s cn63xxp1;
-	struct cvmx_l2c_tadx_prf_s cn66xx;
-	struct cvmx_l2c_tadx_prf_s cn68xx;
-	struct cvmx_l2c_tadx_prf_s cn68xxp1;
-	struct cvmx_l2c_tadx_prf_s cnf71xx;
 };
 
 union cvmx_l2c_tadx_tag {
 	uint64_t u64;
 	struct cvmx_l2c_tadx_tag_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_46_63:18;
-		uint64_t ecc:6;
-		uint64_t reserved_36_39:4;
-		uint64_t tag:19;
-		uint64_t reserved_4_16:13;
-		uint64_t use:1;
-		uint64_t valid:1;
-		uint64_t dirty:1;
-		uint64_t lock:1;
-#else
-		uint64_t lock:1;
-		uint64_t dirty:1;
-		uint64_t valid:1;
-		uint64_t use:1;
-		uint64_t reserved_4_16:13;
-		uint64_t tag:19;
-		uint64_t reserved_36_39:4;
-		uint64_t ecc:6;
-		uint64_t reserved_46_63:18;
-#endif
-	} s;
-	struct cvmx_l2c_tadx_tag_s cn61xx;
-	struct cvmx_l2c_tadx_tag_s cn63xx;
-	struct cvmx_l2c_tadx_tag_s cn63xxp1;
-	struct cvmx_l2c_tadx_tag_s cn66xx;
-	struct cvmx_l2c_tadx_tag_s cn68xx;
-	struct cvmx_l2c_tadx_tag_s cn68xxp1;
-	struct cvmx_l2c_tadx_tag_s cnf71xx;
-};
-
-union cvmx_l2c_ver_id {
-	uint64_t u64;
-	struct cvmx_l2c_ver_id_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t mask:64;
-#else
-		uint64_t mask:64;
-#endif
-	} s;
-	struct cvmx_l2c_ver_id_s cn61xx;
-	struct cvmx_l2c_ver_id_s cn63xx;
-	struct cvmx_l2c_ver_id_s cn63xxp1;
-	struct cvmx_l2c_ver_id_s cn66xx;
-	struct cvmx_l2c_ver_id_s cn68xx;
-	struct cvmx_l2c_ver_id_s cn68xxp1;
-	struct cvmx_l2c_ver_id_s cnf71xx;
-};
-
-union cvmx_l2c_ver_iob {
-	uint64_t u64;
-	struct cvmx_l2c_ver_iob_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_2_63:62;
-		uint64_t mask:2;
-#else
-		uint64_t mask:2;
-		uint64_t reserved_2_63:62;
-#endif
-	} s;
-	struct cvmx_l2c_ver_iob_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_1_63:63;
-		uint64_t mask:1;
-#else
-		uint64_t mask:1;
-		uint64_t reserved_1_63:63;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_ver_iob_cn61xx cn63xx;
-	struct cvmx_l2c_ver_iob_cn61xx cn63xxp1;
-	struct cvmx_l2c_ver_iob_cn61xx cn66xx;
-	struct cvmx_l2c_ver_iob_s cn68xx;
-	struct cvmx_l2c_ver_iob_s cn68xxp1;
-	struct cvmx_l2c_ver_iob_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_ver_msc {
-	uint64_t u64;
-	struct cvmx_l2c_ver_msc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_2_63:62;
-		uint64_t invl2:1;
-		uint64_t dwb:1;
-#else
-		uint64_t dwb:1;
-		uint64_t invl2:1;
-		uint64_t reserved_2_63:62;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_46_63:18,
+		__BITFIELD_FIELD(uint64_t ecc:6,
+		__BITFIELD_FIELD(uint64_t reserved_36_39:4,
+		__BITFIELD_FIELD(uint64_t tag:19,
+		__BITFIELD_FIELD(uint64_t reserved_4_16:13,
+		__BITFIELD_FIELD(uint64_t use:1,
+		__BITFIELD_FIELD(uint64_t valid:1,
+		__BITFIELD_FIELD(uint64_t dirty:1,
+		__BITFIELD_FIELD(uint64_t lock:1,
+		;)))))))))
 	} s;
-	struct cvmx_l2c_ver_msc_s cn61xx;
-	struct cvmx_l2c_ver_msc_s cn63xx;
-	struct cvmx_l2c_ver_msc_s cn66xx;
-	struct cvmx_l2c_ver_msc_s cn68xx;
-	struct cvmx_l2c_ver_msc_s cn68xxp1;
-	struct cvmx_l2c_ver_msc_s cnf71xx;
 };
 
-union cvmx_l2c_ver_pp {
-	uint64_t u64;
-	struct cvmx_l2c_ver_pp_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t mask:32;
-#else
-		uint64_t mask:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_l2c_ver_pp_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_4_63:60;
-		uint64_t mask:4;
-#else
-		uint64_t mask:4;
-		uint64_t reserved_4_63:60;
-#endif
-	} cn61xx;
-	struct cvmx_l2c_ver_pp_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_6_63:58;
-		uint64_t mask:6;
-#else
-		uint64_t mask:6;
-		uint64_t reserved_6_63:58;
-#endif
-	} cn63xx;
-	struct cvmx_l2c_ver_pp_cn63xx cn63xxp1;
-	struct cvmx_l2c_ver_pp_cn66xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_10_63:54;
-		uint64_t mask:10;
-#else
-		uint64_t mask:10;
-		uint64_t reserved_10_63:54;
-#endif
-	} cn66xx;
-	struct cvmx_l2c_ver_pp_s cn68xx;
-	struct cvmx_l2c_ver_pp_s cn68xxp1;
-	struct cvmx_l2c_ver_pp_cn61xx cnf71xx;
-};
-
-union cvmx_l2c_virtid_iobx {
-	uint64_t u64;
-	struct cvmx_l2c_virtid_iobx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t dwbid:6;
-		uint64_t reserved_6_7:2;
-		uint64_t id:6;
-#else
-		uint64_t id:6;
-		uint64_t reserved_6_7:2;
-		uint64_t dwbid:6;
-		uint64_t reserved_14_63:50;
-#endif
-	} s;
-	struct cvmx_l2c_virtid_iobx_s cn61xx;
-	struct cvmx_l2c_virtid_iobx_s cn63xx;
-	struct cvmx_l2c_virtid_iobx_s cn63xxp1;
-	struct cvmx_l2c_virtid_iobx_s cn66xx;
-	struct cvmx_l2c_virtid_iobx_s cn68xx;
-	struct cvmx_l2c_virtid_iobx_s cn68xxp1;
-	struct cvmx_l2c_virtid_iobx_s cnf71xx;
-};
-
-union cvmx_l2c_virtid_ppx {
-	uint64_t u64;
-	struct cvmx_l2c_virtid_ppx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_6_63:58;
-		uint64_t id:6;
-#else
-		uint64_t id:6;
-		uint64_t reserved_6_63:58;
-#endif
-	} s;
-	struct cvmx_l2c_virtid_ppx_s cn61xx;
-	struct cvmx_l2c_virtid_ppx_s cn63xx;
-	struct cvmx_l2c_virtid_ppx_s cn63xxp1;
-	struct cvmx_l2c_virtid_ppx_s cn66xx;
-	struct cvmx_l2c_virtid_ppx_s cn68xx;
-	struct cvmx_l2c_virtid_ppx_s cn68xxp1;
-	struct cvmx_l2c_virtid_ppx_s cnf71xx;
-};
-
-union cvmx_l2c_vrt_ctl {
-	uint64_t u64;
-	struct cvmx_l2c_vrt_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t ooberr:1;
-		uint64_t reserved_7_7:1;
-		uint64_t memsz:3;
-		uint64_t numid:3;
-		uint64_t enable:1;
-#else
-		uint64_t enable:1;
-		uint64_t numid:3;
-		uint64_t memsz:3;
-		uint64_t reserved_7_7:1;
-		uint64_t ooberr:1;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_l2c_vrt_ctl_s cn61xx;
-	struct cvmx_l2c_vrt_ctl_s cn63xx;
-	struct cvmx_l2c_vrt_ctl_s cn63xxp1;
-	struct cvmx_l2c_vrt_ctl_s cn66xx;
-	struct cvmx_l2c_vrt_ctl_s cn68xx;
-	struct cvmx_l2c_vrt_ctl_s cn68xxp1;
-	struct cvmx_l2c_vrt_ctl_s cnf71xx;
-};
-
-union cvmx_l2c_vrt_memx {
-	uint64_t u64;
-	struct cvmx_l2c_vrt_memx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_36_63:28;
-		uint64_t parity:4;
-		uint64_t data:32;
-#else
-		uint64_t data:32;
-		uint64_t parity:4;
-		uint64_t reserved_36_63:28;
-#endif
-	} s;
-	struct cvmx_l2c_vrt_memx_s cn61xx;
-	struct cvmx_l2c_vrt_memx_s cn63xx;
-	struct cvmx_l2c_vrt_memx_s cn63xxp1;
-	struct cvmx_l2c_vrt_memx_s cn66xx;
-	struct cvmx_l2c_vrt_memx_s cn68xx;
-	struct cvmx_l2c_vrt_memx_s cn68xxp1;
-	struct cvmx_l2c_vrt_memx_s cnf71xx;
-};
-
-union cvmx_l2c_wpar_iobx {
-	uint64_t u64;
-	struct cvmx_l2c_wpar_iobx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t mask:16;
-#else
-		uint64_t mask:16;
-		uint64_t reserved_16_63:48;
-#endif
-	} s;
-	struct cvmx_l2c_wpar_iobx_s cn61xx;
-	struct cvmx_l2c_wpar_iobx_s cn63xx;
-	struct cvmx_l2c_wpar_iobx_s cn63xxp1;
-	struct cvmx_l2c_wpar_iobx_s cn66xx;
-	struct cvmx_l2c_wpar_iobx_s cn68xx;
-	struct cvmx_l2c_wpar_iobx_s cn68xxp1;
-	struct cvmx_l2c_wpar_iobx_s cnf71xx;
-};
-
-union cvmx_l2c_wpar_ppx {
-	uint64_t u64;
-	struct cvmx_l2c_wpar_ppx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t mask:16;
-#else
-		uint64_t mask:16;
-		uint64_t reserved_16_63:48;
-#endif
-	} s;
-	struct cvmx_l2c_wpar_ppx_s cn61xx;
-	struct cvmx_l2c_wpar_ppx_s cn63xx;
-	struct cvmx_l2c_wpar_ppx_s cn63xxp1;
-	struct cvmx_l2c_wpar_ppx_s cn66xx;
-	struct cvmx_l2c_wpar_ppx_s cn68xx;
-	struct cvmx_l2c_wpar_ppx_s cn68xxp1;
-	struct cvmx_l2c_wpar_ppx_s cnf71xx;
-};
-
-union cvmx_l2c_xmcx_pfc {
-	uint64_t u64;
-	struct cvmx_l2c_xmcx_pfc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
-	} s;
-	struct cvmx_l2c_xmcx_pfc_s cn61xx;
-	struct cvmx_l2c_xmcx_pfc_s cn63xx;
-	struct cvmx_l2c_xmcx_pfc_s cn63xxp1;
-	struct cvmx_l2c_xmcx_pfc_s cn66xx;
-	struct cvmx_l2c_xmcx_pfc_s cn68xx;
-	struct cvmx_l2c_xmcx_pfc_s cn68xxp1;
-	struct cvmx_l2c_xmcx_pfc_s cnf71xx;
-};
-
-union cvmx_l2c_xmc_cmd {
+union cvmx_l2c_lckbase {
 	uint64_t u64;
-	struct cvmx_l2c_xmc_cmd_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t inuse:1;
-		uint64_t cmd:6;
-		uint64_t reserved_38_56:19;
-		uint64_t addr:38;
-#else
-		uint64_t addr:38;
-		uint64_t reserved_38_56:19;
-		uint64_t cmd:6;
-		uint64_t inuse:1;
-#endif
+	struct cvmx_l2c_lckbase_s {
+		__BITFIELD_FIELD(uint64_t reserved_31_63:33,
+		__BITFIELD_FIELD(uint64_t lck_base:27,
+		__BITFIELD_FIELD(uint64_t reserved_1_3:3,
+		__BITFIELD_FIELD(uint64_t lck_ena:1,
+		;))))
 	} s;
-	struct cvmx_l2c_xmc_cmd_s cn61xx;
-	struct cvmx_l2c_xmc_cmd_s cn63xx;
-	struct cvmx_l2c_xmc_cmd_s cn63xxp1;
-	struct cvmx_l2c_xmc_cmd_s cn66xx;
-	struct cvmx_l2c_xmc_cmd_s cn68xx;
-	struct cvmx_l2c_xmc_cmd_s cn68xxp1;
-	struct cvmx_l2c_xmc_cmd_s cnf71xx;
 };
 
-union cvmx_l2c_xmdx_pfc {
+union cvmx_l2c_lckoff {
 	uint64_t u64;
-	struct cvmx_l2c_xmdx_pfc_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t count:64;
-#else
-		uint64_t count:64;
-#endif
+	struct cvmx_l2c_lckoff_s {
+		__BITFIELD_FIELD(uint64_t reserved_10_63:54,
+		__BITFIELD_FIELD(uint64_t lck_offset:10,
+		;))
 	} s;
-	struct cvmx_l2c_xmdx_pfc_s cn61xx;
-	struct cvmx_l2c_xmdx_pfc_s cn63xx;
-	struct cvmx_l2c_xmdx_pfc_s cn63xxp1;
-	struct cvmx_l2c_xmdx_pfc_s cn66xx;
-	struct cvmx_l2c_xmdx_pfc_s cn68xx;
-	struct cvmx_l2c_xmdx_pfc_s cn68xxp1;
-	struct cvmx_l2c_xmdx_pfc_s cnf71xx;
 };
 
 #endif
diff --git a/arch/mips/include/asm/octeon/cvmx-l2c.h b/arch/mips/include/asm/octeon/cvmx-l2c.h
index ddb429210a0e..02c4479a90c8 100644
--- a/arch/mips/include/asm/octeon/cvmx-l2c.h
+++ b/arch/mips/include/asm/octeon/cvmx-l2c.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2010 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -33,48 +33,39 @@
 #ifndef __CVMX_L2C_H__
 #define __CVMX_L2C_H__
 
-#define CVMX_L2_ASSOC	  cvmx_l2c_get_num_assoc()   /* Deprecated macro, use function */
-#define CVMX_L2_SET_BITS  cvmx_l2c_get_set_bits()    /* Deprecated macro, use function */
-#define CVMX_L2_SETS	  cvmx_l2c_get_num_sets()    /* Deprecated macro, use function */
+#include <uapi/asm/bitfield.h>
 
+#define CVMX_L2_ASSOC	 cvmx_l2c_get_num_assoc()	/* Deprecated macro */
+#define CVMX_L2_SET_BITS cvmx_l2c_get_set_bits()	/* Deprecated macro */
+#define CVMX_L2_SETS	 cvmx_l2c_get_num_sets()	/* Deprecated macro */
 
-#define CVMX_L2C_IDX_ADDR_SHIFT 7  /* based on 128 byte cache line size */
+/* Based on 128 byte cache line size */
+#define CVMX_L2C_IDX_ADDR_SHIFT	7
 #define CVMX_L2C_IDX_MASK	(cvmx_l2c_get_num_sets() - 1)
 
 /* Defines for index aliasing computations */
-#define CVMX_L2C_TAG_ADDR_ALIAS_SHIFT (CVMX_L2C_IDX_ADDR_SHIFT + cvmx_l2c_get_set_bits())
+#define CVMX_L2C_TAG_ADDR_ALIAS_SHIFT (CVMX_L2C_IDX_ADDR_SHIFT +	       \
+		cvmx_l2c_get_set_bits())
 #define CVMX_L2C_ALIAS_MASK (CVMX_L2C_IDX_MASK << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT)
-#define CVMX_L2C_MEMBANK_SELECT_SIZE  4096
+#define CVMX_L2C_MEMBANK_SELECT_SIZE 4096
 
-/* Defines for Virtualizations, valid only from Octeon II onwards. */
-#define CVMX_L2C_VRT_MAX_VIRTID_ALLOWED ((OCTEON_IS_MODEL(OCTEON_CN63XX)) ? 64 : 0)
-#define CVMX_L2C_VRT_MAX_MEMSZ_ALLOWED ((OCTEON_IS_MODEL(OCTEON_CN63XX)) ? 32 : 0)
+/* Number of L2C Tag-and-data sections (TADs) that are connected to LMC. */
+#define CVMX_L2C_TADS  1
 
 union cvmx_l2c_tag {
 	uint64_t u64;
 	struct {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved:28;
-		uint64_t V:1;		/* Line valid */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t L:1;		/* Line locked */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t addr:32;	/* Phys mem (not all bits valid) */
-#else
-		uint64_t addr:32;	/* Phys mem (not all bits valid) */
-		uint64_t U:1;		/* Use, LRU eviction */
-		uint64_t L:1;		/* Line locked */
-		uint64_t D:1;		/* Line dirty */
-		uint64_t V:1;		/* Line valid */
-		uint64_t reserved:28;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved:28,
+		__BITFIELD_FIELD(uint64_t V:1,
+		__BITFIELD_FIELD(uint64_t D:1,
+		__BITFIELD_FIELD(uint64_t L:1,
+		__BITFIELD_FIELD(uint64_t U:1,
+		__BITFIELD_FIELD(uint64_t addr:32,
+		;))))))
 	} s;
 };
 
-/* Number of L2C Tag-and-data sections (TADs) that are connected to LMC. */
-#define CVMX_L2C_TADS  1
-
-  /* L2C Performance Counter events. */
+/* L2C Performance Counter events. */
 enum cvmx_l2c_event {
 	CVMX_L2C_EVENT_CYCLES		=  0,
 	CVMX_L2C_EVENT_INSTRUCTION_MISS =  1,
@@ -175,7 +166,8 @@ enum cvmx_l2c_tad_event {
  *
  * @note The routine does not clear the counter.
  */
-void cvmx_l2c_config_perf(uint32_t counter, enum cvmx_l2c_event event, uint32_t clear_on_read);
+void cvmx_l2c_config_perf(uint32_t counter, enum cvmx_l2c_event event,
+			  uint32_t clear_on_read);
 
 /**
  * Read the given L2 Cache performance counter. The counter must be configured
@@ -307,8 +299,11 @@ int cvmx_l2c_unlock_mem_region(uint64_t start, uint64_t len);
 union cvmx_l2c_tag cvmx_l2c_get_tag(uint32_t association, uint32_t index);
 
 /* Wrapper providing a deprecated old function name */
-static inline union cvmx_l2c_tag cvmx_get_l2c_tag(uint32_t association, uint32_t index) __attribute__((deprecated));
-static inline union cvmx_l2c_tag cvmx_get_l2c_tag(uint32_t association, uint32_t index)
+static inline union cvmx_l2c_tag cvmx_get_l2c_tag(uint32_t association,
+						  uint32_t index)
+						  __attribute__((deprecated));
+static inline union cvmx_l2c_tag cvmx_get_l2c_tag(uint32_t association,
+						  uint32_t index)
 {
 	return cvmx_l2c_get_tag(association, index);
 }
diff --git a/arch/mips/include/asm/octeon/cvmx-l2d-defs.h b/arch/mips/include/asm/octeon/cvmx-l2d-defs.h
deleted file mode 100644
index 11a456215638..000000000000
--- a/arch/mips/include/asm/octeon/cvmx-l2d-defs.h
+++ /dev/null
@@ -1,526 +0,0 @@
-/***********************license start***************
- * Author: Cavium Networks
- *
- * Contact: support@caviumnetworks.com
- * This file is part of the OCTEON SDK
- *
- * Copyright (c) 2003-2012 Cavium Networks
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, Version 2, as
- * published by the Free Software Foundation.
- *
- * This file is distributed in the hope that it will be useful, but
- * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this file; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- * or visit http://www.gnu.org/licenses/.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium Networks for more information
- ***********************license end**************************************/
-
-#ifndef __CVMX_L2D_DEFS_H__
-#define __CVMX_L2D_DEFS_H__
-
-#define CVMX_L2D_BST0 (CVMX_ADD_IO_SEG(0x0001180080000780ull))
-#define CVMX_L2D_BST1 (CVMX_ADD_IO_SEG(0x0001180080000788ull))
-#define CVMX_L2D_BST2 (CVMX_ADD_IO_SEG(0x0001180080000790ull))
-#define CVMX_L2D_BST3 (CVMX_ADD_IO_SEG(0x0001180080000798ull))
-#define CVMX_L2D_ERR (CVMX_ADD_IO_SEG(0x0001180080000010ull))
-#define CVMX_L2D_FADR (CVMX_ADD_IO_SEG(0x0001180080000018ull))
-#define CVMX_L2D_FSYN0 (CVMX_ADD_IO_SEG(0x0001180080000020ull))
-#define CVMX_L2D_FSYN1 (CVMX_ADD_IO_SEG(0x0001180080000028ull))
-#define CVMX_L2D_FUS0 (CVMX_ADD_IO_SEG(0x00011800800007A0ull))
-#define CVMX_L2D_FUS1 (CVMX_ADD_IO_SEG(0x00011800800007A8ull))
-#define CVMX_L2D_FUS2 (CVMX_ADD_IO_SEG(0x00011800800007B0ull))
-#define CVMX_L2D_FUS3 (CVMX_ADD_IO_SEG(0x00011800800007B8ull))
-
-union cvmx_l2d_bst0 {
-	uint64_t u64;
-	struct cvmx_l2d_bst0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_35_63:29;
-		uint64_t ftl:1;
-		uint64_t q0stat:34;
-#else
-		uint64_t q0stat:34;
-		uint64_t ftl:1;
-		uint64_t reserved_35_63:29;
-#endif
-	} s;
-	struct cvmx_l2d_bst0_s cn30xx;
-	struct cvmx_l2d_bst0_s cn31xx;
-	struct cvmx_l2d_bst0_s cn38xx;
-	struct cvmx_l2d_bst0_s cn38xxp2;
-	struct cvmx_l2d_bst0_s cn50xx;
-	struct cvmx_l2d_bst0_s cn52xx;
-	struct cvmx_l2d_bst0_s cn52xxp1;
-	struct cvmx_l2d_bst0_s cn56xx;
-	struct cvmx_l2d_bst0_s cn56xxp1;
-	struct cvmx_l2d_bst0_s cn58xx;
-	struct cvmx_l2d_bst0_s cn58xxp1;
-};
-
-union cvmx_l2d_bst1 {
-	uint64_t u64;
-	struct cvmx_l2d_bst1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_34_63:30;
-		uint64_t q1stat:34;
-#else
-		uint64_t q1stat:34;
-		uint64_t reserved_34_63:30;
-#endif
-	} s;
-	struct cvmx_l2d_bst1_s cn30xx;
-	struct cvmx_l2d_bst1_s cn31xx;
-	struct cvmx_l2d_bst1_s cn38xx;
-	struct cvmx_l2d_bst1_s cn38xxp2;
-	struct cvmx_l2d_bst1_s cn50xx;
-	struct cvmx_l2d_bst1_s cn52xx;
-	struct cvmx_l2d_bst1_s cn52xxp1;
-	struct cvmx_l2d_bst1_s cn56xx;
-	struct cvmx_l2d_bst1_s cn56xxp1;
-	struct cvmx_l2d_bst1_s cn58xx;
-	struct cvmx_l2d_bst1_s cn58xxp1;
-};
-
-union cvmx_l2d_bst2 {
-	uint64_t u64;
-	struct cvmx_l2d_bst2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_34_63:30;
-		uint64_t q2stat:34;
-#else
-		uint64_t q2stat:34;
-		uint64_t reserved_34_63:30;
-#endif
-	} s;
-	struct cvmx_l2d_bst2_s cn30xx;
-	struct cvmx_l2d_bst2_s cn31xx;
-	struct cvmx_l2d_bst2_s cn38xx;
-	struct cvmx_l2d_bst2_s cn38xxp2;
-	struct cvmx_l2d_bst2_s cn50xx;
-	struct cvmx_l2d_bst2_s cn52xx;
-	struct cvmx_l2d_bst2_s cn52xxp1;
-	struct cvmx_l2d_bst2_s cn56xx;
-	struct cvmx_l2d_bst2_s cn56xxp1;
-	struct cvmx_l2d_bst2_s cn58xx;
-	struct cvmx_l2d_bst2_s cn58xxp1;
-};
-
-union cvmx_l2d_bst3 {
-	uint64_t u64;
-	struct cvmx_l2d_bst3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_34_63:30;
-		uint64_t q3stat:34;
-#else
-		uint64_t q3stat:34;
-		uint64_t reserved_34_63:30;
-#endif
-	} s;
-	struct cvmx_l2d_bst3_s cn30xx;
-	struct cvmx_l2d_bst3_s cn31xx;
-	struct cvmx_l2d_bst3_s cn38xx;
-	struct cvmx_l2d_bst3_s cn38xxp2;
-	struct cvmx_l2d_bst3_s cn50xx;
-	struct cvmx_l2d_bst3_s cn52xx;
-	struct cvmx_l2d_bst3_s cn52xxp1;
-	struct cvmx_l2d_bst3_s cn56xx;
-	struct cvmx_l2d_bst3_s cn56xxp1;
-	struct cvmx_l2d_bst3_s cn58xx;
-	struct cvmx_l2d_bst3_s cn58xxp1;
-};
-
-union cvmx_l2d_err {
-	uint64_t u64;
-	struct cvmx_l2d_err_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_6_63:58;
-		uint64_t bmhclsel:1;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t bmhclsel:1;
-		uint64_t reserved_6_63:58;
-#endif
-	} s;
-	struct cvmx_l2d_err_s cn30xx;
-	struct cvmx_l2d_err_s cn31xx;
-	struct cvmx_l2d_err_s cn38xx;
-	struct cvmx_l2d_err_s cn38xxp2;
-	struct cvmx_l2d_err_s cn50xx;
-	struct cvmx_l2d_err_s cn52xx;
-	struct cvmx_l2d_err_s cn52xxp1;
-	struct cvmx_l2d_err_s cn56xx;
-	struct cvmx_l2d_err_s cn56xxp1;
-	struct cvmx_l2d_err_s cn58xx;
-	struct cvmx_l2d_err_s cn58xxp1;
-};
-
-union cvmx_l2d_fadr {
-	uint64_t u64;
-	struct cvmx_l2d_fadr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_19_63:45;
-		uint64_t fadru:1;
-		uint64_t fowmsk:4;
-		uint64_t fset:3;
-		uint64_t fadr:11;
-#else
-		uint64_t fadr:11;
-		uint64_t fset:3;
-		uint64_t fowmsk:4;
-		uint64_t fadru:1;
-		uint64_t reserved_19_63:45;
-#endif
-	} s;
-	struct cvmx_l2d_fadr_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_18_63:46;
-		uint64_t fowmsk:4;
-		uint64_t reserved_13_13:1;
-		uint64_t fset:2;
-		uint64_t reserved_9_10:2;
-		uint64_t fadr:9;
-#else
-		uint64_t fadr:9;
-		uint64_t reserved_9_10:2;
-		uint64_t fset:2;
-		uint64_t reserved_13_13:1;
-		uint64_t fowmsk:4;
-		uint64_t reserved_18_63:46;
-#endif
-	} cn30xx;
-	struct cvmx_l2d_fadr_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_18_63:46;
-		uint64_t fowmsk:4;
-		uint64_t reserved_13_13:1;
-		uint64_t fset:2;
-		uint64_t reserved_10_10:1;
-		uint64_t fadr:10;
-#else
-		uint64_t fadr:10;
-		uint64_t reserved_10_10:1;
-		uint64_t fset:2;
-		uint64_t reserved_13_13:1;
-		uint64_t fowmsk:4;
-		uint64_t reserved_18_63:46;
-#endif
-	} cn31xx;
-	struct cvmx_l2d_fadr_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_18_63:46;
-		uint64_t fowmsk:4;
-		uint64_t fset:3;
-		uint64_t fadr:11;
-#else
-		uint64_t fadr:11;
-		uint64_t fset:3;
-		uint64_t fowmsk:4;
-		uint64_t reserved_18_63:46;
-#endif
-	} cn38xx;
-	struct cvmx_l2d_fadr_cn38xx cn38xxp2;
-	struct cvmx_l2d_fadr_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_18_63:46;
-		uint64_t fowmsk:4;
-		uint64_t fset:3;
-		uint64_t reserved_8_10:3;
-		uint64_t fadr:8;
-#else
-		uint64_t fadr:8;
-		uint64_t reserved_8_10:3;
-		uint64_t fset:3;
-		uint64_t fowmsk:4;
-		uint64_t reserved_18_63:46;
-#endif
-	} cn50xx;
-	struct cvmx_l2d_fadr_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_18_63:46;
-		uint64_t fowmsk:4;
-		uint64_t fset:3;
-		uint64_t reserved_10_10:1;
-		uint64_t fadr:10;
-#else
-		uint64_t fadr:10;
-		uint64_t reserved_10_10:1;
-		uint64_t fset:3;
-		uint64_t fowmsk:4;
-		uint64_t reserved_18_63:46;
-#endif
-	} cn52xx;
-	struct cvmx_l2d_fadr_cn52xx cn52xxp1;
-	struct cvmx_l2d_fadr_s cn56xx;
-	struct cvmx_l2d_fadr_s cn56xxp1;
-	struct cvmx_l2d_fadr_s cn58xx;
-	struct cvmx_l2d_fadr_s cn58xxp1;
-};
-
-union cvmx_l2d_fsyn0 {
-	uint64_t u64;
-	struct cvmx_l2d_fsyn0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t fsyn_ow1:10;
-		uint64_t fsyn_ow0:10;
-#else
-		uint64_t fsyn_ow0:10;
-		uint64_t fsyn_ow1:10;
-		uint64_t reserved_20_63:44;
-#endif
-	} s;
-	struct cvmx_l2d_fsyn0_s cn30xx;
-	struct cvmx_l2d_fsyn0_s cn31xx;
-	struct cvmx_l2d_fsyn0_s cn38xx;
-	struct cvmx_l2d_fsyn0_s cn38xxp2;
-	struct cvmx_l2d_fsyn0_s cn50xx;
-	struct cvmx_l2d_fsyn0_s cn52xx;
-	struct cvmx_l2d_fsyn0_s cn52xxp1;
-	struct cvmx_l2d_fsyn0_s cn56xx;
-	struct cvmx_l2d_fsyn0_s cn56xxp1;
-	struct cvmx_l2d_fsyn0_s cn58xx;
-	struct cvmx_l2d_fsyn0_s cn58xxp1;
-};
-
-union cvmx_l2d_fsyn1 {
-	uint64_t u64;
-	struct cvmx_l2d_fsyn1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t fsyn_ow3:10;
-		uint64_t fsyn_ow2:10;
-#else
-		uint64_t fsyn_ow2:10;
-		uint64_t fsyn_ow3:10;
-		uint64_t reserved_20_63:44;
-#endif
-	} s;
-	struct cvmx_l2d_fsyn1_s cn30xx;
-	struct cvmx_l2d_fsyn1_s cn31xx;
-	struct cvmx_l2d_fsyn1_s cn38xx;
-	struct cvmx_l2d_fsyn1_s cn38xxp2;
-	struct cvmx_l2d_fsyn1_s cn50xx;
-	struct cvmx_l2d_fsyn1_s cn52xx;
-	struct cvmx_l2d_fsyn1_s cn52xxp1;
-	struct cvmx_l2d_fsyn1_s cn56xx;
-	struct cvmx_l2d_fsyn1_s cn56xxp1;
-	struct cvmx_l2d_fsyn1_s cn58xx;
-	struct cvmx_l2d_fsyn1_s cn58xxp1;
-};
-
-union cvmx_l2d_fus0 {
-	uint64_t u64;
-	struct cvmx_l2d_fus0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_34_63:30;
-		uint64_t q0fus:34;
-#else
-		uint64_t q0fus:34;
-		uint64_t reserved_34_63:30;
-#endif
-	} s;
-	struct cvmx_l2d_fus0_s cn30xx;
-	struct cvmx_l2d_fus0_s cn31xx;
-	struct cvmx_l2d_fus0_s cn38xx;
-	struct cvmx_l2d_fus0_s cn38xxp2;
-	struct cvmx_l2d_fus0_s cn50xx;
-	struct cvmx_l2d_fus0_s cn52xx;
-	struct cvmx_l2d_fus0_s cn52xxp1;
-	struct cvmx_l2d_fus0_s cn56xx;
-	struct cvmx_l2d_fus0_s cn56xxp1;
-	struct cvmx_l2d_fus0_s cn58xx;
-	struct cvmx_l2d_fus0_s cn58xxp1;
-};
-
-union cvmx_l2d_fus1 {
-	uint64_t u64;
-	struct cvmx_l2d_fus1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_34_63:30;
-		uint64_t q1fus:34;
-#else
-		uint64_t q1fus:34;
-		uint64_t reserved_34_63:30;
-#endif
-	} s;
-	struct cvmx_l2d_fus1_s cn30xx;
-	struct cvmx_l2d_fus1_s cn31xx;
-	struct cvmx_l2d_fus1_s cn38xx;
-	struct cvmx_l2d_fus1_s cn38xxp2;
-	struct cvmx_l2d_fus1_s cn50xx;
-	struct cvmx_l2d_fus1_s cn52xx;
-	struct cvmx_l2d_fus1_s cn52xxp1;
-	struct cvmx_l2d_fus1_s cn56xx;
-	struct cvmx_l2d_fus1_s cn56xxp1;
-	struct cvmx_l2d_fus1_s cn58xx;
-	struct cvmx_l2d_fus1_s cn58xxp1;
-};
-
-union cvmx_l2d_fus2 {
-	uint64_t u64;
-	struct cvmx_l2d_fus2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_34_63:30;
-		uint64_t q2fus:34;
-#else
-		uint64_t q2fus:34;
-		uint64_t reserved_34_63:30;
-#endif
-	} s;
-	struct cvmx_l2d_fus2_s cn30xx;
-	struct cvmx_l2d_fus2_s cn31xx;
-	struct cvmx_l2d_fus2_s cn38xx;
-	struct cvmx_l2d_fus2_s cn38xxp2;
-	struct cvmx_l2d_fus2_s cn50xx;
-	struct cvmx_l2d_fus2_s cn52xx;
-	struct cvmx_l2d_fus2_s cn52xxp1;
-	struct cvmx_l2d_fus2_s cn56xx;
-	struct cvmx_l2d_fus2_s cn56xxp1;
-	struct cvmx_l2d_fus2_s cn58xx;
-	struct cvmx_l2d_fus2_s cn58xxp1;
-};
-
-union cvmx_l2d_fus3 {
-	uint64_t u64;
-	struct cvmx_l2d_fus3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_40_63:24;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_34_36:3;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t reserved_34_36:3;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_40_63:24;
-#endif
-	} s;
-	struct cvmx_l2d_fus3_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_35_63:29;
-		uint64_t crip_64k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_64k:1;
-		uint64_t reserved_35_63:29;
-#endif
-	} cn30xx;
-	struct cvmx_l2d_fus3_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_35_63:29;
-		uint64_t crip_128k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_128k:1;
-		uint64_t reserved_35_63:29;
-#endif
-	} cn31xx;
-	struct cvmx_l2d_fus3_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_36_63:28;
-		uint64_t crip_256k:1;
-		uint64_t crip_512k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_512k:1;
-		uint64_t crip_256k:1;
-		uint64_t reserved_36_63:28;
-#endif
-	} cn38xx;
-	struct cvmx_l2d_fus3_cn38xx cn38xxp2;
-	struct cvmx_l2d_fus3_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_40_63:24;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_36_36:1;
-		uint64_t crip_32k:1;
-		uint64_t crip_64k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_64k:1;
-		uint64_t crip_32k:1;
-		uint64_t reserved_36_36:1;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_40_63:24;
-#endif
-	} cn50xx;
-	struct cvmx_l2d_fus3_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_40_63:24;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_36_36:1;
-		uint64_t crip_128k:1;
-		uint64_t crip_256k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_256k:1;
-		uint64_t crip_128k:1;
-		uint64_t reserved_36_36:1;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_40_63:24;
-#endif
-	} cn52xx;
-	struct cvmx_l2d_fus3_cn52xx cn52xxp1;
-	struct cvmx_l2d_fus3_cn56xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_40_63:24;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_36_36:1;
-		uint64_t crip_512k:1;
-		uint64_t crip_1024k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_1024k:1;
-		uint64_t crip_512k:1;
-		uint64_t reserved_36_36:1;
-		uint64_t ema_ctl:3;
-		uint64_t reserved_40_63:24;
-#endif
-	} cn56xx;
-	struct cvmx_l2d_fus3_cn56xx cn56xxp1;
-	struct cvmx_l2d_fus3_cn58xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_39_63:25;
-		uint64_t ema_ctl:2;
-		uint64_t reserved_36_36:1;
-		uint64_t crip_512k:1;
-		uint64_t crip_1024k:1;
-		uint64_t q3fus:34;
-#else
-		uint64_t q3fus:34;
-		uint64_t crip_1024k:1;
-		uint64_t crip_512k:1;
-		uint64_t reserved_36_36:1;
-		uint64_t ema_ctl:2;
-		uint64_t reserved_39_63:25;
-#endif
-	} cn58xx;
-	struct cvmx_l2d_fus3_cn58xx cn58xxp1;
-};
-
-#endif
diff --git a/arch/mips/include/asm/octeon/cvmx-l2t-defs.h b/arch/mips/include/asm/octeon/cvmx-l2t-defs.h
index 83ce22c080e6..fe50671fd1bb 100644
--- a/arch/mips/include/asm/octeon/cvmx-l2t-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-l2t-defs.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2012 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -28,210 +28,116 @@
 #ifndef __CVMX_L2T_DEFS_H__
 #define __CVMX_L2T_DEFS_H__
 
-#define CVMX_L2T_ERR (CVMX_ADD_IO_SEG(0x0001180080000008ull))
+#include <uapi/asm/bitfield.h>
+
+#define CVMX_L2T_ERR	(CVMX_ADD_IO_SEG(0x0001180080000008ull))
+
 
 union cvmx_l2t_err {
 	uint64_t u64;
 	struct cvmx_l2t_err_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_29_63:35;
-		uint64_t fadru:1;
-		uint64_t lck_intena2:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr:1;
-		uint64_t fset:3;
-		uint64_t fadr:10;
-		uint64_t fsyn:6;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t fsyn:6;
-		uint64_t fadr:10;
-		uint64_t fset:3;
-		uint64_t lckerr:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena2:1;
-		uint64_t fadru:1;
-		uint64_t reserved_29_63:35;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_29_63:35,
+		__BITFIELD_FIELD(uint64_t fadru:1,
+		__BITFIELD_FIELD(uint64_t lck_intena2:1,
+		__BITFIELD_FIELD(uint64_t lckerr2:1,
+		__BITFIELD_FIELD(uint64_t lck_intena:1,
+		__BITFIELD_FIELD(uint64_t lckerr:1,
+		__BITFIELD_FIELD(uint64_t fset:3,
+		__BITFIELD_FIELD(uint64_t fadr:10,
+		__BITFIELD_FIELD(uint64_t fsyn:6,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;))))))))))))))
 	} s;
 	struct cvmx_l2t_err_cn30xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_28_63:36;
-		uint64_t lck_intena2:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr:1;
-		uint64_t reserved_23_23:1;
-		uint64_t fset:2;
-		uint64_t reserved_19_20:2;
-		uint64_t fadr:8;
-		uint64_t fsyn:6;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t fsyn:6;
-		uint64_t fadr:8;
-		uint64_t reserved_19_20:2;
-		uint64_t fset:2;
-		uint64_t reserved_23_23:1;
-		uint64_t lckerr:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena2:1;
-		uint64_t reserved_28_63:36;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_28_63:36,
+		__BITFIELD_FIELD(uint64_t lck_intena2:1,
+		__BITFIELD_FIELD(uint64_t lckerr2:1,
+		__BITFIELD_FIELD(uint64_t lck_intena:1,
+		__BITFIELD_FIELD(uint64_t lckerr:1,
+		__BITFIELD_FIELD(uint64_t reserved_23_23:1,
+		__BITFIELD_FIELD(uint64_t fset:2,
+		__BITFIELD_FIELD(uint64_t reserved_19_20:2,
+		__BITFIELD_FIELD(uint64_t fadr:8,
+		__BITFIELD_FIELD(uint64_t fsyn:6,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;)))))))))))))))
 	} cn30xx;
 	struct cvmx_l2t_err_cn31xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_28_63:36;
-		uint64_t lck_intena2:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr:1;
-		uint64_t reserved_23_23:1;
-		uint64_t fset:2;
-		uint64_t reserved_20_20:1;
-		uint64_t fadr:9;
-		uint64_t fsyn:6;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t fsyn:6;
-		uint64_t fadr:9;
-		uint64_t reserved_20_20:1;
-		uint64_t fset:2;
-		uint64_t reserved_23_23:1;
-		uint64_t lckerr:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena2:1;
-		uint64_t reserved_28_63:36;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_28_63:36,
+		__BITFIELD_FIELD(uint64_t lck_intena2:1,
+		__BITFIELD_FIELD(uint64_t lckerr2:1,
+		__BITFIELD_FIELD(uint64_t lck_intena:1,
+		__BITFIELD_FIELD(uint64_t lckerr:1,
+		__BITFIELD_FIELD(uint64_t reserved_23_23:1,
+		__BITFIELD_FIELD(uint64_t fset:2,
+		__BITFIELD_FIELD(uint64_t reserved_20_20:1,
+		__BITFIELD_FIELD(uint64_t fadr:9,
+		__BITFIELD_FIELD(uint64_t fsyn:6,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;)))))))))))))))
 	} cn31xx;
 	struct cvmx_l2t_err_cn38xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_28_63:36;
-		uint64_t lck_intena2:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr:1;
-		uint64_t fset:3;
-		uint64_t fadr:10;
-		uint64_t fsyn:6;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t fsyn:6;
-		uint64_t fadr:10;
-		uint64_t fset:3;
-		uint64_t lckerr:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena2:1;
-		uint64_t reserved_28_63:36;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_28_63:36,
+		__BITFIELD_FIELD(uint64_t lck_intena2:1,
+		__BITFIELD_FIELD(uint64_t lckerr2:1,
+		__BITFIELD_FIELD(uint64_t lck_intena:1,
+		__BITFIELD_FIELD(uint64_t lckerr:1,
+		__BITFIELD_FIELD(uint64_t fset:3,
+		__BITFIELD_FIELD(uint64_t fadr:10,
+		__BITFIELD_FIELD(uint64_t fsyn:6,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;)))))))))))))
 	} cn38xx;
 	struct cvmx_l2t_err_cn38xx cn38xxp2;
 	struct cvmx_l2t_err_cn50xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_28_63:36;
-		uint64_t lck_intena2:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr:1;
-		uint64_t fset:3;
-		uint64_t reserved_18_20:3;
-		uint64_t fadr:7;
-		uint64_t fsyn:6;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t fsyn:6;
-		uint64_t fadr:7;
-		uint64_t reserved_18_20:3;
-		uint64_t fset:3;
-		uint64_t lckerr:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena2:1;
-		uint64_t reserved_28_63:36;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_28_63:36,
+		__BITFIELD_FIELD(uint64_t lck_intena2:1,
+		__BITFIELD_FIELD(uint64_t lckerr2:1,
+		__BITFIELD_FIELD(uint64_t lck_intena:1,
+		__BITFIELD_FIELD(uint64_t lckerr:1,
+		__BITFIELD_FIELD(uint64_t fset:3,
+		__BITFIELD_FIELD(uint64_t reserved_18_20:3,
+		__BITFIELD_FIELD(uint64_t fadr:7,
+		__BITFIELD_FIELD(uint64_t fsyn:6,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;))))))))))))))
 	} cn50xx;
 	struct cvmx_l2t_err_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_28_63:36;
-		uint64_t lck_intena2:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr:1;
-		uint64_t fset:3;
-		uint64_t reserved_20_20:1;
-		uint64_t fadr:9;
-		uint64_t fsyn:6;
-		uint64_t ded_err:1;
-		uint64_t sec_err:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_intena:1;
-		uint64_t ecc_ena:1;
-#else
-		uint64_t ecc_ena:1;
-		uint64_t sec_intena:1;
-		uint64_t ded_intena:1;
-		uint64_t sec_err:1;
-		uint64_t ded_err:1;
-		uint64_t fsyn:6;
-		uint64_t fadr:9;
-		uint64_t reserved_20_20:1;
-		uint64_t fset:3;
-		uint64_t lckerr:1;
-		uint64_t lck_intena:1;
-		uint64_t lckerr2:1;
-		uint64_t lck_intena2:1;
-		uint64_t reserved_28_63:36;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_28_63:36,
+		__BITFIELD_FIELD(uint64_t lck_intena2:1,
+		__BITFIELD_FIELD(uint64_t lckerr2:1,
+		__BITFIELD_FIELD(uint64_t lck_intena:1,
+		__BITFIELD_FIELD(uint64_t lckerr:1,
+		__BITFIELD_FIELD(uint64_t fset:3,
+		__BITFIELD_FIELD(uint64_t reserved_20_20:1,
+		__BITFIELD_FIELD(uint64_t fadr:9,
+		__BITFIELD_FIELD(uint64_t fsyn:6,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;))))))))))))))
 	} cn52xx;
 	struct cvmx_l2t_err_cn52xx cn52xxp1;
 	struct cvmx_l2t_err_s cn56xx;
diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h
index 2530e8731c8a..9742202f2a32 100644
--- a/arch/mips/include/asm/octeon/cvmx.h
+++ b/arch/mips/include/asm/octeon/cvmx.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2008 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -62,7 +62,6 @@ enum cvmx_mips_space {
 #include <asm/octeon/cvmx-iob-defs.h>
 #include <asm/octeon/cvmx-ipd-defs.h>
 #include <asm/octeon/cvmx-l2c-defs.h>
-#include <asm/octeon/cvmx-l2d-defs.h>
 #include <asm/octeon/cvmx-l2t-defs.h>
 #include <asm/octeon/cvmx-led-defs.h>
 #include <asm/octeon/cvmx-mio-defs.h>
-- 
cgit v1.2.3


From 930cbb73ee7d0e396987efccc24112c4469c03dc Mon Sep 17 00:00:00 2001
From: "Steven J. Hill" <Steven.Hill@cavium.com>
Date: Thu, 9 Mar 2017 08:15:16 -0600
Subject: MIPS: Octeon: Remove unused SLI types and macros.

Remove all unused bitfields and macros. Convert the remaining
bitfields to use __BITFIELD_FIELD instead of #ifdef.

[ralf@linux-mips.org: Add inclusions of <uapi/asm/bitfield.h> as necessary.]

Signed-off-by: Steven J. Hill <steven.hill@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15405/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/octeon/cvmx-sli-defs.h | 3541 +-------------------------
 arch/mips/pci/pcie-octeon.c                  |    4 +-
 2 files changed, 76 insertions(+), 3469 deletions(-)

diff --git a/arch/mips/include/asm/octeon/cvmx-sli-defs.h b/arch/mips/include/asm/octeon/cvmx-sli-defs.h
index e697c2f52a62..52cf96ea43e5 100644
--- a/arch/mips/include/asm/octeon/cvmx-sli-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-sli-defs.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2012 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -28,3494 +28,101 @@
 #ifndef __CVMX_SLI_DEFS_H__
 #define __CVMX_SLI_DEFS_H__
 
-#define CVMX_SLI_BIST_STATUS (0x0000000000000580ull)
-#define CVMX_SLI_CTL_PORTX(offset) (0x0000000000000050ull + ((offset) & 3) * 16)
-#define CVMX_SLI_CTL_STATUS (0x0000000000000570ull)
-#define CVMX_SLI_DATA_OUT_CNT (0x00000000000005F0ull)
-#define CVMX_SLI_DBG_DATA (0x0000000000000310ull)
-#define CVMX_SLI_DBG_SELECT (0x0000000000000300ull)
-#define CVMX_SLI_DMAX_CNT(offset) (0x0000000000000400ull + ((offset) & 1) * 16)
-#define CVMX_SLI_DMAX_INT_LEVEL(offset) (0x00000000000003E0ull + ((offset) & 1) * 16)
-#define CVMX_SLI_DMAX_TIM(offset) (0x0000000000000420ull + ((offset) & 1) * 16)
-#define CVMX_SLI_INT_ENB_CIU (0x0000000000003CD0ull)
-#define CVMX_SLI_INT_ENB_PORTX(offset) (0x0000000000000340ull + ((offset) & 1) * 16)
-#define CVMX_SLI_INT_SUM (0x0000000000000330ull)
-#define CVMX_SLI_LAST_WIN_RDATA0 (0x0000000000000600ull)
-#define CVMX_SLI_LAST_WIN_RDATA1 (0x0000000000000610ull)
-#define CVMX_SLI_LAST_WIN_RDATA2 (0x00000000000006C0ull)
-#define CVMX_SLI_LAST_WIN_RDATA3 (0x00000000000006D0ull)
-#define CVMX_SLI_MAC_CREDIT_CNT (0x0000000000003D70ull)
-#define CVMX_SLI_MAC_CREDIT_CNT2 (0x0000000000003E10ull)
-#define CVMX_SLI_MAC_NUMBER (0x0000000000003E00ull)
-#define CVMX_SLI_MEM_ACCESS_CTL (0x00000000000002F0ull)
-#define CVMX_SLI_MEM_ACCESS_SUBIDX(offset) (0x00000000000000E0ull + ((offset) & 31) * 16 - 16*12)
-#define CVMX_SLI_MSI_ENB0 (0x0000000000003C50ull)
-#define CVMX_SLI_MSI_ENB1 (0x0000000000003C60ull)
-#define CVMX_SLI_MSI_ENB2 (0x0000000000003C70ull)
-#define CVMX_SLI_MSI_ENB3 (0x0000000000003C80ull)
-#define CVMX_SLI_MSI_RCV0 (0x0000000000003C10ull)
-#define CVMX_SLI_MSI_RCV1 (0x0000000000003C20ull)
-#define CVMX_SLI_MSI_RCV2 (0x0000000000003C30ull)
-#define CVMX_SLI_MSI_RCV3 (0x0000000000003C40ull)
-#define CVMX_SLI_MSI_RD_MAP (0x0000000000003CA0ull)
-#define CVMX_SLI_MSI_W1C_ENB0 (0x0000000000003CF0ull)
-#define CVMX_SLI_MSI_W1C_ENB1 (0x0000000000003D00ull)
-#define CVMX_SLI_MSI_W1C_ENB2 (0x0000000000003D10ull)
-#define CVMX_SLI_MSI_W1C_ENB3 (0x0000000000003D20ull)
-#define CVMX_SLI_MSI_W1S_ENB0 (0x0000000000003D30ull)
-#define CVMX_SLI_MSI_W1S_ENB1 (0x0000000000003D40ull)
-#define CVMX_SLI_MSI_W1S_ENB2 (0x0000000000003D50ull)
-#define CVMX_SLI_MSI_W1S_ENB3 (0x0000000000003D60ull)
-#define CVMX_SLI_MSI_WR_MAP (0x0000000000003C90ull)
-#define CVMX_SLI_PCIE_MSI_RCV (0x0000000000003CB0ull)
-#define CVMX_SLI_PCIE_MSI_RCV_B1 (0x0000000000000650ull)
-#define CVMX_SLI_PCIE_MSI_RCV_B2 (0x0000000000000660ull)
-#define CVMX_SLI_PCIE_MSI_RCV_B3 (0x0000000000000670ull)
-#define CVMX_SLI_PKTX_CNTS(offset) (0x0000000000002400ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_INSTR_BADDR(offset) (0x0000000000002800ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_INSTR_BAOFF_DBELL(offset) (0x0000000000002C00ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_INSTR_FIFO_RSIZE(offset) (0x0000000000003000ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_INSTR_HEADER(offset) (0x0000000000003400ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_IN_BP(offset) (0x0000000000003800ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_OUT_SIZE(offset) (0x0000000000000C00ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_SLIST_BADDR(offset) (0x0000000000001400ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_SLIST_BAOFF_DBELL(offset) (0x0000000000001800ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKTX_SLIST_FIFO_RSIZE(offset) (0x0000000000001C00ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKT_CNT_INT (0x0000000000001130ull)
-#define CVMX_SLI_PKT_CNT_INT_ENB (0x0000000000001150ull)
-#define CVMX_SLI_PKT_CTL (0x0000000000001220ull)
-#define CVMX_SLI_PKT_DATA_OUT_ES (0x00000000000010B0ull)
-#define CVMX_SLI_PKT_DATA_OUT_NS (0x00000000000010A0ull)
-#define CVMX_SLI_PKT_DATA_OUT_ROR (0x0000000000001090ull)
-#define CVMX_SLI_PKT_DPADDR (0x0000000000001080ull)
-#define CVMX_SLI_PKT_INPUT_CONTROL (0x0000000000001170ull)
-#define CVMX_SLI_PKT_INSTR_ENB (0x0000000000001000ull)
-#define CVMX_SLI_PKT_INSTR_RD_SIZE (0x00000000000011A0ull)
-#define CVMX_SLI_PKT_INSTR_SIZE (0x0000000000001020ull)
-#define CVMX_SLI_PKT_INT_LEVELS (0x0000000000001120ull)
-#define CVMX_SLI_PKT_IN_BP (0x0000000000001210ull)
-#define CVMX_SLI_PKT_IN_DONEX_CNTS(offset) (0x0000000000002000ull + ((offset) & 31) * 16)
-#define CVMX_SLI_PKT_IN_INSTR_COUNTS (0x0000000000001200ull)
-#define CVMX_SLI_PKT_IN_PCIE_PORT (0x00000000000011B0ull)
-#define CVMX_SLI_PKT_IPTR (0x0000000000001070ull)
-#define CVMX_SLI_PKT_OUTPUT_WMARK (0x0000000000001180ull)
-#define CVMX_SLI_PKT_OUT_BMODE (0x00000000000010D0ull)
-#define CVMX_SLI_PKT_OUT_BP_EN (0x0000000000001240ull)
-#define CVMX_SLI_PKT_OUT_ENB (0x0000000000001010ull)
-#define CVMX_SLI_PKT_PCIE_PORT (0x00000000000010E0ull)
-#define CVMX_SLI_PKT_PORT_IN_RST (0x00000000000011F0ull)
-#define CVMX_SLI_PKT_SLIST_ES (0x0000000000001050ull)
-#define CVMX_SLI_PKT_SLIST_NS (0x0000000000001040ull)
-#define CVMX_SLI_PKT_SLIST_ROR (0x0000000000001030ull)
-#define CVMX_SLI_PKT_TIME_INT (0x0000000000001140ull)
-#define CVMX_SLI_PKT_TIME_INT_ENB (0x0000000000001160ull)
-#define CVMX_SLI_PORTX_PKIND(offset) (0x0000000000000800ull + ((offset) & 31) * 16)
-#define CVMX_SLI_S2M_PORTX_CTL(offset) (0x0000000000003D80ull + ((offset) & 3) * 16)
-#define CVMX_SLI_SCRATCH_1 (0x00000000000003C0ull)
-#define CVMX_SLI_SCRATCH_2 (0x00000000000003D0ull)
-#define CVMX_SLI_STATE1 (0x0000000000000620ull)
-#define CVMX_SLI_STATE2 (0x0000000000000630ull)
-#define CVMX_SLI_STATE3 (0x0000000000000640ull)
-#define CVMX_SLI_TX_PIPE (0x0000000000001230ull)
-#define CVMX_SLI_WINDOW_CTL (0x00000000000002E0ull)
-#define CVMX_SLI_WIN_RD_ADDR (0x0000000000000010ull)
-#define CVMX_SLI_WIN_RD_DATA (0x0000000000000040ull)
-#define CVMX_SLI_WIN_WR_ADDR (0x0000000000000000ull)
-#define CVMX_SLI_WIN_WR_DATA (0x0000000000000020ull)
-#define CVMX_SLI_WIN_WR_MASK (0x0000000000000030ull)
+#include <uapi/asm/bitfield.h>
+
+#define CVMX_SLI_PCIE_MSI_RCV CVMX_SLI_PCIE_MSI_RCV_FUNC()
+static inline uint64_t CVMX_SLI_PCIE_MSI_RCV_FUNC(void)
+{
+	switch (cvmx_get_octeon_family()) {
+	case OCTEON_CNF71XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN61XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN63XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN66XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN68XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN70XX & OCTEON_FAMILY_MASK:
+		return 0x0000000000003CB0ull;
+	case OCTEON_CNF75XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN73XX & OCTEON_FAMILY_MASK:
+	case OCTEON_CN78XX & OCTEON_FAMILY_MASK:
+		if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X))
+			return 0x0000000000003CB0ull;
+	default:
+		return 0x0000000000023CB0ull;
+	}
+}
 
-union cvmx_sli_bist_status {
-	uint64_t u64;
-	struct cvmx_sli_bist_status_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t ncb_req:1;
-		uint64_t n2p0_c:1;
-		uint64_t n2p0_o:1;
-		uint64_t n2p1_c:1;
-		uint64_t n2p1_o:1;
-		uint64_t cpl_p0:1;
-		uint64_t cpl_p1:1;
-		uint64_t reserved_19_24:6;
-		uint64_t p2n0_c0:1;
-		uint64_t p2n0_c1:1;
-		uint64_t p2n0_n:1;
-		uint64_t p2n0_p0:1;
-		uint64_t p2n0_p1:1;
-		uint64_t p2n1_c0:1;
-		uint64_t p2n1_c1:1;
-		uint64_t p2n1_n:1;
-		uint64_t p2n1_p0:1;
-		uint64_t p2n1_p1:1;
-		uint64_t reserved_6_8:3;
-		uint64_t dsi1_1:1;
-		uint64_t dsi1_0:1;
-		uint64_t dsi0_1:1;
-		uint64_t dsi0_0:1;
-		uint64_t msi:1;
-		uint64_t ncb_cmd:1;
-#else
-		uint64_t ncb_cmd:1;
-		uint64_t msi:1;
-		uint64_t dsi0_0:1;
-		uint64_t dsi0_1:1;
-		uint64_t dsi1_0:1;
-		uint64_t dsi1_1:1;
-		uint64_t reserved_6_8:3;
-		uint64_t p2n1_p1:1;
-		uint64_t p2n1_p0:1;
-		uint64_t p2n1_n:1;
-		uint64_t p2n1_c1:1;
-		uint64_t p2n1_c0:1;
-		uint64_t p2n0_p1:1;
-		uint64_t p2n0_p0:1;
-		uint64_t p2n0_n:1;
-		uint64_t p2n0_c1:1;
-		uint64_t p2n0_c0:1;
-		uint64_t reserved_19_24:6;
-		uint64_t cpl_p1:1;
-		uint64_t cpl_p0:1;
-		uint64_t n2p1_o:1;
-		uint64_t n2p1_c:1;
-		uint64_t n2p0_o:1;
-		uint64_t n2p0_c:1;
-		uint64_t ncb_req:1;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_bist_status_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_31_63:33;
-		uint64_t n2p0_c:1;
-		uint64_t n2p0_o:1;
-		uint64_t reserved_27_28:2;
-		uint64_t cpl_p0:1;
-		uint64_t cpl_p1:1;
-		uint64_t reserved_19_24:6;
-		uint64_t p2n0_c0:1;
-		uint64_t p2n0_c1:1;
-		uint64_t p2n0_n:1;
-		uint64_t p2n0_p0:1;
-		uint64_t p2n0_p1:1;
-		uint64_t p2n1_c0:1;
-		uint64_t p2n1_c1:1;
-		uint64_t p2n1_n:1;
-		uint64_t p2n1_p0:1;
-		uint64_t p2n1_p1:1;
-		uint64_t reserved_6_8:3;
-		uint64_t dsi1_1:1;
-		uint64_t dsi1_0:1;
-		uint64_t dsi0_1:1;
-		uint64_t dsi0_0:1;
-		uint64_t msi:1;
-		uint64_t ncb_cmd:1;
-#else
-		uint64_t ncb_cmd:1;
-		uint64_t msi:1;
-		uint64_t dsi0_0:1;
-		uint64_t dsi0_1:1;
-		uint64_t dsi1_0:1;
-		uint64_t dsi1_1:1;
-		uint64_t reserved_6_8:3;
-		uint64_t p2n1_p1:1;
-		uint64_t p2n1_p0:1;
-		uint64_t p2n1_n:1;
-		uint64_t p2n1_c1:1;
-		uint64_t p2n1_c0:1;
-		uint64_t p2n0_p1:1;
-		uint64_t p2n0_p0:1;
-		uint64_t p2n0_n:1;
-		uint64_t p2n0_c1:1;
-		uint64_t p2n0_c0:1;
-		uint64_t reserved_19_24:6;
-		uint64_t cpl_p1:1;
-		uint64_t cpl_p0:1;
-		uint64_t reserved_27_28:2;
-		uint64_t n2p0_o:1;
-		uint64_t n2p0_c:1;
-		uint64_t reserved_31_63:33;
-#endif
-	} cn61xx;
-	struct cvmx_sli_bist_status_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_31_63:33;
-		uint64_t n2p0_c:1;
-		uint64_t n2p0_o:1;
-		uint64_t n2p1_c:1;
-		uint64_t n2p1_o:1;
-		uint64_t cpl_p0:1;
-		uint64_t cpl_p1:1;
-		uint64_t reserved_19_24:6;
-		uint64_t p2n0_c0:1;
-		uint64_t p2n0_c1:1;
-		uint64_t p2n0_n:1;
-		uint64_t p2n0_p0:1;
-		uint64_t p2n0_p1:1;
-		uint64_t p2n1_c0:1;
-		uint64_t p2n1_c1:1;
-		uint64_t p2n1_n:1;
-		uint64_t p2n1_p0:1;
-		uint64_t p2n1_p1:1;
-		uint64_t reserved_6_8:3;
-		uint64_t dsi1_1:1;
-		uint64_t dsi1_0:1;
-		uint64_t dsi0_1:1;
-		uint64_t dsi0_0:1;
-		uint64_t msi:1;
-		uint64_t ncb_cmd:1;
-#else
-		uint64_t ncb_cmd:1;
-		uint64_t msi:1;
-		uint64_t dsi0_0:1;
-		uint64_t dsi0_1:1;
-		uint64_t dsi1_0:1;
-		uint64_t dsi1_1:1;
-		uint64_t reserved_6_8:3;
-		uint64_t p2n1_p1:1;
-		uint64_t p2n1_p0:1;
-		uint64_t p2n1_n:1;
-		uint64_t p2n1_c1:1;
-		uint64_t p2n1_c0:1;
-		uint64_t p2n0_p1:1;
-		uint64_t p2n0_p0:1;
-		uint64_t p2n0_n:1;
-		uint64_t p2n0_c1:1;
-		uint64_t p2n0_c0:1;
-		uint64_t reserved_19_24:6;
-		uint64_t cpl_p1:1;
-		uint64_t cpl_p0:1;
-		uint64_t n2p1_o:1;
-		uint64_t n2p1_c:1;
-		uint64_t n2p0_o:1;
-		uint64_t n2p0_c:1;
-		uint64_t reserved_31_63:33;
-#endif
-	} cn63xx;
-	struct cvmx_sli_bist_status_cn63xx cn63xxp1;
-	struct cvmx_sli_bist_status_cn61xx cn66xx;
-	struct cvmx_sli_bist_status_s cn68xx;
-	struct cvmx_sli_bist_status_s cn68xxp1;
-	struct cvmx_sli_bist_status_cn61xx cnf71xx;
-};
 
 union cvmx_sli_ctl_portx {
 	uint64_t u64;
 	struct cvmx_sli_ctl_portx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_22_63:42;
-		uint64_t intd:1;
-		uint64_t intc:1;
-		uint64_t intb:1;
-		uint64_t inta:1;
-		uint64_t dis_port:1;
-		uint64_t waitl_com:1;
-		uint64_t intd_map:2;
-		uint64_t intc_map:2;
-		uint64_t intb_map:2;
-		uint64_t inta_map:2;
-		uint64_t ctlp_ro:1;
-		uint64_t reserved_6_6:1;
-		uint64_t ptlp_ro:1;
-		uint64_t reserved_1_4:4;
-		uint64_t wait_com:1;
-#else
-		uint64_t wait_com:1;
-		uint64_t reserved_1_4:4;
-		uint64_t ptlp_ro:1;
-		uint64_t reserved_6_6:1;
-		uint64_t ctlp_ro:1;
-		uint64_t inta_map:2;
-		uint64_t intb_map:2;
-		uint64_t intc_map:2;
-		uint64_t intd_map:2;
-		uint64_t waitl_com:1;
-		uint64_t dis_port:1;
-		uint64_t inta:1;
-		uint64_t intb:1;
-		uint64_t intc:1;
-		uint64_t intd:1;
-		uint64_t reserved_22_63:42;
-#endif
-	} s;
-	struct cvmx_sli_ctl_portx_s cn61xx;
-	struct cvmx_sli_ctl_portx_s cn63xx;
-	struct cvmx_sli_ctl_portx_s cn63xxp1;
-	struct cvmx_sli_ctl_portx_s cn66xx;
-	struct cvmx_sli_ctl_portx_s cn68xx;
-	struct cvmx_sli_ctl_portx_s cn68xxp1;
-	struct cvmx_sli_ctl_portx_s cnf71xx;
-};
-
-union cvmx_sli_ctl_status {
-	uint64_t u64;
-	struct cvmx_sli_ctl_status_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_20_63:44;
-		uint64_t p1_ntags:6;
-		uint64_t p0_ntags:6;
-		uint64_t chip_rev:8;
-#else
-		uint64_t chip_rev:8;
-		uint64_t p0_ntags:6;
-		uint64_t p1_ntags:6;
-		uint64_t reserved_20_63:44;
-#endif
-	} s;
-	struct cvmx_sli_ctl_status_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t p0_ntags:6;
-		uint64_t chip_rev:8;
-#else
-		uint64_t chip_rev:8;
-		uint64_t p0_ntags:6;
-		uint64_t reserved_14_63:50;
-#endif
-	} cn61xx;
-	struct cvmx_sli_ctl_status_s cn63xx;
-	struct cvmx_sli_ctl_status_s cn63xxp1;
-	struct cvmx_sli_ctl_status_cn61xx cn66xx;
-	struct cvmx_sli_ctl_status_s cn68xx;
-	struct cvmx_sli_ctl_status_s cn68xxp1;
-	struct cvmx_sli_ctl_status_cn61xx cnf71xx;
-};
-
-union cvmx_sli_data_out_cnt {
-	uint64_t u64;
-	struct cvmx_sli_data_out_cnt_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_44_63:20;
-		uint64_t p1_ucnt:16;
-		uint64_t p1_fcnt:6;
-		uint64_t p0_ucnt:16;
-		uint64_t p0_fcnt:6;
-#else
-		uint64_t p0_fcnt:6;
-		uint64_t p0_ucnt:16;
-		uint64_t p1_fcnt:6;
-		uint64_t p1_ucnt:16;
-		uint64_t reserved_44_63:20;
-#endif
-	} s;
-	struct cvmx_sli_data_out_cnt_s cn61xx;
-	struct cvmx_sli_data_out_cnt_s cn63xx;
-	struct cvmx_sli_data_out_cnt_s cn63xxp1;
-	struct cvmx_sli_data_out_cnt_s cn66xx;
-	struct cvmx_sli_data_out_cnt_s cn68xx;
-	struct cvmx_sli_data_out_cnt_s cn68xxp1;
-	struct cvmx_sli_data_out_cnt_s cnf71xx;
-};
-
-union cvmx_sli_dbg_data {
-	uint64_t u64;
-	struct cvmx_sli_dbg_data_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_18_63:46;
-		uint64_t dsel_ext:1;
-		uint64_t data:17;
-#else
-		uint64_t data:17;
-		uint64_t dsel_ext:1;
-		uint64_t reserved_18_63:46;
-#endif
-	} s;
-	struct cvmx_sli_dbg_data_s cn61xx;
-	struct cvmx_sli_dbg_data_s cn63xx;
-	struct cvmx_sli_dbg_data_s cn63xxp1;
-	struct cvmx_sli_dbg_data_s cn66xx;
-	struct cvmx_sli_dbg_data_s cn68xx;
-	struct cvmx_sli_dbg_data_s cn68xxp1;
-	struct cvmx_sli_dbg_data_s cnf71xx;
-};
-
-union cvmx_sli_dbg_select {
-	uint64_t u64;
-	struct cvmx_sli_dbg_select_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_33_63:31;
-		uint64_t adbg_sel:1;
-		uint64_t dbg_sel:32;
-#else
-		uint64_t dbg_sel:32;
-		uint64_t adbg_sel:1;
-		uint64_t reserved_33_63:31;
-#endif
-	} s;
-	struct cvmx_sli_dbg_select_s cn61xx;
-	struct cvmx_sli_dbg_select_s cn63xx;
-	struct cvmx_sli_dbg_select_s cn63xxp1;
-	struct cvmx_sli_dbg_select_s cn66xx;
-	struct cvmx_sli_dbg_select_s cn68xx;
-	struct cvmx_sli_dbg_select_s cn68xxp1;
-	struct cvmx_sli_dbg_select_s cnf71xx;
-};
-
-union cvmx_sli_dmax_cnt {
-	uint64_t u64;
-	struct cvmx_sli_dmax_cnt_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t cnt:32;
-#else
-		uint64_t cnt:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_dmax_cnt_s cn61xx;
-	struct cvmx_sli_dmax_cnt_s cn63xx;
-	struct cvmx_sli_dmax_cnt_s cn63xxp1;
-	struct cvmx_sli_dmax_cnt_s cn66xx;
-	struct cvmx_sli_dmax_cnt_s cn68xx;
-	struct cvmx_sli_dmax_cnt_s cn68xxp1;
-	struct cvmx_sli_dmax_cnt_s cnf71xx;
-};
-
-union cvmx_sli_dmax_int_level {
-	uint64_t u64;
-	struct cvmx_sli_dmax_int_level_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t time:32;
-		uint64_t cnt:32;
-#else
-		uint64_t cnt:32;
-		uint64_t time:32;
-#endif
-	} s;
-	struct cvmx_sli_dmax_int_level_s cn61xx;
-	struct cvmx_sli_dmax_int_level_s cn63xx;
-	struct cvmx_sli_dmax_int_level_s cn63xxp1;
-	struct cvmx_sli_dmax_int_level_s cn66xx;
-	struct cvmx_sli_dmax_int_level_s cn68xx;
-	struct cvmx_sli_dmax_int_level_s cn68xxp1;
-	struct cvmx_sli_dmax_int_level_s cnf71xx;
-};
-
-union cvmx_sli_dmax_tim {
-	uint64_t u64;
-	struct cvmx_sli_dmax_tim_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t tim:32;
-#else
-		uint64_t tim:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_dmax_tim_s cn61xx;
-	struct cvmx_sli_dmax_tim_s cn63xx;
-	struct cvmx_sli_dmax_tim_s cn63xxp1;
-	struct cvmx_sli_dmax_tim_s cn66xx;
-	struct cvmx_sli_dmax_tim_s cn68xx;
-	struct cvmx_sli_dmax_tim_s cn68xxp1;
-	struct cvmx_sli_dmax_tim_s cnf71xx;
-};
-
-union cvmx_sli_int_enb_ciu {
-	uint64_t u64;
-	struct cvmx_sli_int_enb_ciu_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t pipe_err:1;
-		uint64_t ill_pad:1;
-		uint64_t sprt3_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_28_31:4;
-		uint64_t m3_un_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_up_b0:1;
-		uint64_t reserved_18_19:2;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t reserved_18_19:2;
-		uint64_t m2_up_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_un_wi:1;
-		uint64_t reserved_28_31:4;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt3_err:1;
-		uint64_t ill_pad:1;
-		uint64_t pipe_err:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} s;
-	struct cvmx_sli_int_enb_ciu_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_61_63:3;
-		uint64_t ill_pad:1;
-		uint64_t sprt3_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_28_31:4;
-		uint64_t m3_un_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_up_b0:1;
-		uint64_t reserved_18_19:2;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t reserved_18_19:2;
-		uint64_t m2_up_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_un_wi:1;
-		uint64_t reserved_28_31:4;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt3_err:1;
-		uint64_t ill_pad:1;
-		uint64_t reserved_61_63:3;
-#endif
-	} cn61xx;
-	struct cvmx_sli_int_enb_ciu_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_61_63:3;
-		uint64_t ill_pad:1;
-		uint64_t reserved_58_59:2;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_18_31:14;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t reserved_18_31:14;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t reserved_58_59:2;
-		uint64_t ill_pad:1;
-		uint64_t reserved_61_63:3;
-#endif
-	} cn63xx;
-	struct cvmx_sli_int_enb_ciu_cn63xx cn63xxp1;
-	struct cvmx_sli_int_enb_ciu_cn61xx cn66xx;
-	struct cvmx_sli_int_enb_ciu_cn68xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t pipe_err:1;
-		uint64_t ill_pad:1;
-		uint64_t reserved_58_59:2;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t reserved_51_51:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_18_31:14;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t reserved_18_31:14;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t reserved_51_51:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t reserved_58_59:2;
-		uint64_t ill_pad:1;
-		uint64_t pipe_err:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} cn68xx;
-	struct cvmx_sli_int_enb_ciu_cn68xx cn68xxp1;
-	struct cvmx_sli_int_enb_ciu_cn61xx cnf71xx;
-};
-
-union cvmx_sli_int_enb_portx {
-	uint64_t u64;
-	struct cvmx_sli_int_enb_portx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t pipe_err:1;
-		uint64_t ill_pad:1;
-		uint64_t sprt3_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_28_31:4;
-		uint64_t m3_un_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_up_b0:1;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t m2_up_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_un_wi:1;
-		uint64_t reserved_28_31:4;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt3_err:1;
-		uint64_t ill_pad:1;
-		uint64_t pipe_err:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} s;
-	struct cvmx_sli_int_enb_portx_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_61_63:3;
-		uint64_t ill_pad:1;
-		uint64_t sprt3_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_28_31:4;
-		uint64_t m3_un_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_up_b0:1;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t m2_up_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_un_wi:1;
-		uint64_t reserved_28_31:4;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt3_err:1;
-		uint64_t ill_pad:1;
-		uint64_t reserved_61_63:3;
-#endif
-	} cn61xx;
-	struct cvmx_sli_int_enb_portx_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_61_63:3;
-		uint64_t ill_pad:1;
-		uint64_t reserved_58_59:2;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_20_31:12;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t reserved_20_31:12;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t reserved_58_59:2;
-		uint64_t ill_pad:1;
-		uint64_t reserved_61_63:3;
-#endif
-	} cn63xx;
-	struct cvmx_sli_int_enb_portx_cn63xx cn63xxp1;
-	struct cvmx_sli_int_enb_portx_cn61xx cn66xx;
-	struct cvmx_sli_int_enb_portx_cn68xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t pipe_err:1;
-		uint64_t ill_pad:1;
-		uint64_t reserved_58_59:2;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t reserved_51_51:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_20_31:12;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t reserved_20_31:12;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t reserved_51_51:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t reserved_58_59:2;
-		uint64_t ill_pad:1;
-		uint64_t pipe_err:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} cn68xx;
-	struct cvmx_sli_int_enb_portx_cn68xx cn68xxp1;
-	struct cvmx_sli_int_enb_portx_cn61xx cnf71xx;
-};
-
-union cvmx_sli_int_sum {
-	uint64_t u64;
-	struct cvmx_sli_int_sum_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t pipe_err:1;
-		uint64_t ill_pad:1;
-		uint64_t sprt3_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_28_31:4;
-		uint64_t m3_un_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_up_b0:1;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t m2_up_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_un_wi:1;
-		uint64_t reserved_28_31:4;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt3_err:1;
-		uint64_t ill_pad:1;
-		uint64_t pipe_err:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} s;
-	struct cvmx_sli_int_sum_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_61_63:3;
-		uint64_t ill_pad:1;
-		uint64_t sprt3_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_28_31:4;
-		uint64_t m3_un_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_up_b0:1;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t m2_up_b0:1;
-		uint64_t m2_up_wi:1;
-		uint64_t m2_un_b0:1;
-		uint64_t m2_un_wi:1;
-		uint64_t m3_up_b0:1;
-		uint64_t m3_up_wi:1;
-		uint64_t m3_un_b0:1;
-		uint64_t m3_un_wi:1;
-		uint64_t reserved_28_31:4;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t sprt2_err:1;
-		uint64_t sprt3_err:1;
-		uint64_t ill_pad:1;
-		uint64_t reserved_61_63:3;
-#endif
-	} cn61xx;
-	struct cvmx_sli_int_sum_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_61_63:3;
-		uint64_t ill_pad:1;
-		uint64_t reserved_58_59:2;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_20_31:12;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t reserved_20_31:12;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t pin_bp:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t reserved_58_59:2;
-		uint64_t ill_pad:1;
-		uint64_t reserved_61_63:3;
-#endif
-	} cn63xx;
-	struct cvmx_sli_int_sum_cn63xx cn63xxp1;
-	struct cvmx_sli_int_sum_cn61xx cn66xx;
-	struct cvmx_sli_int_sum_cn68xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_62_63:2;
-		uint64_t pipe_err:1;
-		uint64_t ill_pad:1;
-		uint64_t reserved_58_59:2;
-		uint64_t sprt1_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t pins_err:1;
-		uint64_t pop_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pgl_err:1;
-		uint64_t reserved_51_51:1;
-		uint64_t pout_err:1;
-		uint64_t psldbof:1;
-		uint64_t pidbof:1;
-		uint64_t reserved_38_47:10;
-		uint64_t dtime:2;
-		uint64_t dcnt:2;
-		uint64_t dmafi:2;
-		uint64_t reserved_20_31:12;
-		uint64_t mac1_int:1;
-		uint64_t mac0_int:1;
-		uint64_t mio_int1:1;
-		uint64_t mio_int0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_up_b0:1;
-		uint64_t reserved_6_7:2;
-		uint64_t ptime:1;
-		uint64_t pcnt:1;
-		uint64_t iob2big:1;
-		uint64_t bar0_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t rml_to:1;
-#else
-		uint64_t rml_to:1;
-		uint64_t reserved_1_1:1;
-		uint64_t bar0_to:1;
-		uint64_t iob2big:1;
-		uint64_t pcnt:1;
-		uint64_t ptime:1;
-		uint64_t reserved_6_7:2;
-		uint64_t m0_up_b0:1;
-		uint64_t m0_up_wi:1;
-		uint64_t m0_un_b0:1;
-		uint64_t m0_un_wi:1;
-		uint64_t m1_up_b0:1;
-		uint64_t m1_up_wi:1;
-		uint64_t m1_un_b0:1;
-		uint64_t m1_un_wi:1;
-		uint64_t mio_int0:1;
-		uint64_t mio_int1:1;
-		uint64_t mac0_int:1;
-		uint64_t mac1_int:1;
-		uint64_t reserved_20_31:12;
-		uint64_t dmafi:2;
-		uint64_t dcnt:2;
-		uint64_t dtime:2;
-		uint64_t reserved_38_47:10;
-		uint64_t pidbof:1;
-		uint64_t psldbof:1;
-		uint64_t pout_err:1;
-		uint64_t reserved_51_51:1;
-		uint64_t pgl_err:1;
-		uint64_t pdi_err:1;
-		uint64_t pop_err:1;
-		uint64_t pins_err:1;
-		uint64_t sprt0_err:1;
-		uint64_t sprt1_err:1;
-		uint64_t reserved_58_59:2;
-		uint64_t ill_pad:1;
-		uint64_t pipe_err:1;
-		uint64_t reserved_62_63:2;
-#endif
-	} cn68xx;
-	struct cvmx_sli_int_sum_cn68xx cn68xxp1;
-	struct cvmx_sli_int_sum_cn61xx cnf71xx;
-};
-
-union cvmx_sli_last_win_rdata0 {
-	uint64_t u64;
-	struct cvmx_sli_last_win_rdata0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_22_63:42,
+		__BITFIELD_FIELD(uint64_t intd:1,
+		__BITFIELD_FIELD(uint64_t intc:1,
+		__BITFIELD_FIELD(uint64_t intb:1,
+		__BITFIELD_FIELD(uint64_t inta:1,
+		__BITFIELD_FIELD(uint64_t dis_port:1,
+		__BITFIELD_FIELD(uint64_t waitl_com:1,
+		__BITFIELD_FIELD(uint64_t intd_map:2,
+		__BITFIELD_FIELD(uint64_t intc_map:2,
+		__BITFIELD_FIELD(uint64_t intb_map:2,
+		__BITFIELD_FIELD(uint64_t inta_map:2,
+		__BITFIELD_FIELD(uint64_t ctlp_ro:1,
+		__BITFIELD_FIELD(uint64_t reserved_6_6:1,
+		__BITFIELD_FIELD(uint64_t ptlp_ro:1,
+		__BITFIELD_FIELD(uint64_t reserved_1_4:4,
+		__BITFIELD_FIELD(uint64_t wait_com:1,
+		;))))))))))))))))
 	} s;
-	struct cvmx_sli_last_win_rdata0_s cn61xx;
-	struct cvmx_sli_last_win_rdata0_s cn63xx;
-	struct cvmx_sli_last_win_rdata0_s cn63xxp1;
-	struct cvmx_sli_last_win_rdata0_s cn66xx;
-	struct cvmx_sli_last_win_rdata0_s cn68xx;
-	struct cvmx_sli_last_win_rdata0_s cn68xxp1;
-	struct cvmx_sli_last_win_rdata0_s cnf71xx;
-};
-
-union cvmx_sli_last_win_rdata1 {
-	uint64_t u64;
-	struct cvmx_sli_last_win_rdata1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
-	} s;
-	struct cvmx_sli_last_win_rdata1_s cn61xx;
-	struct cvmx_sli_last_win_rdata1_s cn63xx;
-	struct cvmx_sli_last_win_rdata1_s cn63xxp1;
-	struct cvmx_sli_last_win_rdata1_s cn66xx;
-	struct cvmx_sli_last_win_rdata1_s cn68xx;
-	struct cvmx_sli_last_win_rdata1_s cn68xxp1;
-	struct cvmx_sli_last_win_rdata1_s cnf71xx;
-};
-
-union cvmx_sli_last_win_rdata2 {
-	uint64_t u64;
-	struct cvmx_sli_last_win_rdata2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
-	} s;
-	struct cvmx_sli_last_win_rdata2_s cn61xx;
-	struct cvmx_sli_last_win_rdata2_s cn66xx;
-	struct cvmx_sli_last_win_rdata2_s cnf71xx;
-};
-
-union cvmx_sli_last_win_rdata3 {
-	uint64_t u64;
-	struct cvmx_sli_last_win_rdata3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
-	} s;
-	struct cvmx_sli_last_win_rdata3_s cn61xx;
-	struct cvmx_sli_last_win_rdata3_s cn66xx;
-	struct cvmx_sli_last_win_rdata3_s cnf71xx;
-};
-
-union cvmx_sli_mac_credit_cnt {
-	uint64_t u64;
-	struct cvmx_sli_mac_credit_cnt_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_54_63:10;
-		uint64_t p1_c_d:1;
-		uint64_t p1_n_d:1;
-		uint64_t p1_p_d:1;
-		uint64_t p0_c_d:1;
-		uint64_t p0_n_d:1;
-		uint64_t p0_p_d:1;
-		uint64_t p1_ccnt:8;
-		uint64_t p1_ncnt:8;
-		uint64_t p1_pcnt:8;
-		uint64_t p0_ccnt:8;
-		uint64_t p0_ncnt:8;
-		uint64_t p0_pcnt:8;
-#else
-		uint64_t p0_pcnt:8;
-		uint64_t p0_ncnt:8;
-		uint64_t p0_ccnt:8;
-		uint64_t p1_pcnt:8;
-		uint64_t p1_ncnt:8;
-		uint64_t p1_ccnt:8;
-		uint64_t p0_p_d:1;
-		uint64_t p0_n_d:1;
-		uint64_t p0_c_d:1;
-		uint64_t p1_p_d:1;
-		uint64_t p1_n_d:1;
-		uint64_t p1_c_d:1;
-		uint64_t reserved_54_63:10;
-#endif
-	} s;
-	struct cvmx_sli_mac_credit_cnt_s cn61xx;
-	struct cvmx_sli_mac_credit_cnt_s cn63xx;
-	struct cvmx_sli_mac_credit_cnt_cn63xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_48_63:16;
-		uint64_t p1_ccnt:8;
-		uint64_t p1_ncnt:8;
-		uint64_t p1_pcnt:8;
-		uint64_t p0_ccnt:8;
-		uint64_t p0_ncnt:8;
-		uint64_t p0_pcnt:8;
-#else
-		uint64_t p0_pcnt:8;
-		uint64_t p0_ncnt:8;
-		uint64_t p0_ccnt:8;
-		uint64_t p1_pcnt:8;
-		uint64_t p1_ncnt:8;
-		uint64_t p1_ccnt:8;
-		uint64_t reserved_48_63:16;
-#endif
-	} cn63xxp1;
-	struct cvmx_sli_mac_credit_cnt_s cn66xx;
-	struct cvmx_sli_mac_credit_cnt_s cn68xx;
-	struct cvmx_sli_mac_credit_cnt_s cn68xxp1;
-	struct cvmx_sli_mac_credit_cnt_s cnf71xx;
-};
-
-union cvmx_sli_mac_credit_cnt2 {
-	uint64_t u64;
-	struct cvmx_sli_mac_credit_cnt2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_54_63:10;
-		uint64_t p3_c_d:1;
-		uint64_t p3_n_d:1;
-		uint64_t p3_p_d:1;
-		uint64_t p2_c_d:1;
-		uint64_t p2_n_d:1;
-		uint64_t p2_p_d:1;
-		uint64_t p3_ccnt:8;
-		uint64_t p3_ncnt:8;
-		uint64_t p3_pcnt:8;
-		uint64_t p2_ccnt:8;
-		uint64_t p2_ncnt:8;
-		uint64_t p2_pcnt:8;
-#else
-		uint64_t p2_pcnt:8;
-		uint64_t p2_ncnt:8;
-		uint64_t p2_ccnt:8;
-		uint64_t p3_pcnt:8;
-		uint64_t p3_ncnt:8;
-		uint64_t p3_ccnt:8;
-		uint64_t p2_p_d:1;
-		uint64_t p2_n_d:1;
-		uint64_t p2_c_d:1;
-		uint64_t p3_p_d:1;
-		uint64_t p3_n_d:1;
-		uint64_t p3_c_d:1;
-		uint64_t reserved_54_63:10;
-#endif
-	} s;
-	struct cvmx_sli_mac_credit_cnt2_s cn61xx;
-	struct cvmx_sli_mac_credit_cnt2_s cn66xx;
-	struct cvmx_sli_mac_credit_cnt2_s cnf71xx;
-};
-
-union cvmx_sli_mac_number {
-	uint64_t u64;
-	struct cvmx_sli_mac_number_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_9_63:55;
-		uint64_t a_mode:1;
-		uint64_t num:8;
-#else
-		uint64_t num:8;
-		uint64_t a_mode:1;
-		uint64_t reserved_9_63:55;
-#endif
-	} s;
-	struct cvmx_sli_mac_number_s cn61xx;
-	struct cvmx_sli_mac_number_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t num:8;
-#else
-		uint64_t num:8;
-		uint64_t reserved_8_63:56;
-#endif
-	} cn63xx;
-	struct cvmx_sli_mac_number_s cn66xx;
-	struct cvmx_sli_mac_number_cn63xx cn68xx;
-	struct cvmx_sli_mac_number_cn63xx cn68xxp1;
-	struct cvmx_sli_mac_number_s cnf71xx;
 };
 
 union cvmx_sli_mem_access_ctl {
 	uint64_t u64;
 	struct cvmx_sli_mem_access_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t max_word:4;
-		uint64_t timer:10;
-#else
-		uint64_t timer:10;
-		uint64_t max_word:4;
-		uint64_t reserved_14_63:50;
-#endif
-	} s;
-	struct cvmx_sli_mem_access_ctl_s cn61xx;
-	struct cvmx_sli_mem_access_ctl_s cn63xx;
-	struct cvmx_sli_mem_access_ctl_s cn63xxp1;
-	struct cvmx_sli_mem_access_ctl_s cn66xx;
-	struct cvmx_sli_mem_access_ctl_s cn68xx;
-	struct cvmx_sli_mem_access_ctl_s cn68xxp1;
-	struct cvmx_sli_mem_access_ctl_s cnf71xx;
-};
-
-union cvmx_sli_mem_access_subidx {
-	uint64_t u64;
-	struct cvmx_sli_mem_access_subidx_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_43_63:21;
-		uint64_t zero:1;
-		uint64_t port:3;
-		uint64_t nmerge:1;
-		uint64_t esr:2;
-		uint64_t esw:2;
-		uint64_t wtype:2;
-		uint64_t rtype:2;
-		uint64_t reserved_0_29:30;
-#else
-		uint64_t reserved_0_29:30;
-		uint64_t rtype:2;
-		uint64_t wtype:2;
-		uint64_t esw:2;
-		uint64_t esr:2;
-		uint64_t nmerge:1;
-		uint64_t port:3;
-		uint64_t zero:1;
-		uint64_t reserved_43_63:21;
-#endif
-	} s;
-	struct cvmx_sli_mem_access_subidx_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_43_63:21;
-		uint64_t zero:1;
-		uint64_t port:3;
-		uint64_t nmerge:1;
-		uint64_t esr:2;
-		uint64_t esw:2;
-		uint64_t wtype:2;
-		uint64_t rtype:2;
-		uint64_t ba:30;
-#else
-		uint64_t ba:30;
-		uint64_t rtype:2;
-		uint64_t wtype:2;
-		uint64_t esw:2;
-		uint64_t esr:2;
-		uint64_t nmerge:1;
-		uint64_t port:3;
-		uint64_t zero:1;
-		uint64_t reserved_43_63:21;
-#endif
-	} cn61xx;
-	struct cvmx_sli_mem_access_subidx_cn61xx cn63xx;
-	struct cvmx_sli_mem_access_subidx_cn61xx cn63xxp1;
-	struct cvmx_sli_mem_access_subidx_cn61xx cn66xx;
-	struct cvmx_sli_mem_access_subidx_cn68xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_43_63:21;
-		uint64_t zero:1;
-		uint64_t port:3;
-		uint64_t nmerge:1;
-		uint64_t esr:2;
-		uint64_t esw:2;
-		uint64_t wtype:2;
-		uint64_t rtype:2;
-		uint64_t ba:28;
-		uint64_t reserved_0_1:2;
-#else
-		uint64_t reserved_0_1:2;
-		uint64_t ba:28;
-		uint64_t rtype:2;
-		uint64_t wtype:2;
-		uint64_t esw:2;
-		uint64_t esr:2;
-		uint64_t nmerge:1;
-		uint64_t port:3;
-		uint64_t zero:1;
-		uint64_t reserved_43_63:21;
-#endif
-	} cn68xx;
-	struct cvmx_sli_mem_access_subidx_cn68xx cn68xxp1;
-	struct cvmx_sli_mem_access_subidx_cn61xx cnf71xx;
-};
-
-union cvmx_sli_msi_enb0 {
-	uint64_t u64;
-	struct cvmx_sli_msi_enb0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t enb:64;
-#else
-		uint64_t enb:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_enb0_s cn61xx;
-	struct cvmx_sli_msi_enb0_s cn63xx;
-	struct cvmx_sli_msi_enb0_s cn63xxp1;
-	struct cvmx_sli_msi_enb0_s cn66xx;
-	struct cvmx_sli_msi_enb0_s cn68xx;
-	struct cvmx_sli_msi_enb0_s cn68xxp1;
-	struct cvmx_sli_msi_enb0_s cnf71xx;
-};
-
-union cvmx_sli_msi_enb1 {
-	uint64_t u64;
-	struct cvmx_sli_msi_enb1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t enb:64;
-#else
-		uint64_t enb:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_enb1_s cn61xx;
-	struct cvmx_sli_msi_enb1_s cn63xx;
-	struct cvmx_sli_msi_enb1_s cn63xxp1;
-	struct cvmx_sli_msi_enb1_s cn66xx;
-	struct cvmx_sli_msi_enb1_s cn68xx;
-	struct cvmx_sli_msi_enb1_s cn68xxp1;
-	struct cvmx_sli_msi_enb1_s cnf71xx;
-};
-
-union cvmx_sli_msi_enb2 {
-	uint64_t u64;
-	struct cvmx_sli_msi_enb2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t enb:64;
-#else
-		uint64_t enb:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_enb2_s cn61xx;
-	struct cvmx_sli_msi_enb2_s cn63xx;
-	struct cvmx_sli_msi_enb2_s cn63xxp1;
-	struct cvmx_sli_msi_enb2_s cn66xx;
-	struct cvmx_sli_msi_enb2_s cn68xx;
-	struct cvmx_sli_msi_enb2_s cn68xxp1;
-	struct cvmx_sli_msi_enb2_s cnf71xx;
-};
-
-union cvmx_sli_msi_enb3 {
-	uint64_t u64;
-	struct cvmx_sli_msi_enb3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t enb:64;
-#else
-		uint64_t enb:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_enb3_s cn61xx;
-	struct cvmx_sli_msi_enb3_s cn63xx;
-	struct cvmx_sli_msi_enb3_s cn63xxp1;
-	struct cvmx_sli_msi_enb3_s cn66xx;
-	struct cvmx_sli_msi_enb3_s cn68xx;
-	struct cvmx_sli_msi_enb3_s cn68xxp1;
-	struct cvmx_sli_msi_enb3_s cnf71xx;
-};
-
-union cvmx_sli_msi_rcv0 {
-	uint64_t u64;
-	struct cvmx_sli_msi_rcv0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t intr:64;
-#else
-		uint64_t intr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_rcv0_s cn61xx;
-	struct cvmx_sli_msi_rcv0_s cn63xx;
-	struct cvmx_sli_msi_rcv0_s cn63xxp1;
-	struct cvmx_sli_msi_rcv0_s cn66xx;
-	struct cvmx_sli_msi_rcv0_s cn68xx;
-	struct cvmx_sli_msi_rcv0_s cn68xxp1;
-	struct cvmx_sli_msi_rcv0_s cnf71xx;
-};
-
-union cvmx_sli_msi_rcv1 {
-	uint64_t u64;
-	struct cvmx_sli_msi_rcv1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t intr:64;
-#else
-		uint64_t intr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_rcv1_s cn61xx;
-	struct cvmx_sli_msi_rcv1_s cn63xx;
-	struct cvmx_sli_msi_rcv1_s cn63xxp1;
-	struct cvmx_sli_msi_rcv1_s cn66xx;
-	struct cvmx_sli_msi_rcv1_s cn68xx;
-	struct cvmx_sli_msi_rcv1_s cn68xxp1;
-	struct cvmx_sli_msi_rcv1_s cnf71xx;
-};
-
-union cvmx_sli_msi_rcv2 {
-	uint64_t u64;
-	struct cvmx_sli_msi_rcv2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t intr:64;
-#else
-		uint64_t intr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_rcv2_s cn61xx;
-	struct cvmx_sli_msi_rcv2_s cn63xx;
-	struct cvmx_sli_msi_rcv2_s cn63xxp1;
-	struct cvmx_sli_msi_rcv2_s cn66xx;
-	struct cvmx_sli_msi_rcv2_s cn68xx;
-	struct cvmx_sli_msi_rcv2_s cn68xxp1;
-	struct cvmx_sli_msi_rcv2_s cnf71xx;
-};
-
-union cvmx_sli_msi_rcv3 {
-	uint64_t u64;
-	struct cvmx_sli_msi_rcv3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t intr:64;
-#else
-		uint64_t intr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_rcv3_s cn61xx;
-	struct cvmx_sli_msi_rcv3_s cn63xx;
-	struct cvmx_sli_msi_rcv3_s cn63xxp1;
-	struct cvmx_sli_msi_rcv3_s cn66xx;
-	struct cvmx_sli_msi_rcv3_s cn68xx;
-	struct cvmx_sli_msi_rcv3_s cn68xxp1;
-	struct cvmx_sli_msi_rcv3_s cnf71xx;
-};
-
-union cvmx_sli_msi_rd_map {
-	uint64_t u64;
-	struct cvmx_sli_msi_rd_map_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t rd_int:8;
-		uint64_t msi_int:8;
-#else
-		uint64_t msi_int:8;
-		uint64_t rd_int:8;
-		uint64_t reserved_16_63:48;
-#endif
-	} s;
-	struct cvmx_sli_msi_rd_map_s cn61xx;
-	struct cvmx_sli_msi_rd_map_s cn63xx;
-	struct cvmx_sli_msi_rd_map_s cn63xxp1;
-	struct cvmx_sli_msi_rd_map_s cn66xx;
-	struct cvmx_sli_msi_rd_map_s cn68xx;
-	struct cvmx_sli_msi_rd_map_s cn68xxp1;
-	struct cvmx_sli_msi_rd_map_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1c_enb0 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1c_enb0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t clr:64;
-#else
-		uint64_t clr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1c_enb0_s cn61xx;
-	struct cvmx_sli_msi_w1c_enb0_s cn63xx;
-	struct cvmx_sli_msi_w1c_enb0_s cn63xxp1;
-	struct cvmx_sli_msi_w1c_enb0_s cn66xx;
-	struct cvmx_sli_msi_w1c_enb0_s cn68xx;
-	struct cvmx_sli_msi_w1c_enb0_s cn68xxp1;
-	struct cvmx_sli_msi_w1c_enb0_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1c_enb1 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1c_enb1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t clr:64;
-#else
-		uint64_t clr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1c_enb1_s cn61xx;
-	struct cvmx_sli_msi_w1c_enb1_s cn63xx;
-	struct cvmx_sli_msi_w1c_enb1_s cn63xxp1;
-	struct cvmx_sli_msi_w1c_enb1_s cn66xx;
-	struct cvmx_sli_msi_w1c_enb1_s cn68xx;
-	struct cvmx_sli_msi_w1c_enb1_s cn68xxp1;
-	struct cvmx_sli_msi_w1c_enb1_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1c_enb2 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1c_enb2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t clr:64;
-#else
-		uint64_t clr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1c_enb2_s cn61xx;
-	struct cvmx_sli_msi_w1c_enb2_s cn63xx;
-	struct cvmx_sli_msi_w1c_enb2_s cn63xxp1;
-	struct cvmx_sli_msi_w1c_enb2_s cn66xx;
-	struct cvmx_sli_msi_w1c_enb2_s cn68xx;
-	struct cvmx_sli_msi_w1c_enb2_s cn68xxp1;
-	struct cvmx_sli_msi_w1c_enb2_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1c_enb3 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1c_enb3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t clr:64;
-#else
-		uint64_t clr:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1c_enb3_s cn61xx;
-	struct cvmx_sli_msi_w1c_enb3_s cn63xx;
-	struct cvmx_sli_msi_w1c_enb3_s cn63xxp1;
-	struct cvmx_sli_msi_w1c_enb3_s cn66xx;
-	struct cvmx_sli_msi_w1c_enb3_s cn68xx;
-	struct cvmx_sli_msi_w1c_enb3_s cn68xxp1;
-	struct cvmx_sli_msi_w1c_enb3_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1s_enb0 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1s_enb0_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t set:64;
-#else
-		uint64_t set:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1s_enb0_s cn61xx;
-	struct cvmx_sli_msi_w1s_enb0_s cn63xx;
-	struct cvmx_sli_msi_w1s_enb0_s cn63xxp1;
-	struct cvmx_sli_msi_w1s_enb0_s cn66xx;
-	struct cvmx_sli_msi_w1s_enb0_s cn68xx;
-	struct cvmx_sli_msi_w1s_enb0_s cn68xxp1;
-	struct cvmx_sli_msi_w1s_enb0_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1s_enb1 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1s_enb1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t set:64;
-#else
-		uint64_t set:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1s_enb1_s cn61xx;
-	struct cvmx_sli_msi_w1s_enb1_s cn63xx;
-	struct cvmx_sli_msi_w1s_enb1_s cn63xxp1;
-	struct cvmx_sli_msi_w1s_enb1_s cn66xx;
-	struct cvmx_sli_msi_w1s_enb1_s cn68xx;
-	struct cvmx_sli_msi_w1s_enb1_s cn68xxp1;
-	struct cvmx_sli_msi_w1s_enb1_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1s_enb2 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1s_enb2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t set:64;
-#else
-		uint64_t set:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1s_enb2_s cn61xx;
-	struct cvmx_sli_msi_w1s_enb2_s cn63xx;
-	struct cvmx_sli_msi_w1s_enb2_s cn63xxp1;
-	struct cvmx_sli_msi_w1s_enb2_s cn66xx;
-	struct cvmx_sli_msi_w1s_enb2_s cn68xx;
-	struct cvmx_sli_msi_w1s_enb2_s cn68xxp1;
-	struct cvmx_sli_msi_w1s_enb2_s cnf71xx;
-};
-
-union cvmx_sli_msi_w1s_enb3 {
-	uint64_t u64;
-	struct cvmx_sli_msi_w1s_enb3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t set:64;
-#else
-		uint64_t set:64;
-#endif
-	} s;
-	struct cvmx_sli_msi_w1s_enb3_s cn61xx;
-	struct cvmx_sli_msi_w1s_enb3_s cn63xx;
-	struct cvmx_sli_msi_w1s_enb3_s cn63xxp1;
-	struct cvmx_sli_msi_w1s_enb3_s cn66xx;
-	struct cvmx_sli_msi_w1s_enb3_s cn68xx;
-	struct cvmx_sli_msi_w1s_enb3_s cn68xxp1;
-	struct cvmx_sli_msi_w1s_enb3_s cnf71xx;
-};
-
-union cvmx_sli_msi_wr_map {
-	uint64_t u64;
-	struct cvmx_sli_msi_wr_map_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t ciu_int:8;
-		uint64_t msi_int:8;
-#else
-		uint64_t msi_int:8;
-		uint64_t ciu_int:8;
-		uint64_t reserved_16_63:48;
-#endif
-	} s;
-	struct cvmx_sli_msi_wr_map_s cn61xx;
-	struct cvmx_sli_msi_wr_map_s cn63xx;
-	struct cvmx_sli_msi_wr_map_s cn63xxp1;
-	struct cvmx_sli_msi_wr_map_s cn66xx;
-	struct cvmx_sli_msi_wr_map_s cn68xx;
-	struct cvmx_sli_msi_wr_map_s cn68xxp1;
-	struct cvmx_sli_msi_wr_map_s cnf71xx;
-};
-
-union cvmx_sli_pcie_msi_rcv {
-	uint64_t u64;
-	struct cvmx_sli_pcie_msi_rcv_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t intr:8;
-#else
-		uint64_t intr:8;
-		uint64_t reserved_8_63:56;
-#endif
-	} s;
-	struct cvmx_sli_pcie_msi_rcv_s cn61xx;
-	struct cvmx_sli_pcie_msi_rcv_s cn63xx;
-	struct cvmx_sli_pcie_msi_rcv_s cn63xxp1;
-	struct cvmx_sli_pcie_msi_rcv_s cn66xx;
-	struct cvmx_sli_pcie_msi_rcv_s cn68xx;
-	struct cvmx_sli_pcie_msi_rcv_s cn68xxp1;
-	struct cvmx_sli_pcie_msi_rcv_s cnf71xx;
-};
-
-union cvmx_sli_pcie_msi_rcv_b1 {
-	uint64_t u64;
-	struct cvmx_sli_pcie_msi_rcv_b1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_16_63:48;
-		uint64_t intr:8;
-		uint64_t reserved_0_7:8;
-#else
-		uint64_t reserved_0_7:8;
-		uint64_t intr:8;
-		uint64_t reserved_16_63:48;
-#endif
-	} s;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cn61xx;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cn63xx;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cn63xxp1;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cn66xx;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cn68xx;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cn68xxp1;
-	struct cvmx_sli_pcie_msi_rcv_b1_s cnf71xx;
-};
-
-union cvmx_sli_pcie_msi_rcv_b2 {
-	uint64_t u64;
-	struct cvmx_sli_pcie_msi_rcv_b2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t intr:8;
-		uint64_t reserved_0_15:16;
-#else
-		uint64_t reserved_0_15:16;
-		uint64_t intr:8;
-		uint64_t reserved_24_63:40;
-#endif
-	} s;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cn61xx;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cn63xx;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cn63xxp1;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cn66xx;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cn68xx;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cn68xxp1;
-	struct cvmx_sli_pcie_msi_rcv_b2_s cnf71xx;
-};
-
-union cvmx_sli_pcie_msi_rcv_b3 {
-	uint64_t u64;
-	struct cvmx_sli_pcie_msi_rcv_b3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t intr:8;
-		uint64_t reserved_0_23:24;
-#else
-		uint64_t reserved_0_23:24;
-		uint64_t intr:8;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cn61xx;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cn63xx;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cn63xxp1;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cn66xx;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cn68xx;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cn68xxp1;
-	struct cvmx_sli_pcie_msi_rcv_b3_s cnf71xx;
-};
-
-union cvmx_sli_pktx_cnts {
-	uint64_t u64;
-	struct cvmx_sli_pktx_cnts_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_54_63:10;
-		uint64_t timer:22;
-		uint64_t cnt:32;
-#else
-		uint64_t cnt:32;
-		uint64_t timer:22;
-		uint64_t reserved_54_63:10;
-#endif
-	} s;
-	struct cvmx_sli_pktx_cnts_s cn61xx;
-	struct cvmx_sli_pktx_cnts_s cn63xx;
-	struct cvmx_sli_pktx_cnts_s cn63xxp1;
-	struct cvmx_sli_pktx_cnts_s cn66xx;
-	struct cvmx_sli_pktx_cnts_s cn68xx;
-	struct cvmx_sli_pktx_cnts_s cn68xxp1;
-	struct cvmx_sli_pktx_cnts_s cnf71xx;
-};
-
-union cvmx_sli_pktx_in_bp {
-	uint64_t u64;
-	struct cvmx_sli_pktx_in_bp_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t wmark:32;
-		uint64_t cnt:32;
-#else
-		uint64_t cnt:32;
-		uint64_t wmark:32;
-#endif
-	} s;
-	struct cvmx_sli_pktx_in_bp_s cn61xx;
-	struct cvmx_sli_pktx_in_bp_s cn63xx;
-	struct cvmx_sli_pktx_in_bp_s cn63xxp1;
-	struct cvmx_sli_pktx_in_bp_s cn66xx;
-	struct cvmx_sli_pktx_in_bp_s cnf71xx;
-};
-
-union cvmx_sli_pktx_instr_baddr {
-	uint64_t u64;
-	struct cvmx_sli_pktx_instr_baddr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t addr:61;
-		uint64_t reserved_0_2:3;
-#else
-		uint64_t reserved_0_2:3;
-		uint64_t addr:61;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_14_63:50,
+		__BITFIELD_FIELD(uint64_t max_word:4,
+		__BITFIELD_FIELD(uint64_t timer:10,
+		;)))
 	} s;
-	struct cvmx_sli_pktx_instr_baddr_s cn61xx;
-	struct cvmx_sli_pktx_instr_baddr_s cn63xx;
-	struct cvmx_sli_pktx_instr_baddr_s cn63xxp1;
-	struct cvmx_sli_pktx_instr_baddr_s cn66xx;
-	struct cvmx_sli_pktx_instr_baddr_s cn68xx;
-	struct cvmx_sli_pktx_instr_baddr_s cn68xxp1;
-	struct cvmx_sli_pktx_instr_baddr_s cnf71xx;
-};
-
-union cvmx_sli_pktx_instr_baoff_dbell {
-	uint64_t u64;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t aoff:32;
-		uint64_t dbell:32;
-#else
-		uint64_t dbell:32;
-		uint64_t aoff:32;
-#endif
-	} s;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cn61xx;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cn63xx;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cn63xxp1;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cn66xx;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cn68xx;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cn68xxp1;
-	struct cvmx_sli_pktx_instr_baoff_dbell_s cnf71xx;
-};
-
-union cvmx_sli_pktx_instr_fifo_rsize {
-	uint64_t u64;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t max:9;
-		uint64_t rrp:9;
-		uint64_t wrp:9;
-		uint64_t fcnt:5;
-		uint64_t rsize:32;
-#else
-		uint64_t rsize:32;
-		uint64_t fcnt:5;
-		uint64_t wrp:9;
-		uint64_t rrp:9;
-		uint64_t max:9;
-#endif
-	} s;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cn61xx;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cn63xx;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cn63xxp1;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cn66xx;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cn68xx;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cn68xxp1;
-	struct cvmx_sli_pktx_instr_fifo_rsize_s cnf71xx;
-};
-
-union cvmx_sli_pktx_instr_header {
-	uint64_t u64;
-	struct cvmx_sli_pktx_instr_header_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_44_63:20;
-		uint64_t pbp:1;
-		uint64_t reserved_38_42:5;
-		uint64_t rparmode:2;
-		uint64_t reserved_35_35:1;
-		uint64_t rskp_len:7;
-		uint64_t rngrpext:2;
-		uint64_t rnqos:1;
-		uint64_t rngrp:1;
-		uint64_t rntt:1;
-		uint64_t rntag:1;
-		uint64_t use_ihdr:1;
-		uint64_t reserved_16_20:5;
-		uint64_t par_mode:2;
-		uint64_t reserved_13_13:1;
-		uint64_t skp_len:7;
-		uint64_t ngrpext:2;
-		uint64_t nqos:1;
-		uint64_t ngrp:1;
-		uint64_t ntt:1;
-		uint64_t ntag:1;
-#else
-		uint64_t ntag:1;
-		uint64_t ntt:1;
-		uint64_t ngrp:1;
-		uint64_t nqos:1;
-		uint64_t ngrpext:2;
-		uint64_t skp_len:7;
-		uint64_t reserved_13_13:1;
-		uint64_t par_mode:2;
-		uint64_t reserved_16_20:5;
-		uint64_t use_ihdr:1;
-		uint64_t rntag:1;
-		uint64_t rntt:1;
-		uint64_t rngrp:1;
-		uint64_t rnqos:1;
-		uint64_t rngrpext:2;
-		uint64_t rskp_len:7;
-		uint64_t reserved_35_35:1;
-		uint64_t rparmode:2;
-		uint64_t reserved_38_42:5;
-		uint64_t pbp:1;
-		uint64_t reserved_44_63:20;
-#endif
-	} s;
-	struct cvmx_sli_pktx_instr_header_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_44_63:20;
-		uint64_t pbp:1;
-		uint64_t reserved_38_42:5;
-		uint64_t rparmode:2;
-		uint64_t reserved_35_35:1;
-		uint64_t rskp_len:7;
-		uint64_t reserved_26_27:2;
-		uint64_t rnqos:1;
-		uint64_t rngrp:1;
-		uint64_t rntt:1;
-		uint64_t rntag:1;
-		uint64_t use_ihdr:1;
-		uint64_t reserved_16_20:5;
-		uint64_t par_mode:2;
-		uint64_t reserved_13_13:1;
-		uint64_t skp_len:7;
-		uint64_t reserved_4_5:2;
-		uint64_t nqos:1;
-		uint64_t ngrp:1;
-		uint64_t ntt:1;
-		uint64_t ntag:1;
-#else
-		uint64_t ntag:1;
-		uint64_t ntt:1;
-		uint64_t ngrp:1;
-		uint64_t nqos:1;
-		uint64_t reserved_4_5:2;
-		uint64_t skp_len:7;
-		uint64_t reserved_13_13:1;
-		uint64_t par_mode:2;
-		uint64_t reserved_16_20:5;
-		uint64_t use_ihdr:1;
-		uint64_t rntag:1;
-		uint64_t rntt:1;
-		uint64_t rngrp:1;
-		uint64_t rnqos:1;
-		uint64_t reserved_26_27:2;
-		uint64_t rskp_len:7;
-		uint64_t reserved_35_35:1;
-		uint64_t rparmode:2;
-		uint64_t reserved_38_42:5;
-		uint64_t pbp:1;
-		uint64_t reserved_44_63:20;
-#endif
-	} cn61xx;
-	struct cvmx_sli_pktx_instr_header_cn61xx cn63xx;
-	struct cvmx_sli_pktx_instr_header_cn61xx cn63xxp1;
-	struct cvmx_sli_pktx_instr_header_cn61xx cn66xx;
-	struct cvmx_sli_pktx_instr_header_s cn68xx;
-	struct cvmx_sli_pktx_instr_header_cn61xx cn68xxp1;
-	struct cvmx_sli_pktx_instr_header_cn61xx cnf71xx;
-};
-
-union cvmx_sli_pktx_out_size {
-	uint64_t u64;
-	struct cvmx_sli_pktx_out_size_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_23_63:41;
-		uint64_t isize:7;
-		uint64_t bsize:16;
-#else
-		uint64_t bsize:16;
-		uint64_t isize:7;
-		uint64_t reserved_23_63:41;
-#endif
-	} s;
-	struct cvmx_sli_pktx_out_size_s cn61xx;
-	struct cvmx_sli_pktx_out_size_s cn63xx;
-	struct cvmx_sli_pktx_out_size_s cn63xxp1;
-	struct cvmx_sli_pktx_out_size_s cn66xx;
-	struct cvmx_sli_pktx_out_size_s cn68xx;
-	struct cvmx_sli_pktx_out_size_s cn68xxp1;
-	struct cvmx_sli_pktx_out_size_s cnf71xx;
-};
-
-union cvmx_sli_pktx_slist_baddr {
-	uint64_t u64;
-	struct cvmx_sli_pktx_slist_baddr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t addr:60;
-		uint64_t reserved_0_3:4;
-#else
-		uint64_t reserved_0_3:4;
-		uint64_t addr:60;
-#endif
-	} s;
-	struct cvmx_sli_pktx_slist_baddr_s cn61xx;
-	struct cvmx_sli_pktx_slist_baddr_s cn63xx;
-	struct cvmx_sli_pktx_slist_baddr_s cn63xxp1;
-	struct cvmx_sli_pktx_slist_baddr_s cn66xx;
-	struct cvmx_sli_pktx_slist_baddr_s cn68xx;
-	struct cvmx_sli_pktx_slist_baddr_s cn68xxp1;
-	struct cvmx_sli_pktx_slist_baddr_s cnf71xx;
-};
-
-union cvmx_sli_pktx_slist_baoff_dbell {
-	uint64_t u64;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t aoff:32;
-		uint64_t dbell:32;
-#else
-		uint64_t dbell:32;
-		uint64_t aoff:32;
-#endif
-	} s;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cn61xx;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cn63xx;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cn63xxp1;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cn66xx;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cn68xx;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cn68xxp1;
-	struct cvmx_sli_pktx_slist_baoff_dbell_s cnf71xx;
-};
-
-union cvmx_sli_pktx_slist_fifo_rsize {
-	uint64_t u64;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t rsize:32;
-#else
-		uint64_t rsize:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cn61xx;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cn63xx;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cn63xxp1;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cn66xx;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cn68xx;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cn68xxp1;
-	struct cvmx_sli_pktx_slist_fifo_rsize_s cnf71xx;
-};
-
-union cvmx_sli_pkt_cnt_int {
-	uint64_t u64;
-	struct cvmx_sli_pkt_cnt_int_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t port:32;
-#else
-		uint64_t port:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_cnt_int_s cn61xx;
-	struct cvmx_sli_pkt_cnt_int_s cn63xx;
-	struct cvmx_sli_pkt_cnt_int_s cn63xxp1;
-	struct cvmx_sli_pkt_cnt_int_s cn66xx;
-	struct cvmx_sli_pkt_cnt_int_s cn68xx;
-	struct cvmx_sli_pkt_cnt_int_s cn68xxp1;
-	struct cvmx_sli_pkt_cnt_int_s cnf71xx;
-};
-
-union cvmx_sli_pkt_cnt_int_enb {
-	uint64_t u64;
-	struct cvmx_sli_pkt_cnt_int_enb_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t port:32;
-#else
-		uint64_t port:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_cnt_int_enb_s cn61xx;
-	struct cvmx_sli_pkt_cnt_int_enb_s cn63xx;
-	struct cvmx_sli_pkt_cnt_int_enb_s cn63xxp1;
-	struct cvmx_sli_pkt_cnt_int_enb_s cn66xx;
-	struct cvmx_sli_pkt_cnt_int_enb_s cn68xx;
-	struct cvmx_sli_pkt_cnt_int_enb_s cn68xxp1;
-	struct cvmx_sli_pkt_cnt_int_enb_s cnf71xx;
-};
-
-union cvmx_sli_pkt_ctl {
-	uint64_t u64;
-	struct cvmx_sli_pkt_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_5_63:59;
-		uint64_t ring_en:1;
-		uint64_t pkt_bp:4;
-#else
-		uint64_t pkt_bp:4;
-		uint64_t ring_en:1;
-		uint64_t reserved_5_63:59;
-#endif
-	} s;
-	struct cvmx_sli_pkt_ctl_s cn61xx;
-	struct cvmx_sli_pkt_ctl_s cn63xx;
-	struct cvmx_sli_pkt_ctl_s cn63xxp1;
-	struct cvmx_sli_pkt_ctl_s cn66xx;
-	struct cvmx_sli_pkt_ctl_s cn68xx;
-	struct cvmx_sli_pkt_ctl_s cn68xxp1;
-	struct cvmx_sli_pkt_ctl_s cnf71xx;
-};
-
-union cvmx_sli_pkt_data_out_es {
-	uint64_t u64;
-	struct cvmx_sli_pkt_data_out_es_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t es:64;
-#else
-		uint64_t es:64;
-#endif
-	} s;
-	struct cvmx_sli_pkt_data_out_es_s cn61xx;
-	struct cvmx_sli_pkt_data_out_es_s cn63xx;
-	struct cvmx_sli_pkt_data_out_es_s cn63xxp1;
-	struct cvmx_sli_pkt_data_out_es_s cn66xx;
-	struct cvmx_sli_pkt_data_out_es_s cn68xx;
-	struct cvmx_sli_pkt_data_out_es_s cn68xxp1;
-	struct cvmx_sli_pkt_data_out_es_s cnf71xx;
-};
-
-union cvmx_sli_pkt_data_out_ns {
-	uint64_t u64;
-	struct cvmx_sli_pkt_data_out_ns_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t nsr:32;
-#else
-		uint64_t nsr:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_data_out_ns_s cn61xx;
-	struct cvmx_sli_pkt_data_out_ns_s cn63xx;
-	struct cvmx_sli_pkt_data_out_ns_s cn63xxp1;
-	struct cvmx_sli_pkt_data_out_ns_s cn66xx;
-	struct cvmx_sli_pkt_data_out_ns_s cn68xx;
-	struct cvmx_sli_pkt_data_out_ns_s cn68xxp1;
-	struct cvmx_sli_pkt_data_out_ns_s cnf71xx;
-};
-
-union cvmx_sli_pkt_data_out_ror {
-	uint64_t u64;
-	struct cvmx_sli_pkt_data_out_ror_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t ror:32;
-#else
-		uint64_t ror:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_data_out_ror_s cn61xx;
-	struct cvmx_sli_pkt_data_out_ror_s cn63xx;
-	struct cvmx_sli_pkt_data_out_ror_s cn63xxp1;
-	struct cvmx_sli_pkt_data_out_ror_s cn66xx;
-	struct cvmx_sli_pkt_data_out_ror_s cn68xx;
-	struct cvmx_sli_pkt_data_out_ror_s cn68xxp1;
-	struct cvmx_sli_pkt_data_out_ror_s cnf71xx;
-};
-
-union cvmx_sli_pkt_dpaddr {
-	uint64_t u64;
-	struct cvmx_sli_pkt_dpaddr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t dptr:32;
-#else
-		uint64_t dptr:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_dpaddr_s cn61xx;
-	struct cvmx_sli_pkt_dpaddr_s cn63xx;
-	struct cvmx_sli_pkt_dpaddr_s cn63xxp1;
-	struct cvmx_sli_pkt_dpaddr_s cn66xx;
-	struct cvmx_sli_pkt_dpaddr_s cn68xx;
-	struct cvmx_sli_pkt_dpaddr_s cn68xxp1;
-	struct cvmx_sli_pkt_dpaddr_s cnf71xx;
-};
-
-union cvmx_sli_pkt_in_bp {
-	uint64_t u64;
-	struct cvmx_sli_pkt_in_bp_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t bp:32;
-#else
-		uint64_t bp:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_in_bp_s cn61xx;
-	struct cvmx_sli_pkt_in_bp_s cn63xx;
-	struct cvmx_sli_pkt_in_bp_s cn63xxp1;
-	struct cvmx_sli_pkt_in_bp_s cn66xx;
-	struct cvmx_sli_pkt_in_bp_s cnf71xx;
-};
-
-union cvmx_sli_pkt_in_donex_cnts {
-	uint64_t u64;
-	struct cvmx_sli_pkt_in_donex_cnts_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t cnt:32;
-#else
-		uint64_t cnt:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_in_donex_cnts_s cn61xx;
-	struct cvmx_sli_pkt_in_donex_cnts_s cn63xx;
-	struct cvmx_sli_pkt_in_donex_cnts_s cn63xxp1;
-	struct cvmx_sli_pkt_in_donex_cnts_s cn66xx;
-	struct cvmx_sli_pkt_in_donex_cnts_s cn68xx;
-	struct cvmx_sli_pkt_in_donex_cnts_s cn68xxp1;
-	struct cvmx_sli_pkt_in_donex_cnts_s cnf71xx;
-};
-
-union cvmx_sli_pkt_in_instr_counts {
-	uint64_t u64;
-	struct cvmx_sli_pkt_in_instr_counts_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t wr_cnt:32;
-		uint64_t rd_cnt:32;
-#else
-		uint64_t rd_cnt:32;
-		uint64_t wr_cnt:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_in_instr_counts_s cn61xx;
-	struct cvmx_sli_pkt_in_instr_counts_s cn63xx;
-	struct cvmx_sli_pkt_in_instr_counts_s cn63xxp1;
-	struct cvmx_sli_pkt_in_instr_counts_s cn66xx;
-	struct cvmx_sli_pkt_in_instr_counts_s cn68xx;
-	struct cvmx_sli_pkt_in_instr_counts_s cn68xxp1;
-	struct cvmx_sli_pkt_in_instr_counts_s cnf71xx;
-};
-
-union cvmx_sli_pkt_in_pcie_port {
-	uint64_t u64;
-	struct cvmx_sli_pkt_in_pcie_port_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t pp:64;
-#else
-		uint64_t pp:64;
-#endif
-	} s;
-	struct cvmx_sli_pkt_in_pcie_port_s cn61xx;
-	struct cvmx_sli_pkt_in_pcie_port_s cn63xx;
-	struct cvmx_sli_pkt_in_pcie_port_s cn63xxp1;
-	struct cvmx_sli_pkt_in_pcie_port_s cn66xx;
-	struct cvmx_sli_pkt_in_pcie_port_s cn68xx;
-	struct cvmx_sli_pkt_in_pcie_port_s cn68xxp1;
-	struct cvmx_sli_pkt_in_pcie_port_s cnf71xx;
-};
-
-union cvmx_sli_pkt_input_control {
-	uint64_t u64;
-	struct cvmx_sli_pkt_input_control_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t prd_erst:1;
-		uint64_t prd_rds:7;
-		uint64_t gii_erst:1;
-		uint64_t gii_rds:7;
-		uint64_t reserved_41_47:7;
-		uint64_t prc_idle:1;
-		uint64_t reserved_24_39:16;
-		uint64_t pin_rst:1;
-		uint64_t pkt_rr:1;
-		uint64_t pbp_dhi:13;
-		uint64_t d_nsr:1;
-		uint64_t d_esr:2;
-		uint64_t d_ror:1;
-		uint64_t use_csr:1;
-		uint64_t nsr:1;
-		uint64_t esr:2;
-		uint64_t ror:1;
-#else
-		uint64_t ror:1;
-		uint64_t esr:2;
-		uint64_t nsr:1;
-		uint64_t use_csr:1;
-		uint64_t d_ror:1;
-		uint64_t d_esr:2;
-		uint64_t d_nsr:1;
-		uint64_t pbp_dhi:13;
-		uint64_t pkt_rr:1;
-		uint64_t pin_rst:1;
-		uint64_t reserved_24_39:16;
-		uint64_t prc_idle:1;
-		uint64_t reserved_41_47:7;
-		uint64_t gii_rds:7;
-		uint64_t gii_erst:1;
-		uint64_t prd_rds:7;
-		uint64_t prd_erst:1;
-#endif
-	} s;
-	struct cvmx_sli_pkt_input_control_s cn61xx;
-	struct cvmx_sli_pkt_input_control_cn63xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_23_63:41;
-		uint64_t pkt_rr:1;
-		uint64_t pbp_dhi:13;
-		uint64_t d_nsr:1;
-		uint64_t d_esr:2;
-		uint64_t d_ror:1;
-		uint64_t use_csr:1;
-		uint64_t nsr:1;
-		uint64_t esr:2;
-		uint64_t ror:1;
-#else
-		uint64_t ror:1;
-		uint64_t esr:2;
-		uint64_t nsr:1;
-		uint64_t use_csr:1;
-		uint64_t d_ror:1;
-		uint64_t d_esr:2;
-		uint64_t d_nsr:1;
-		uint64_t pbp_dhi:13;
-		uint64_t pkt_rr:1;
-		uint64_t reserved_23_63:41;
-#endif
-	} cn63xx;
-	struct cvmx_sli_pkt_input_control_cn63xx cn63xxp1;
-	struct cvmx_sli_pkt_input_control_s cn66xx;
-	struct cvmx_sli_pkt_input_control_s cn68xx;
-	struct cvmx_sli_pkt_input_control_s cn68xxp1;
-	struct cvmx_sli_pkt_input_control_s cnf71xx;
-};
-
-union cvmx_sli_pkt_instr_enb {
-	uint64_t u64;
-	struct cvmx_sli_pkt_instr_enb_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t enb:32;
-#else
-		uint64_t enb:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_instr_enb_s cn61xx;
-	struct cvmx_sli_pkt_instr_enb_s cn63xx;
-	struct cvmx_sli_pkt_instr_enb_s cn63xxp1;
-	struct cvmx_sli_pkt_instr_enb_s cn66xx;
-	struct cvmx_sli_pkt_instr_enb_s cn68xx;
-	struct cvmx_sli_pkt_instr_enb_s cn68xxp1;
-	struct cvmx_sli_pkt_instr_enb_s cnf71xx;
-};
-
-union cvmx_sli_pkt_instr_rd_size {
-	uint64_t u64;
-	struct cvmx_sli_pkt_instr_rd_size_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t rdsize:64;
-#else
-		uint64_t rdsize:64;
-#endif
-	} s;
-	struct cvmx_sli_pkt_instr_rd_size_s cn61xx;
-	struct cvmx_sli_pkt_instr_rd_size_s cn63xx;
-	struct cvmx_sli_pkt_instr_rd_size_s cn63xxp1;
-	struct cvmx_sli_pkt_instr_rd_size_s cn66xx;
-	struct cvmx_sli_pkt_instr_rd_size_s cn68xx;
-	struct cvmx_sli_pkt_instr_rd_size_s cn68xxp1;
-	struct cvmx_sli_pkt_instr_rd_size_s cnf71xx;
-};
-
-union cvmx_sli_pkt_instr_size {
-	uint64_t u64;
-	struct cvmx_sli_pkt_instr_size_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t is_64b:32;
-#else
-		uint64_t is_64b:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_instr_size_s cn61xx;
-	struct cvmx_sli_pkt_instr_size_s cn63xx;
-	struct cvmx_sli_pkt_instr_size_s cn63xxp1;
-	struct cvmx_sli_pkt_instr_size_s cn66xx;
-	struct cvmx_sli_pkt_instr_size_s cn68xx;
-	struct cvmx_sli_pkt_instr_size_s cn68xxp1;
-	struct cvmx_sli_pkt_instr_size_s cnf71xx;
-};
-
-union cvmx_sli_pkt_int_levels {
-	uint64_t u64;
-	struct cvmx_sli_pkt_int_levels_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_54_63:10;
-		uint64_t time:22;
-		uint64_t cnt:32;
-#else
-		uint64_t cnt:32;
-		uint64_t time:22;
-		uint64_t reserved_54_63:10;
-#endif
-	} s;
-	struct cvmx_sli_pkt_int_levels_s cn61xx;
-	struct cvmx_sli_pkt_int_levels_s cn63xx;
-	struct cvmx_sli_pkt_int_levels_s cn63xxp1;
-	struct cvmx_sli_pkt_int_levels_s cn66xx;
-	struct cvmx_sli_pkt_int_levels_s cn68xx;
-	struct cvmx_sli_pkt_int_levels_s cn68xxp1;
-	struct cvmx_sli_pkt_int_levels_s cnf71xx;
-};
-
-union cvmx_sli_pkt_iptr {
-	uint64_t u64;
-	struct cvmx_sli_pkt_iptr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t iptr:32;
-#else
-		uint64_t iptr:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_iptr_s cn61xx;
-	struct cvmx_sli_pkt_iptr_s cn63xx;
-	struct cvmx_sli_pkt_iptr_s cn63xxp1;
-	struct cvmx_sli_pkt_iptr_s cn66xx;
-	struct cvmx_sli_pkt_iptr_s cn68xx;
-	struct cvmx_sli_pkt_iptr_s cn68xxp1;
-	struct cvmx_sli_pkt_iptr_s cnf71xx;
-};
-
-union cvmx_sli_pkt_out_bmode {
-	uint64_t u64;
-	struct cvmx_sli_pkt_out_bmode_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t bmode:32;
-#else
-		uint64_t bmode:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_out_bmode_s cn61xx;
-	struct cvmx_sli_pkt_out_bmode_s cn63xx;
-	struct cvmx_sli_pkt_out_bmode_s cn63xxp1;
-	struct cvmx_sli_pkt_out_bmode_s cn66xx;
-	struct cvmx_sli_pkt_out_bmode_s cn68xx;
-	struct cvmx_sli_pkt_out_bmode_s cn68xxp1;
-	struct cvmx_sli_pkt_out_bmode_s cnf71xx;
-};
-
-union cvmx_sli_pkt_out_bp_en {
-	uint64_t u64;
-	struct cvmx_sli_pkt_out_bp_en_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t bp_en:32;
-#else
-		uint64_t bp_en:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_out_bp_en_s cn68xx;
-	struct cvmx_sli_pkt_out_bp_en_s cn68xxp1;
-};
-
-union cvmx_sli_pkt_out_enb {
-	uint64_t u64;
-	struct cvmx_sli_pkt_out_enb_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t enb:32;
-#else
-		uint64_t enb:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_out_enb_s cn61xx;
-	struct cvmx_sli_pkt_out_enb_s cn63xx;
-	struct cvmx_sli_pkt_out_enb_s cn63xxp1;
-	struct cvmx_sli_pkt_out_enb_s cn66xx;
-	struct cvmx_sli_pkt_out_enb_s cn68xx;
-	struct cvmx_sli_pkt_out_enb_s cn68xxp1;
-	struct cvmx_sli_pkt_out_enb_s cnf71xx;
-};
-
-union cvmx_sli_pkt_output_wmark {
-	uint64_t u64;
-	struct cvmx_sli_pkt_output_wmark_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t wmark:32;
-#else
-		uint64_t wmark:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_output_wmark_s cn61xx;
-	struct cvmx_sli_pkt_output_wmark_s cn63xx;
-	struct cvmx_sli_pkt_output_wmark_s cn63xxp1;
-	struct cvmx_sli_pkt_output_wmark_s cn66xx;
-	struct cvmx_sli_pkt_output_wmark_s cn68xx;
-	struct cvmx_sli_pkt_output_wmark_s cn68xxp1;
-	struct cvmx_sli_pkt_output_wmark_s cnf71xx;
-};
-
-union cvmx_sli_pkt_pcie_port {
-	uint64_t u64;
-	struct cvmx_sli_pkt_pcie_port_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t pp:64;
-#else
-		uint64_t pp:64;
-#endif
-	} s;
-	struct cvmx_sli_pkt_pcie_port_s cn61xx;
-	struct cvmx_sli_pkt_pcie_port_s cn63xx;
-	struct cvmx_sli_pkt_pcie_port_s cn63xxp1;
-	struct cvmx_sli_pkt_pcie_port_s cn66xx;
-	struct cvmx_sli_pkt_pcie_port_s cn68xx;
-	struct cvmx_sli_pkt_pcie_port_s cn68xxp1;
-	struct cvmx_sli_pkt_pcie_port_s cnf71xx;
-};
-
-union cvmx_sli_pkt_port_in_rst {
-	uint64_t u64;
-	struct cvmx_sli_pkt_port_in_rst_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t in_rst:32;
-		uint64_t out_rst:32;
-#else
-		uint64_t out_rst:32;
-		uint64_t in_rst:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_port_in_rst_s cn61xx;
-	struct cvmx_sli_pkt_port_in_rst_s cn63xx;
-	struct cvmx_sli_pkt_port_in_rst_s cn63xxp1;
-	struct cvmx_sli_pkt_port_in_rst_s cn66xx;
-	struct cvmx_sli_pkt_port_in_rst_s cn68xx;
-	struct cvmx_sli_pkt_port_in_rst_s cn68xxp1;
-	struct cvmx_sli_pkt_port_in_rst_s cnf71xx;
-};
-
-union cvmx_sli_pkt_slist_es {
-	uint64_t u64;
-	struct cvmx_sli_pkt_slist_es_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t es:64;
-#else
-		uint64_t es:64;
-#endif
-	} s;
-	struct cvmx_sli_pkt_slist_es_s cn61xx;
-	struct cvmx_sli_pkt_slist_es_s cn63xx;
-	struct cvmx_sli_pkt_slist_es_s cn63xxp1;
-	struct cvmx_sli_pkt_slist_es_s cn66xx;
-	struct cvmx_sli_pkt_slist_es_s cn68xx;
-	struct cvmx_sli_pkt_slist_es_s cn68xxp1;
-	struct cvmx_sli_pkt_slist_es_s cnf71xx;
-};
-
-union cvmx_sli_pkt_slist_ns {
-	uint64_t u64;
-	struct cvmx_sli_pkt_slist_ns_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t nsr:32;
-#else
-		uint64_t nsr:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_slist_ns_s cn61xx;
-	struct cvmx_sli_pkt_slist_ns_s cn63xx;
-	struct cvmx_sli_pkt_slist_ns_s cn63xxp1;
-	struct cvmx_sli_pkt_slist_ns_s cn66xx;
-	struct cvmx_sli_pkt_slist_ns_s cn68xx;
-	struct cvmx_sli_pkt_slist_ns_s cn68xxp1;
-	struct cvmx_sli_pkt_slist_ns_s cnf71xx;
-};
-
-union cvmx_sli_pkt_slist_ror {
-	uint64_t u64;
-	struct cvmx_sli_pkt_slist_ror_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t ror:32;
-#else
-		uint64_t ror:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_slist_ror_s cn61xx;
-	struct cvmx_sli_pkt_slist_ror_s cn63xx;
-	struct cvmx_sli_pkt_slist_ror_s cn63xxp1;
-	struct cvmx_sli_pkt_slist_ror_s cn66xx;
-	struct cvmx_sli_pkt_slist_ror_s cn68xx;
-	struct cvmx_sli_pkt_slist_ror_s cn68xxp1;
-	struct cvmx_sli_pkt_slist_ror_s cnf71xx;
-};
-
-union cvmx_sli_pkt_time_int {
-	uint64_t u64;
-	struct cvmx_sli_pkt_time_int_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t port:32;
-#else
-		uint64_t port:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_time_int_s cn61xx;
-	struct cvmx_sli_pkt_time_int_s cn63xx;
-	struct cvmx_sli_pkt_time_int_s cn63xxp1;
-	struct cvmx_sli_pkt_time_int_s cn66xx;
-	struct cvmx_sli_pkt_time_int_s cn68xx;
-	struct cvmx_sli_pkt_time_int_s cn68xxp1;
-	struct cvmx_sli_pkt_time_int_s cnf71xx;
-};
-
-union cvmx_sli_pkt_time_int_enb {
-	uint64_t u64;
-	struct cvmx_sli_pkt_time_int_enb_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t port:32;
-#else
-		uint64_t port:32;
-		uint64_t reserved_32_63:32;
-#endif
-	} s;
-	struct cvmx_sli_pkt_time_int_enb_s cn61xx;
-	struct cvmx_sli_pkt_time_int_enb_s cn63xx;
-	struct cvmx_sli_pkt_time_int_enb_s cn63xxp1;
-	struct cvmx_sli_pkt_time_int_enb_s cn66xx;
-	struct cvmx_sli_pkt_time_int_enb_s cn68xx;
-	struct cvmx_sli_pkt_time_int_enb_s cn68xxp1;
-	struct cvmx_sli_pkt_time_int_enb_s cnf71xx;
-};
-
-union cvmx_sli_portx_pkind {
-	uint64_t u64;
-	struct cvmx_sli_portx_pkind_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_25_63:39;
-		uint64_t rpk_enb:1;
-		uint64_t reserved_22_23:2;
-		uint64_t pkindr:6;
-		uint64_t reserved_14_15:2;
-		uint64_t bpkind:6;
-		uint64_t reserved_6_7:2;
-		uint64_t pkind:6;
-#else
-		uint64_t pkind:6;
-		uint64_t reserved_6_7:2;
-		uint64_t bpkind:6;
-		uint64_t reserved_14_15:2;
-		uint64_t pkindr:6;
-		uint64_t reserved_22_23:2;
-		uint64_t rpk_enb:1;
-		uint64_t reserved_25_63:39;
-#endif
-	} s;
-	struct cvmx_sli_portx_pkind_s cn68xx;
-	struct cvmx_sli_portx_pkind_cn68xxp1 {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_14_63:50;
-		uint64_t bpkind:6;
-		uint64_t reserved_6_7:2;
-		uint64_t pkind:6;
-#else
-		uint64_t pkind:6;
-		uint64_t reserved_6_7:2;
-		uint64_t bpkind:6;
-		uint64_t reserved_14_63:50;
-#endif
-	} cn68xxp1;
 };
 
 union cvmx_sli_s2m_portx_ctl {
 	uint64_t u64;
 	struct cvmx_sli_s2m_portx_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_5_63:59;
-		uint64_t wind_d:1;
-		uint64_t bar0_d:1;
-		uint64_t mrrs:3;
-#else
-		uint64_t mrrs:3;
-		uint64_t bar0_d:1;
-		uint64_t wind_d:1;
-		uint64_t reserved_5_63:59;
-#endif
-	} s;
-	struct cvmx_sli_s2m_portx_ctl_s cn61xx;
-	struct cvmx_sli_s2m_portx_ctl_s cn63xx;
-	struct cvmx_sli_s2m_portx_ctl_s cn63xxp1;
-	struct cvmx_sli_s2m_portx_ctl_s cn66xx;
-	struct cvmx_sli_s2m_portx_ctl_s cn68xx;
-	struct cvmx_sli_s2m_portx_ctl_s cn68xxp1;
-	struct cvmx_sli_s2m_portx_ctl_s cnf71xx;
-};
-
-union cvmx_sli_scratch_1 {
-	uint64_t u64;
-	struct cvmx_sli_scratch_1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
-	} s;
-	struct cvmx_sli_scratch_1_s cn61xx;
-	struct cvmx_sli_scratch_1_s cn63xx;
-	struct cvmx_sli_scratch_1_s cn63xxp1;
-	struct cvmx_sli_scratch_1_s cn66xx;
-	struct cvmx_sli_scratch_1_s cn68xx;
-	struct cvmx_sli_scratch_1_s cn68xxp1;
-	struct cvmx_sli_scratch_1_s cnf71xx;
-};
-
-union cvmx_sli_scratch_2 {
-	uint64_t u64;
-	struct cvmx_sli_scratch_2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t data:64;
-#else
-		uint64_t data:64;
-#endif
-	} s;
-	struct cvmx_sli_scratch_2_s cn61xx;
-	struct cvmx_sli_scratch_2_s cn63xx;
-	struct cvmx_sli_scratch_2_s cn63xxp1;
-	struct cvmx_sli_scratch_2_s cn66xx;
-	struct cvmx_sli_scratch_2_s cn68xx;
-	struct cvmx_sli_scratch_2_s cn68xxp1;
-	struct cvmx_sli_scratch_2_s cnf71xx;
-};
-
-union cvmx_sli_state1 {
-	uint64_t u64;
-	struct cvmx_sli_state1_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t cpl1:12;
-		uint64_t cpl0:12;
-		uint64_t arb:1;
-		uint64_t csr:39;
-#else
-		uint64_t csr:39;
-		uint64_t arb:1;
-		uint64_t cpl0:12;
-		uint64_t cpl1:12;
-#endif
-	} s;
-	struct cvmx_sli_state1_s cn61xx;
-	struct cvmx_sli_state1_s cn63xx;
-	struct cvmx_sli_state1_s cn63xxp1;
-	struct cvmx_sli_state1_s cn66xx;
-	struct cvmx_sli_state1_s cn68xx;
-	struct cvmx_sli_state1_s cn68xxp1;
-	struct cvmx_sli_state1_s cnf71xx;
-};
-
-union cvmx_sli_state2 {
-	uint64_t u64;
-	struct cvmx_sli_state2_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_56_63:8;
-		uint64_t nnp1:8;
-		uint64_t reserved_47_47:1;
-		uint64_t rac:1;
-		uint64_t csm1:15;
-		uint64_t csm0:15;
-		uint64_t nnp0:8;
-		uint64_t nnd:8;
-#else
-		uint64_t nnd:8;
-		uint64_t nnp0:8;
-		uint64_t csm0:15;
-		uint64_t csm1:15;
-		uint64_t rac:1;
-		uint64_t reserved_47_47:1;
-		uint64_t nnp1:8;
-		uint64_t reserved_56_63:8;
-#endif
-	} s;
-	struct cvmx_sli_state2_s cn61xx;
-	struct cvmx_sli_state2_s cn63xx;
-	struct cvmx_sli_state2_s cn63xxp1;
-	struct cvmx_sli_state2_s cn66xx;
-	struct cvmx_sli_state2_s cn68xx;
-	struct cvmx_sli_state2_s cn68xxp1;
-	struct cvmx_sli_state2_s cnf71xx;
-};
-
-union cvmx_sli_state3 {
-	uint64_t u64;
-	struct cvmx_sli_state3_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_56_63:8;
-		uint64_t psm1:15;
-		uint64_t psm0:15;
-		uint64_t nsm1:13;
-		uint64_t nsm0:13;
-#else
-		uint64_t nsm0:13;
-		uint64_t nsm1:13;
-		uint64_t psm0:15;
-		uint64_t psm1:15;
-		uint64_t reserved_56_63:8;
-#endif
-	} s;
-	struct cvmx_sli_state3_s cn61xx;
-	struct cvmx_sli_state3_s cn63xx;
-	struct cvmx_sli_state3_s cn63xxp1;
-	struct cvmx_sli_state3_s cn66xx;
-	struct cvmx_sli_state3_s cn68xx;
-	struct cvmx_sli_state3_s cn68xxp1;
-	struct cvmx_sli_state3_s cnf71xx;
-};
-
-union cvmx_sli_tx_pipe {
-	uint64_t u64;
-	struct cvmx_sli_tx_pipe_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_24_63:40;
-		uint64_t nump:8;
-		uint64_t reserved_7_15:9;
-		uint64_t base:7;
-#else
-		uint64_t base:7;
-		uint64_t reserved_7_15:9;
-		uint64_t nump:8;
-		uint64_t reserved_24_63:40;
-#endif
+		__BITFIELD_FIELD(uint64_t reserved_5_63:59,
+		__BITFIELD_FIELD(uint64_t wind_d:1,
+		__BITFIELD_FIELD(uint64_t bar0_d:1,
+		__BITFIELD_FIELD(uint64_t mrrs:3,
+		;))))
 	} s;
-	struct cvmx_sli_tx_pipe_s cn68xx;
-	struct cvmx_sli_tx_pipe_s cn68xxp1;
 };
 
-union cvmx_sli_win_rd_addr {
-	uint64_t u64;
-	struct cvmx_sli_win_rd_addr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_51_63:13;
-		uint64_t ld_cmd:2;
-		uint64_t iobit:1;
-		uint64_t rd_addr:48;
-#else
-		uint64_t rd_addr:48;
-		uint64_t iobit:1;
-		uint64_t ld_cmd:2;
-		uint64_t reserved_51_63:13;
-#endif
-	} s;
-	struct cvmx_sli_win_rd_addr_s cn61xx;
-	struct cvmx_sli_win_rd_addr_s cn63xx;
-	struct cvmx_sli_win_rd_addr_s cn63xxp1;
-	struct cvmx_sli_win_rd_addr_s cn66xx;
-	struct cvmx_sli_win_rd_addr_s cn68xx;
-	struct cvmx_sli_win_rd_addr_s cn68xxp1;
-	struct cvmx_sli_win_rd_addr_s cnf71xx;
-};
-
-union cvmx_sli_win_rd_data {
-	uint64_t u64;
-	struct cvmx_sli_win_rd_data_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t rd_data:64;
-#else
-		uint64_t rd_data:64;
-#endif
-	} s;
-	struct cvmx_sli_win_rd_data_s cn61xx;
-	struct cvmx_sli_win_rd_data_s cn63xx;
-	struct cvmx_sli_win_rd_data_s cn63xxp1;
-	struct cvmx_sli_win_rd_data_s cn66xx;
-	struct cvmx_sli_win_rd_data_s cn68xx;
-	struct cvmx_sli_win_rd_data_s cn68xxp1;
-	struct cvmx_sli_win_rd_data_s cnf71xx;
-};
-
-union cvmx_sli_win_wr_addr {
-	uint64_t u64;
-	struct cvmx_sli_win_wr_addr_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_49_63:15;
-		uint64_t iobit:1;
-		uint64_t wr_addr:45;
-		uint64_t reserved_0_2:3;
-#else
-		uint64_t reserved_0_2:3;
-		uint64_t wr_addr:45;
-		uint64_t iobit:1;
-		uint64_t reserved_49_63:15;
-#endif
-	} s;
-	struct cvmx_sli_win_wr_addr_s cn61xx;
-	struct cvmx_sli_win_wr_addr_s cn63xx;
-	struct cvmx_sli_win_wr_addr_s cn63xxp1;
-	struct cvmx_sli_win_wr_addr_s cn66xx;
-	struct cvmx_sli_win_wr_addr_s cn68xx;
-	struct cvmx_sli_win_wr_addr_s cn68xxp1;
-	struct cvmx_sli_win_wr_addr_s cnf71xx;
-};
-
-union cvmx_sli_win_wr_data {
-	uint64_t u64;
-	struct cvmx_sli_win_wr_data_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t wr_data:64;
-#else
-		uint64_t wr_data:64;
-#endif
-	} s;
-	struct cvmx_sli_win_wr_data_s cn61xx;
-	struct cvmx_sli_win_wr_data_s cn63xx;
-	struct cvmx_sli_win_wr_data_s cn63xxp1;
-	struct cvmx_sli_win_wr_data_s cn66xx;
-	struct cvmx_sli_win_wr_data_s cn68xx;
-	struct cvmx_sli_win_wr_data_s cn68xxp1;
-	struct cvmx_sli_win_wr_data_s cnf71xx;
-};
-
-union cvmx_sli_win_wr_mask {
-	uint64_t u64;
-	struct cvmx_sli_win_wr_mask_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_8_63:56;
-		uint64_t wr_mask:8;
-#else
-		uint64_t wr_mask:8;
-		uint64_t reserved_8_63:56;
-#endif
-	} s;
-	struct cvmx_sli_win_wr_mask_s cn61xx;
-	struct cvmx_sli_win_wr_mask_s cn63xx;
-	struct cvmx_sli_win_wr_mask_s cn63xxp1;
-	struct cvmx_sli_win_wr_mask_s cn66xx;
-	struct cvmx_sli_win_wr_mask_s cn68xx;
-	struct cvmx_sli_win_wr_mask_s cn68xxp1;
-	struct cvmx_sli_win_wr_mask_s cnf71xx;
-};
-
-union cvmx_sli_window_ctl {
+union cvmx_sli_mem_access_subidx {
 	uint64_t u64;
-	struct cvmx_sli_window_ctl_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_32_63:32;
-		uint64_t time:32;
-#else
-		uint64_t time:32;
-		uint64_t reserved_32_63:32;
-#endif
+	struct cvmx_sli_mem_access_subidx_s {
+		__BITFIELD_FIELD(uint64_t reserved_43_63:21,
+		__BITFIELD_FIELD(uint64_t zero:1,
+		__BITFIELD_FIELD(uint64_t port:3,
+		__BITFIELD_FIELD(uint64_t nmerge:1,
+		__BITFIELD_FIELD(uint64_t esr:2,
+		__BITFIELD_FIELD(uint64_t esw:2,
+		__BITFIELD_FIELD(uint64_t wtype:2,
+		__BITFIELD_FIELD(uint64_t rtype:2,
+		__BITFIELD_FIELD(uint64_t ba:30,
+		;)))))))))
 	} s;
-	struct cvmx_sli_window_ctl_s cn61xx;
-	struct cvmx_sli_window_ctl_s cn63xx;
-	struct cvmx_sli_window_ctl_s cn63xxp1;
-	struct cvmx_sli_window_ctl_s cn66xx;
-	struct cvmx_sli_window_ctl_s cn68xx;
-	struct cvmx_sli_window_ctl_s cn68xxp1;
-	struct cvmx_sli_window_ctl_s cnf71xx;
+	struct cvmx_sli_mem_access_subidx_cn68xx {
+		__BITFIELD_FIELD(uint64_t reserved_43_63:21,
+		__BITFIELD_FIELD(uint64_t zero:1,
+		__BITFIELD_FIELD(uint64_t port:3,
+		__BITFIELD_FIELD(uint64_t nmerge:1,
+		__BITFIELD_FIELD(uint64_t esr:2,
+		__BITFIELD_FIELD(uint64_t esw:2,
+		__BITFIELD_FIELD(uint64_t wtype:2,
+		__BITFIELD_FIELD(uint64_t rtype:2,
+		__BITFIELD_FIELD(uint64_t ba:28,
+		__BITFIELD_FIELD(uint64_t reserved_0_1:2,
+		;))))))))))
+	} cn68xx;
 };
 
 #endif
diff --git a/arch/mips/pci/pcie-octeon.c b/arch/mips/pci/pcie-octeon.c
index 9f672ceb089b..ad3584dbc9d7 100644
--- a/arch/mips/pci/pcie-octeon.c
+++ b/arch/mips/pci/pcie-octeon.c
@@ -679,7 +679,7 @@ static void __cvmx_increment_ba(union cvmx_sli_mem_access_subidx *pmas)
 	if (OCTEON_IS_MODEL(OCTEON_CN68XX))
 		pmas->cn68xx.ba++;
 	else
-		pmas->cn63xx.ba++;
+		pmas->s.ba++;
 }
 
 /**
@@ -1351,7 +1351,7 @@ static int __cvmx_pcie_rc_initialize_gen2(int pcie_port)
 	if (OCTEON_IS_MODEL(OCTEON_CN68XX))
 		mem_access_subid.cn68xx.ba = 0;
 	else
-		mem_access_subid.cn63xx.ba = 0;
+		mem_access_subid.s.ba = 0;
 
 	/*
 	 * Setup mem access 12-15 for port 0, 16-19 for port 1,
-- 
cgit v1.2.3


From 7fd57ab9d9cff7ef0181c1e5db0ebd63cd514d6a Mon Sep 17 00:00:00 2001
From: "Steven J. Hill" <Steven.Hill@cavium.com>
Date: Thu, 9 Mar 2017 08:15:38 -0600
Subject: MIPS: Octeon: Fix compile error when USB is not enabled.

Move all USB platform code to one place within the file.

Signed-off-by: Steven J. Hill <Steven.Hill@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15406/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/octeon-platform.c | 97 ++++++++++++++++---------------
 1 file changed, 49 insertions(+), 48 deletions(-)

diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index 3375e61daa19..e3c77d8a0d56 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2004-2016 Cavium Networks
+ * Copyright (C) 2004-2017 Cavium, Inc.
  * Copyright (C) 2008 Wind River Systems
  */
 
@@ -13,61 +13,19 @@
 #include <linux/of_platform.h>
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
-#include <linux/usb/ehci_def.h>
-#include <linux/usb/ehci_pdriver.h>
-#include <linux/usb/ohci_pdriver.h>
 
 #include <asm/octeon/octeon.h>
 #include <asm/octeon/cvmx-helper-board.h>
+
+#ifdef CONFIG_USB
+#include <linux/usb/ehci_def.h>
+#include <linux/usb/ehci_pdriver.h>
+#include <linux/usb/ohci_pdriver.h>
 #include <asm/octeon/cvmx-uctlx-defs.h>
 
 #define CVMX_UAHCX_EHCI_USBCMD	(CVMX_ADD_IO_SEG(0x00016F0000000010ull))
 #define CVMX_UAHCX_OHCI_USBCMD	(CVMX_ADD_IO_SEG(0x00016F0000000408ull))
 
-/* Octeon Random Number Generator.  */
-static int __init octeon_rng_device_init(void)
-{
-	struct platform_device *pd;
-	int ret = 0;
-
-	struct resource rng_resources[] = {
-		{
-			.flags	= IORESOURCE_MEM,
-			.start	= XKPHYS_TO_PHYS(CVMX_RNM_CTL_STATUS),
-			.end	= XKPHYS_TO_PHYS(CVMX_RNM_CTL_STATUS) + 0xf
-		}, {
-			.flags	= IORESOURCE_MEM,
-			.start	= cvmx_build_io_address(8, 0),
-			.end	= cvmx_build_io_address(8, 0) + 0x7
-		}
-	};
-
-	pd = platform_device_alloc("octeon_rng", -1);
-	if (!pd) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = platform_device_add_resources(pd, rng_resources,
-					    ARRAY_SIZE(rng_resources));
-	if (ret)
-		goto fail;
-
-	ret = platform_device_add(pd);
-	if (ret)
-		goto fail;
-
-	return ret;
-fail:
-	platform_device_put(pd);
-
-out:
-	return ret;
-}
-device_initcall(octeon_rng_device_init);
-
-#ifdef CONFIG_USB
-
 static DEFINE_MUTEX(octeon2_usb_clocks_mutex);
 
 static int octeon2_usb_clock_start_cnt;
@@ -440,6 +398,47 @@ device_initcall(octeon_ohci_device_init);
 
 #endif /* CONFIG_USB */
 
+/* Octeon Random Number Generator.  */
+static int __init octeon_rng_device_init(void)
+{
+	struct platform_device *pd;
+	int ret = 0;
+
+	struct resource rng_resources[] = {
+		{
+			.flags	= IORESOURCE_MEM,
+			.start	= XKPHYS_TO_PHYS(CVMX_RNM_CTL_STATUS),
+			.end	= XKPHYS_TO_PHYS(CVMX_RNM_CTL_STATUS) + 0xf
+		}, {
+			.flags	= IORESOURCE_MEM,
+			.start	= cvmx_build_io_address(8, 0),
+			.end	= cvmx_build_io_address(8, 0) + 0x7
+		}
+	};
+
+	pd = platform_device_alloc("octeon_rng", -1);
+	if (!pd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = platform_device_add_resources(pd, rng_resources,
+					    ARRAY_SIZE(rng_resources));
+	if (ret)
+		goto fail;
+
+	ret = platform_device_add(pd);
+	if (ret)
+		goto fail;
+
+	return ret;
+fail:
+	platform_device_put(pd);
+
+out:
+	return ret;
+}
+device_initcall(octeon_rng_device_init);
 
 static struct of_device_id __initdata octeon_ids[] = {
 	{ .compatible = "simple-bus", },
@@ -1004,6 +1003,7 @@ end_led:
 		;
 	}
 
+#ifdef CONFIG_USB
 	/* OHCI/UHCI USB */
 	alias_prop = fdt_getprop(initial_boot_params, aliases,
 				 "uctl", NULL);
@@ -1052,6 +1052,7 @@ end_led:
 			}
 		}
 	}
+#endif
 
 	return 0;
 }
-- 
cgit v1.2.3


From 97b702981da0aa042007cd5b23a0f3904d470ec3 Mon Sep 17 00:00:00 2001
From: "Steven J. Hill" <Steven.Hill@cavium.com>
Date: Thu, 9 Mar 2017 08:31:30 -0600
Subject: MIPS: Octeon: Remove unused PCIERCX types and macros.

Remove all unused bitfields and macros. Convert the remaining
bitfields to use __BITFIELD_FIELD instead of #ifdef.

[ralf@linux-mips.org: Add inclusions of <uapi/asm/bitfield.h> as necessary.]

Signed-off-by: Steven J. Hill <steven.hill@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15408/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/octeon/cvmx-pciercx-defs.h | 3225 ++--------------------
 1 file changed, 209 insertions(+), 3016 deletions(-)

diff --git a/arch/mips/include/asm/octeon/cvmx-pciercx-defs.h b/arch/mips/include/asm/octeon/cvmx-pciercx-defs.h
index 4bce393391e2..e2dce1acf029 100644
--- a/arch/mips/include/asm/octeon/cvmx-pciercx-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-pciercx-defs.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2012 Cavium Networks
+ * Copyright (c) 2003-2017 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -28,3148 +28,341 @@
 #ifndef __CVMX_PCIERCX_DEFS_H__
 #define __CVMX_PCIERCX_DEFS_H__
 
-#define CVMX_PCIERCX_CFG000(block_id) (0x0000000000000000ull)
+#include <uapi/asm/bitfield.h>
+
 #define CVMX_PCIERCX_CFG001(block_id) (0x0000000000000004ull)
-#define CVMX_PCIERCX_CFG002(block_id) (0x0000000000000008ull)
-#define CVMX_PCIERCX_CFG003(block_id) (0x000000000000000Cull)
-#define CVMX_PCIERCX_CFG004(block_id) (0x0000000000000010ull)
-#define CVMX_PCIERCX_CFG005(block_id) (0x0000000000000014ull)
 #define CVMX_PCIERCX_CFG006(block_id) (0x0000000000000018ull)
-#define CVMX_PCIERCX_CFG007(block_id) (0x000000000000001Cull)
 #define CVMX_PCIERCX_CFG008(block_id) (0x0000000000000020ull)
 #define CVMX_PCIERCX_CFG009(block_id) (0x0000000000000024ull)
 #define CVMX_PCIERCX_CFG010(block_id) (0x0000000000000028ull)
 #define CVMX_PCIERCX_CFG011(block_id) (0x000000000000002Cull)
-#define CVMX_PCIERCX_CFG012(block_id) (0x0000000000000030ull)
-#define CVMX_PCIERCX_CFG013(block_id) (0x0000000000000034ull)
-#define CVMX_PCIERCX_CFG014(block_id) (0x0000000000000038ull)
-#define CVMX_PCIERCX_CFG015(block_id) (0x000000000000003Cull)
-#define CVMX_PCIERCX_CFG016(block_id) (0x0000000000000040ull)
-#define CVMX_PCIERCX_CFG017(block_id) (0x0000000000000044ull)
-#define CVMX_PCIERCX_CFG020(block_id) (0x0000000000000050ull)
-#define CVMX_PCIERCX_CFG021(block_id) (0x0000000000000054ull)
-#define CVMX_PCIERCX_CFG022(block_id) (0x0000000000000058ull)
-#define CVMX_PCIERCX_CFG023(block_id) (0x000000000000005Cull)
-#define CVMX_PCIERCX_CFG028(block_id) (0x0000000000000070ull)
-#define CVMX_PCIERCX_CFG029(block_id) (0x0000000000000074ull)
 #define CVMX_PCIERCX_CFG030(block_id) (0x0000000000000078ull)
 #define CVMX_PCIERCX_CFG031(block_id) (0x000000000000007Cull)
 #define CVMX_PCIERCX_CFG032(block_id) (0x0000000000000080ull)
-#define CVMX_PCIERCX_CFG033(block_id) (0x0000000000000084ull)
 #define CVMX_PCIERCX_CFG034(block_id) (0x0000000000000088ull)
 #define CVMX_PCIERCX_CFG035(block_id) (0x000000000000008Cull)
-#define CVMX_PCIERCX_CFG036(block_id) (0x0000000000000090ull)
-#define CVMX_PCIERCX_CFG037(block_id) (0x0000000000000094ull)
-#define CVMX_PCIERCX_CFG038(block_id) (0x0000000000000098ull)
-#define CVMX_PCIERCX_CFG039(block_id) (0x000000000000009Cull)
 #define CVMX_PCIERCX_CFG040(block_id) (0x00000000000000A0ull)
-#define CVMX_PCIERCX_CFG041(block_id) (0x00000000000000A4ull)
-#define CVMX_PCIERCX_CFG042(block_id) (0x00000000000000A8ull)
-#define CVMX_PCIERCX_CFG064(block_id) (0x0000000000000100ull)
-#define CVMX_PCIERCX_CFG065(block_id) (0x0000000000000104ull)
 #define CVMX_PCIERCX_CFG066(block_id) (0x0000000000000108ull)
-#define CVMX_PCIERCX_CFG067(block_id) (0x000000000000010Cull)
-#define CVMX_PCIERCX_CFG068(block_id) (0x0000000000000110ull)
 #define CVMX_PCIERCX_CFG069(block_id) (0x0000000000000114ull)
 #define CVMX_PCIERCX_CFG070(block_id) (0x0000000000000118ull)
-#define CVMX_PCIERCX_CFG071(block_id) (0x000000000000011Cull)
-#define CVMX_PCIERCX_CFG072(block_id) (0x0000000000000120ull)
-#define CVMX_PCIERCX_CFG073(block_id) (0x0000000000000124ull)
-#define CVMX_PCIERCX_CFG074(block_id) (0x0000000000000128ull)
 #define CVMX_PCIERCX_CFG075(block_id) (0x000000000000012Cull)
-#define CVMX_PCIERCX_CFG076(block_id) (0x0000000000000130ull)
-#define CVMX_PCIERCX_CFG077(block_id) (0x0000000000000134ull)
 #define CVMX_PCIERCX_CFG448(block_id) (0x0000000000000700ull)
-#define CVMX_PCIERCX_CFG449(block_id) (0x0000000000000704ull)
-#define CVMX_PCIERCX_CFG450(block_id) (0x0000000000000708ull)
-#define CVMX_PCIERCX_CFG451(block_id) (0x000000000000070Cull)
 #define CVMX_PCIERCX_CFG452(block_id) (0x0000000000000710ull)
-#define CVMX_PCIERCX_CFG453(block_id) (0x0000000000000714ull)
-#define CVMX_PCIERCX_CFG454(block_id) (0x0000000000000718ull)
 #define CVMX_PCIERCX_CFG455(block_id) (0x000000000000071Cull)
-#define CVMX_PCIERCX_CFG456(block_id) (0x0000000000000720ull)
-#define CVMX_PCIERCX_CFG458(block_id) (0x0000000000000728ull)
-#define CVMX_PCIERCX_CFG459(block_id) (0x000000000000072Cull)
-#define CVMX_PCIERCX_CFG460(block_id) (0x0000000000000730ull)
-#define CVMX_PCIERCX_CFG461(block_id) (0x0000000000000734ull)
-#define CVMX_PCIERCX_CFG462(block_id) (0x0000000000000738ull)
-#define CVMX_PCIERCX_CFG463(block_id) (0x000000000000073Cull)
-#define CVMX_PCIERCX_CFG464(block_id) (0x0000000000000740ull)
-#define CVMX_PCIERCX_CFG465(block_id) (0x0000000000000744ull)
-#define CVMX_PCIERCX_CFG466(block_id) (0x0000000000000748ull)
-#define CVMX_PCIERCX_CFG467(block_id) (0x000000000000074Cull)
-#define CVMX_PCIERCX_CFG468(block_id) (0x0000000000000750ull)
-#define CVMX_PCIERCX_CFG490(block_id) (0x00000000000007A8ull)
-#define CVMX_PCIERCX_CFG491(block_id) (0x00000000000007ACull)
-#define CVMX_PCIERCX_CFG492(block_id) (0x00000000000007B0ull)
 #define CVMX_PCIERCX_CFG515(block_id) (0x000000000000080Cull)
-#define CVMX_PCIERCX_CFG516(block_id) (0x0000000000000810ull)
-#define CVMX_PCIERCX_CFG517(block_id) (0x0000000000000814ull)
-
-union cvmx_pciercx_cfg000 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg000_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t devid:16;
-		uint32_t vendid:16;
-#else
-		uint32_t vendid:16;
-		uint32_t devid:16;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg000_s cn52xx;
-	struct cvmx_pciercx_cfg000_s cn52xxp1;
-	struct cvmx_pciercx_cfg000_s cn56xx;
-	struct cvmx_pciercx_cfg000_s cn56xxp1;
-	struct cvmx_pciercx_cfg000_s cn61xx;
-	struct cvmx_pciercx_cfg000_s cn63xx;
-	struct cvmx_pciercx_cfg000_s cn63xxp1;
-	struct cvmx_pciercx_cfg000_s cn66xx;
-	struct cvmx_pciercx_cfg000_s cn68xx;
-	struct cvmx_pciercx_cfg000_s cn68xxp1;
-	struct cvmx_pciercx_cfg000_s cnf71xx;
-};
 
 union cvmx_pciercx_cfg001 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg001_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dpe:1;
-		uint32_t sse:1;
-		uint32_t rma:1;
-		uint32_t rta:1;
-		uint32_t sta:1;
-		uint32_t devt:2;
-		uint32_t mdpe:1;
-		uint32_t fbb:1;
-		uint32_t reserved_22_22:1;
-		uint32_t m66:1;
-		uint32_t cl:1;
-		uint32_t i_stat:1;
-		uint32_t reserved_11_18:8;
-		uint32_t i_dis:1;
-		uint32_t fbbe:1;
-		uint32_t see:1;
-		uint32_t ids_wcc:1;
-		uint32_t per:1;
-		uint32_t vps:1;
-		uint32_t mwice:1;
-		uint32_t scse:1;
-		uint32_t me:1;
-		uint32_t msae:1;
-		uint32_t isae:1;
-#else
-		uint32_t isae:1;
-		uint32_t msae:1;
-		uint32_t me:1;
-		uint32_t scse:1;
-		uint32_t mwice:1;
-		uint32_t vps:1;
-		uint32_t per:1;
-		uint32_t ids_wcc:1;
-		uint32_t see:1;
-		uint32_t fbbe:1;
-		uint32_t i_dis:1;
-		uint32_t reserved_11_18:8;
-		uint32_t i_stat:1;
-		uint32_t cl:1;
-		uint32_t m66:1;
-		uint32_t reserved_22_22:1;
-		uint32_t fbb:1;
-		uint32_t mdpe:1;
-		uint32_t devt:2;
-		uint32_t sta:1;
-		uint32_t rta:1;
-		uint32_t rma:1;
-		uint32_t sse:1;
-		uint32_t dpe:1;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg001_s cn52xx;
-	struct cvmx_pciercx_cfg001_s cn52xxp1;
-	struct cvmx_pciercx_cfg001_s cn56xx;
-	struct cvmx_pciercx_cfg001_s cn56xxp1;
-	struct cvmx_pciercx_cfg001_s cn61xx;
-	struct cvmx_pciercx_cfg001_s cn63xx;
-	struct cvmx_pciercx_cfg001_s cn63xxp1;
-	struct cvmx_pciercx_cfg001_s cn66xx;
-	struct cvmx_pciercx_cfg001_s cn68xx;
-	struct cvmx_pciercx_cfg001_s cn68xxp1;
-	struct cvmx_pciercx_cfg001_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg002 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg002_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t bcc:8;
-		uint32_t sc:8;
-		uint32_t pi:8;
-		uint32_t rid:8;
-#else
-		uint32_t rid:8;
-		uint32_t pi:8;
-		uint32_t sc:8;
-		uint32_t bcc:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg002_s cn52xx;
-	struct cvmx_pciercx_cfg002_s cn52xxp1;
-	struct cvmx_pciercx_cfg002_s cn56xx;
-	struct cvmx_pciercx_cfg002_s cn56xxp1;
-	struct cvmx_pciercx_cfg002_s cn61xx;
-	struct cvmx_pciercx_cfg002_s cn63xx;
-	struct cvmx_pciercx_cfg002_s cn63xxp1;
-	struct cvmx_pciercx_cfg002_s cn66xx;
-	struct cvmx_pciercx_cfg002_s cn68xx;
-	struct cvmx_pciercx_cfg002_s cn68xxp1;
-	struct cvmx_pciercx_cfg002_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg003 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg003_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t bist:8;
-		uint32_t mfd:1;
-		uint32_t chf:7;
-		uint32_t lt:8;
-		uint32_t cls:8;
-#else
-		uint32_t cls:8;
-		uint32_t lt:8;
-		uint32_t chf:7;
-		uint32_t mfd:1;
-		uint32_t bist:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg003_s cn52xx;
-	struct cvmx_pciercx_cfg003_s cn52xxp1;
-	struct cvmx_pciercx_cfg003_s cn56xx;
-	struct cvmx_pciercx_cfg003_s cn56xxp1;
-	struct cvmx_pciercx_cfg003_s cn61xx;
-	struct cvmx_pciercx_cfg003_s cn63xx;
-	struct cvmx_pciercx_cfg003_s cn63xxp1;
-	struct cvmx_pciercx_cfg003_s cn66xx;
-	struct cvmx_pciercx_cfg003_s cn68xx;
-	struct cvmx_pciercx_cfg003_s cn68xxp1;
-	struct cvmx_pciercx_cfg003_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg004 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg004_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
+		__BITFIELD_FIELD(uint32_t dpe:1,
+		__BITFIELD_FIELD(uint32_t sse:1,
+		__BITFIELD_FIELD(uint32_t rma:1,
+		__BITFIELD_FIELD(uint32_t rta:1,
+		__BITFIELD_FIELD(uint32_t sta:1,
+		__BITFIELD_FIELD(uint32_t devt:2,
+		__BITFIELD_FIELD(uint32_t mdpe:1,
+		__BITFIELD_FIELD(uint32_t fbb:1,
+		__BITFIELD_FIELD(uint32_t reserved_22_22:1,
+		__BITFIELD_FIELD(uint32_t m66:1,
+		__BITFIELD_FIELD(uint32_t cl:1,
+		__BITFIELD_FIELD(uint32_t i_stat:1,
+		__BITFIELD_FIELD(uint32_t reserved_11_18:8,
+		__BITFIELD_FIELD(uint32_t i_dis:1,
+		__BITFIELD_FIELD(uint32_t fbbe:1,
+		__BITFIELD_FIELD(uint32_t see:1,
+		__BITFIELD_FIELD(uint32_t ids_wcc:1,
+		__BITFIELD_FIELD(uint32_t per:1,
+		__BITFIELD_FIELD(uint32_t vps:1,
+		__BITFIELD_FIELD(uint32_t mwice:1,
+		__BITFIELD_FIELD(uint32_t scse:1,
+		__BITFIELD_FIELD(uint32_t me:1,
+		__BITFIELD_FIELD(uint32_t msae:1,
+		__BITFIELD_FIELD(uint32_t isae:1,
+		;))))))))))))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg004_s cn52xx;
-	struct cvmx_pciercx_cfg004_s cn52xxp1;
-	struct cvmx_pciercx_cfg004_s cn56xx;
-	struct cvmx_pciercx_cfg004_s cn56xxp1;
-	struct cvmx_pciercx_cfg004_s cn61xx;
-	struct cvmx_pciercx_cfg004_s cn63xx;
-	struct cvmx_pciercx_cfg004_s cn63xxp1;
-	struct cvmx_pciercx_cfg004_s cn66xx;
-	struct cvmx_pciercx_cfg004_s cn68xx;
-	struct cvmx_pciercx_cfg004_s cn68xxp1;
-	struct cvmx_pciercx_cfg004_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg005 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg005_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg005_s cn52xx;
-	struct cvmx_pciercx_cfg005_s cn52xxp1;
-	struct cvmx_pciercx_cfg005_s cn56xx;
-	struct cvmx_pciercx_cfg005_s cn56xxp1;
-	struct cvmx_pciercx_cfg005_s cn61xx;
-	struct cvmx_pciercx_cfg005_s cn63xx;
-	struct cvmx_pciercx_cfg005_s cn63xxp1;
-	struct cvmx_pciercx_cfg005_s cn66xx;
-	struct cvmx_pciercx_cfg005_s cn68xx;
-	struct cvmx_pciercx_cfg005_s cn68xxp1;
-	struct cvmx_pciercx_cfg005_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg006 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg006_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t slt:8;
-		uint32_t subbnum:8;
-		uint32_t sbnum:8;
-		uint32_t pbnum:8;
-#else
-		uint32_t pbnum:8;
-		uint32_t sbnum:8;
-		uint32_t subbnum:8;
-		uint32_t slt:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg006_s cn52xx;
-	struct cvmx_pciercx_cfg006_s cn52xxp1;
-	struct cvmx_pciercx_cfg006_s cn56xx;
-	struct cvmx_pciercx_cfg006_s cn56xxp1;
-	struct cvmx_pciercx_cfg006_s cn61xx;
-	struct cvmx_pciercx_cfg006_s cn63xx;
-	struct cvmx_pciercx_cfg006_s cn63xxp1;
-	struct cvmx_pciercx_cfg006_s cn66xx;
-	struct cvmx_pciercx_cfg006_s cn68xx;
-	struct cvmx_pciercx_cfg006_s cn68xxp1;
-	struct cvmx_pciercx_cfg006_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg007 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg007_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dpe:1;
-		uint32_t sse:1;
-		uint32_t rma:1;
-		uint32_t rta:1;
-		uint32_t sta:1;
-		uint32_t devt:2;
-		uint32_t mdpe:1;
-		uint32_t fbb:1;
-		uint32_t reserved_22_22:1;
-		uint32_t m66:1;
-		uint32_t reserved_16_20:5;
-		uint32_t lio_limi:4;
-		uint32_t reserved_9_11:3;
-		uint32_t io32b:1;
-		uint32_t lio_base:4;
-		uint32_t reserved_1_3:3;
-		uint32_t io32a:1;
-#else
-		uint32_t io32a:1;
-		uint32_t reserved_1_3:3;
-		uint32_t lio_base:4;
-		uint32_t io32b:1;
-		uint32_t reserved_9_11:3;
-		uint32_t lio_limi:4;
-		uint32_t reserved_16_20:5;
-		uint32_t m66:1;
-		uint32_t reserved_22_22:1;
-		uint32_t fbb:1;
-		uint32_t mdpe:1;
-		uint32_t devt:2;
-		uint32_t sta:1;
-		uint32_t rta:1;
-		uint32_t rma:1;
-		uint32_t sse:1;
-		uint32_t dpe:1;
-#endif
+		__BITFIELD_FIELD(uint32_t slt:8,
+		__BITFIELD_FIELD(uint32_t subbnum:8,
+		__BITFIELD_FIELD(uint32_t sbnum:8,
+		__BITFIELD_FIELD(uint32_t pbnum:8,
+		;))))
 	} s;
-	struct cvmx_pciercx_cfg007_s cn52xx;
-	struct cvmx_pciercx_cfg007_s cn52xxp1;
-	struct cvmx_pciercx_cfg007_s cn56xx;
-	struct cvmx_pciercx_cfg007_s cn56xxp1;
-	struct cvmx_pciercx_cfg007_s cn61xx;
-	struct cvmx_pciercx_cfg007_s cn63xx;
-	struct cvmx_pciercx_cfg007_s cn63xxp1;
-	struct cvmx_pciercx_cfg007_s cn66xx;
-	struct cvmx_pciercx_cfg007_s cn68xx;
-	struct cvmx_pciercx_cfg007_s cn68xxp1;
-	struct cvmx_pciercx_cfg007_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg008 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg008_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t ml_addr:12;
-		uint32_t reserved_16_19:4;
-		uint32_t mb_addr:12;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t mb_addr:12;
-		uint32_t reserved_16_19:4;
-		uint32_t ml_addr:12;
-#endif
+		__BITFIELD_FIELD(uint32_t ml_addr:12,
+		__BITFIELD_FIELD(uint32_t reserved_16_19:4,
+		__BITFIELD_FIELD(uint32_t mb_addr:12,
+		__BITFIELD_FIELD(uint32_t reserved_0_3:4,
+		;))))
 	} s;
-	struct cvmx_pciercx_cfg008_s cn52xx;
-	struct cvmx_pciercx_cfg008_s cn52xxp1;
-	struct cvmx_pciercx_cfg008_s cn56xx;
-	struct cvmx_pciercx_cfg008_s cn56xxp1;
-	struct cvmx_pciercx_cfg008_s cn61xx;
-	struct cvmx_pciercx_cfg008_s cn63xx;
-	struct cvmx_pciercx_cfg008_s cn63xxp1;
-	struct cvmx_pciercx_cfg008_s cn66xx;
-	struct cvmx_pciercx_cfg008_s cn68xx;
-	struct cvmx_pciercx_cfg008_s cn68xxp1;
-	struct cvmx_pciercx_cfg008_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg009 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg009_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t lmem_limit:12;
-		uint32_t reserved_17_19:3;
-		uint32_t mem64b:1;
-		uint32_t lmem_base:12;
-		uint32_t reserved_1_3:3;
-		uint32_t mem64a:1;
-#else
-		uint32_t mem64a:1;
-		uint32_t reserved_1_3:3;
-		uint32_t lmem_base:12;
-		uint32_t mem64b:1;
-		uint32_t reserved_17_19:3;
-		uint32_t lmem_limit:12;
-#endif
+		__BITFIELD_FIELD(uint32_t lmem_limit:12,
+		__BITFIELD_FIELD(uint32_t reserved_17_19:3,
+		__BITFIELD_FIELD(uint32_t mem64b:1,
+		__BITFIELD_FIELD(uint32_t lmem_base:12,
+		__BITFIELD_FIELD(uint32_t reserved_1_3:3,
+		__BITFIELD_FIELD(uint32_t mem64a:1,
+		;))))))
 	} s;
-	struct cvmx_pciercx_cfg009_s cn52xx;
-	struct cvmx_pciercx_cfg009_s cn52xxp1;
-	struct cvmx_pciercx_cfg009_s cn56xx;
-	struct cvmx_pciercx_cfg009_s cn56xxp1;
-	struct cvmx_pciercx_cfg009_s cn61xx;
-	struct cvmx_pciercx_cfg009_s cn63xx;
-	struct cvmx_pciercx_cfg009_s cn63xxp1;
-	struct cvmx_pciercx_cfg009_s cn66xx;
-	struct cvmx_pciercx_cfg009_s cn68xx;
-	struct cvmx_pciercx_cfg009_s cn68xxp1;
-	struct cvmx_pciercx_cfg009_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg010 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg010_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t umem_base:32;
-#else
-		uint32_t umem_base:32;
-#endif
+		uint32_t umem_base;
 	} s;
-	struct cvmx_pciercx_cfg010_s cn52xx;
-	struct cvmx_pciercx_cfg010_s cn52xxp1;
-	struct cvmx_pciercx_cfg010_s cn56xx;
-	struct cvmx_pciercx_cfg010_s cn56xxp1;
-	struct cvmx_pciercx_cfg010_s cn61xx;
-	struct cvmx_pciercx_cfg010_s cn63xx;
-	struct cvmx_pciercx_cfg010_s cn63xxp1;
-	struct cvmx_pciercx_cfg010_s cn66xx;
-	struct cvmx_pciercx_cfg010_s cn68xx;
-	struct cvmx_pciercx_cfg010_s cn68xxp1;
-	struct cvmx_pciercx_cfg010_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg011 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg011_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t umem_limit:32;
-#else
-		uint32_t umem_limit:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg011_s cn52xx;
-	struct cvmx_pciercx_cfg011_s cn52xxp1;
-	struct cvmx_pciercx_cfg011_s cn56xx;
-	struct cvmx_pciercx_cfg011_s cn56xxp1;
-	struct cvmx_pciercx_cfg011_s cn61xx;
-	struct cvmx_pciercx_cfg011_s cn63xx;
-	struct cvmx_pciercx_cfg011_s cn63xxp1;
-	struct cvmx_pciercx_cfg011_s cn66xx;
-	struct cvmx_pciercx_cfg011_s cn68xx;
-	struct cvmx_pciercx_cfg011_s cn68xxp1;
-	struct cvmx_pciercx_cfg011_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg012 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg012_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t uio_limit:16;
-		uint32_t uio_base:16;
-#else
-		uint32_t uio_base:16;
-		uint32_t uio_limit:16;
-#endif
+		uint32_t umem_limit;
 	} s;
-	struct cvmx_pciercx_cfg012_s cn52xx;
-	struct cvmx_pciercx_cfg012_s cn52xxp1;
-	struct cvmx_pciercx_cfg012_s cn56xx;
-	struct cvmx_pciercx_cfg012_s cn56xxp1;
-	struct cvmx_pciercx_cfg012_s cn61xx;
-	struct cvmx_pciercx_cfg012_s cn63xx;
-	struct cvmx_pciercx_cfg012_s cn63xxp1;
-	struct cvmx_pciercx_cfg012_s cn66xx;
-	struct cvmx_pciercx_cfg012_s cn68xx;
-	struct cvmx_pciercx_cfg012_s cn68xxp1;
-	struct cvmx_pciercx_cfg012_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg013 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg013_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_8_31:24;
-		uint32_t cp:8;
-#else
-		uint32_t cp:8;
-		uint32_t reserved_8_31:24;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg013_s cn52xx;
-	struct cvmx_pciercx_cfg013_s cn52xxp1;
-	struct cvmx_pciercx_cfg013_s cn56xx;
-	struct cvmx_pciercx_cfg013_s cn56xxp1;
-	struct cvmx_pciercx_cfg013_s cn61xx;
-	struct cvmx_pciercx_cfg013_s cn63xx;
-	struct cvmx_pciercx_cfg013_s cn63xxp1;
-	struct cvmx_pciercx_cfg013_s cn66xx;
-	struct cvmx_pciercx_cfg013_s cn68xx;
-	struct cvmx_pciercx_cfg013_s cn68xxp1;
-	struct cvmx_pciercx_cfg013_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg014 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg014_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg014_s cn52xx;
-	struct cvmx_pciercx_cfg014_s cn52xxp1;
-	struct cvmx_pciercx_cfg014_s cn56xx;
-	struct cvmx_pciercx_cfg014_s cn56xxp1;
-	struct cvmx_pciercx_cfg014_s cn61xx;
-	struct cvmx_pciercx_cfg014_s cn63xx;
-	struct cvmx_pciercx_cfg014_s cn63xxp1;
-	struct cvmx_pciercx_cfg014_s cn66xx;
-	struct cvmx_pciercx_cfg014_s cn68xx;
-	struct cvmx_pciercx_cfg014_s cn68xxp1;
-	struct cvmx_pciercx_cfg014_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg015 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg015_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_28_31:4;
-		uint32_t dtsees:1;
-		uint32_t dts:1;
-		uint32_t sdt:1;
-		uint32_t pdt:1;
-		uint32_t fbbe:1;
-		uint32_t sbrst:1;
-		uint32_t mam:1;
-		uint32_t vga16d:1;
-		uint32_t vgae:1;
-		uint32_t isae:1;
-		uint32_t see:1;
-		uint32_t pere:1;
-		uint32_t inta:8;
-		uint32_t il:8;
-#else
-		uint32_t il:8;
-		uint32_t inta:8;
-		uint32_t pere:1;
-		uint32_t see:1;
-		uint32_t isae:1;
-		uint32_t vgae:1;
-		uint32_t vga16d:1;
-		uint32_t mam:1;
-		uint32_t sbrst:1;
-		uint32_t fbbe:1;
-		uint32_t pdt:1;
-		uint32_t sdt:1;
-		uint32_t dts:1;
-		uint32_t dtsees:1;
-		uint32_t reserved_28_31:4;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg015_s cn52xx;
-	struct cvmx_pciercx_cfg015_s cn52xxp1;
-	struct cvmx_pciercx_cfg015_s cn56xx;
-	struct cvmx_pciercx_cfg015_s cn56xxp1;
-	struct cvmx_pciercx_cfg015_s cn61xx;
-	struct cvmx_pciercx_cfg015_s cn63xx;
-	struct cvmx_pciercx_cfg015_s cn63xxp1;
-	struct cvmx_pciercx_cfg015_s cn66xx;
-	struct cvmx_pciercx_cfg015_s cn68xx;
-	struct cvmx_pciercx_cfg015_s cn68xxp1;
-	struct cvmx_pciercx_cfg015_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg016 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg016_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t pmes:5;
-		uint32_t d2s:1;
-		uint32_t d1s:1;
-		uint32_t auxc:3;
-		uint32_t dsi:1;
-		uint32_t reserved_20_20:1;
-		uint32_t pme_clock:1;
-		uint32_t pmsv:3;
-		uint32_t ncp:8;
-		uint32_t pmcid:8;
-#else
-		uint32_t pmcid:8;
-		uint32_t ncp:8;
-		uint32_t pmsv:3;
-		uint32_t pme_clock:1;
-		uint32_t reserved_20_20:1;
-		uint32_t dsi:1;
-		uint32_t auxc:3;
-		uint32_t d1s:1;
-		uint32_t d2s:1;
-		uint32_t pmes:5;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg016_s cn52xx;
-	struct cvmx_pciercx_cfg016_s cn52xxp1;
-	struct cvmx_pciercx_cfg016_s cn56xx;
-	struct cvmx_pciercx_cfg016_s cn56xxp1;
-	struct cvmx_pciercx_cfg016_s cn61xx;
-	struct cvmx_pciercx_cfg016_s cn63xx;
-	struct cvmx_pciercx_cfg016_s cn63xxp1;
-	struct cvmx_pciercx_cfg016_s cn66xx;
-	struct cvmx_pciercx_cfg016_s cn68xx;
-	struct cvmx_pciercx_cfg016_s cn68xxp1;
-	struct cvmx_pciercx_cfg016_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg017 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg017_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t pmdia:8;
-		uint32_t bpccee:1;
-		uint32_t bd3h:1;
-		uint32_t reserved_16_21:6;
-		uint32_t pmess:1;
-		uint32_t pmedsia:2;
-		uint32_t pmds:4;
-		uint32_t pmeens:1;
-		uint32_t reserved_4_7:4;
-		uint32_t nsr:1;
-		uint32_t reserved_2_2:1;
-		uint32_t ps:2;
-#else
-		uint32_t ps:2;
-		uint32_t reserved_2_2:1;
-		uint32_t nsr:1;
-		uint32_t reserved_4_7:4;
-		uint32_t pmeens:1;
-		uint32_t pmds:4;
-		uint32_t pmedsia:2;
-		uint32_t pmess:1;
-		uint32_t reserved_16_21:6;
-		uint32_t bd3h:1;
-		uint32_t bpccee:1;
-		uint32_t pmdia:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg017_s cn52xx;
-	struct cvmx_pciercx_cfg017_s cn52xxp1;
-	struct cvmx_pciercx_cfg017_s cn56xx;
-	struct cvmx_pciercx_cfg017_s cn56xxp1;
-	struct cvmx_pciercx_cfg017_s cn61xx;
-	struct cvmx_pciercx_cfg017_s cn63xx;
-	struct cvmx_pciercx_cfg017_s cn63xxp1;
-	struct cvmx_pciercx_cfg017_s cn66xx;
-	struct cvmx_pciercx_cfg017_s cn68xx;
-	struct cvmx_pciercx_cfg017_s cn68xxp1;
-	struct cvmx_pciercx_cfg017_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg020 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg020_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t pvm:1;
-		uint32_t m64:1;
-		uint32_t mme:3;
-		uint32_t mmc:3;
-		uint32_t msien:1;
-		uint32_t ncp:8;
-		uint32_t msicid:8;
-#else
-		uint32_t msicid:8;
-		uint32_t ncp:8;
-		uint32_t msien:1;
-		uint32_t mmc:3;
-		uint32_t mme:3;
-		uint32_t m64:1;
-		uint32_t pvm:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg020_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_24_31:8;
-		uint32_t m64:1;
-		uint32_t mme:3;
-		uint32_t mmc:3;
-		uint32_t msien:1;
-		uint32_t ncp:8;
-		uint32_t msicid:8;
-#else
-		uint32_t msicid:8;
-		uint32_t ncp:8;
-		uint32_t msien:1;
-		uint32_t mmc:3;
-		uint32_t mme:3;
-		uint32_t m64:1;
-		uint32_t reserved_24_31:8;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg020_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg020_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg020_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg020_s cn61xx;
-	struct cvmx_pciercx_cfg020_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg020_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg020_cn52xx cn66xx;
-	struct cvmx_pciercx_cfg020_cn52xx cn68xx;
-	struct cvmx_pciercx_cfg020_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg020_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg021 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg021_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t lmsi:30;
-		uint32_t reserved_0_1:2;
-#else
-		uint32_t reserved_0_1:2;
-		uint32_t lmsi:30;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg021_s cn52xx;
-	struct cvmx_pciercx_cfg021_s cn52xxp1;
-	struct cvmx_pciercx_cfg021_s cn56xx;
-	struct cvmx_pciercx_cfg021_s cn56xxp1;
-	struct cvmx_pciercx_cfg021_s cn61xx;
-	struct cvmx_pciercx_cfg021_s cn63xx;
-	struct cvmx_pciercx_cfg021_s cn63xxp1;
-	struct cvmx_pciercx_cfg021_s cn66xx;
-	struct cvmx_pciercx_cfg021_s cn68xx;
-	struct cvmx_pciercx_cfg021_s cn68xxp1;
-	struct cvmx_pciercx_cfg021_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg022 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg022_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t umsi:32;
-#else
-		uint32_t umsi:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg022_s cn52xx;
-	struct cvmx_pciercx_cfg022_s cn52xxp1;
-	struct cvmx_pciercx_cfg022_s cn56xx;
-	struct cvmx_pciercx_cfg022_s cn56xxp1;
-	struct cvmx_pciercx_cfg022_s cn61xx;
-	struct cvmx_pciercx_cfg022_s cn63xx;
-	struct cvmx_pciercx_cfg022_s cn63xxp1;
-	struct cvmx_pciercx_cfg022_s cn66xx;
-	struct cvmx_pciercx_cfg022_s cn68xx;
-	struct cvmx_pciercx_cfg022_s cn68xxp1;
-	struct cvmx_pciercx_cfg022_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg023 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg023_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_16_31:16;
-		uint32_t msimd:16;
-#else
-		uint32_t msimd:16;
-		uint32_t reserved_16_31:16;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg023_s cn52xx;
-	struct cvmx_pciercx_cfg023_s cn52xxp1;
-	struct cvmx_pciercx_cfg023_s cn56xx;
-	struct cvmx_pciercx_cfg023_s cn56xxp1;
-	struct cvmx_pciercx_cfg023_s cn61xx;
-	struct cvmx_pciercx_cfg023_s cn63xx;
-	struct cvmx_pciercx_cfg023_s cn63xxp1;
-	struct cvmx_pciercx_cfg023_s cn66xx;
-	struct cvmx_pciercx_cfg023_s cn68xx;
-	struct cvmx_pciercx_cfg023_s cn68xxp1;
-	struct cvmx_pciercx_cfg023_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg028 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg028_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_30_31:2;
-		uint32_t imn:5;
-		uint32_t si:1;
-		uint32_t dpt:4;
-		uint32_t pciecv:4;
-		uint32_t ncp:8;
-		uint32_t pcieid:8;
-#else
-		uint32_t pcieid:8;
-		uint32_t ncp:8;
-		uint32_t pciecv:4;
-		uint32_t dpt:4;
-		uint32_t si:1;
-		uint32_t imn:5;
-		uint32_t reserved_30_31:2;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg028_s cn52xx;
-	struct cvmx_pciercx_cfg028_s cn52xxp1;
-	struct cvmx_pciercx_cfg028_s cn56xx;
-	struct cvmx_pciercx_cfg028_s cn56xxp1;
-	struct cvmx_pciercx_cfg028_s cn61xx;
-	struct cvmx_pciercx_cfg028_s cn63xx;
-	struct cvmx_pciercx_cfg028_s cn63xxp1;
-	struct cvmx_pciercx_cfg028_s cn66xx;
-	struct cvmx_pciercx_cfg028_s cn68xx;
-	struct cvmx_pciercx_cfg028_s cn68xxp1;
-	struct cvmx_pciercx_cfg028_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg029 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg029_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_28_31:4;
-		uint32_t cspls:2;
-		uint32_t csplv:8;
-		uint32_t reserved_16_17:2;
-		uint32_t rber:1;
-		uint32_t reserved_12_14:3;
-		uint32_t el1al:3;
-		uint32_t el0al:3;
-		uint32_t etfs:1;
-		uint32_t pfs:2;
-		uint32_t mpss:3;
-#else
-		uint32_t mpss:3;
-		uint32_t pfs:2;
-		uint32_t etfs:1;
-		uint32_t el0al:3;
-		uint32_t el1al:3;
-		uint32_t reserved_12_14:3;
-		uint32_t rber:1;
-		uint32_t reserved_16_17:2;
-		uint32_t csplv:8;
-		uint32_t cspls:2;
-		uint32_t reserved_28_31:4;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg029_s cn52xx;
-	struct cvmx_pciercx_cfg029_s cn52xxp1;
-	struct cvmx_pciercx_cfg029_s cn56xx;
-	struct cvmx_pciercx_cfg029_s cn56xxp1;
-	struct cvmx_pciercx_cfg029_s cn61xx;
-	struct cvmx_pciercx_cfg029_s cn63xx;
-	struct cvmx_pciercx_cfg029_s cn63xxp1;
-	struct cvmx_pciercx_cfg029_s cn66xx;
-	struct cvmx_pciercx_cfg029_s cn68xx;
-	struct cvmx_pciercx_cfg029_s cn68xxp1;
-	struct cvmx_pciercx_cfg029_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg030 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg030_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_22_31:10;
-		uint32_t tp:1;
-		uint32_t ap_d:1;
-		uint32_t ur_d:1;
-		uint32_t fe_d:1;
-		uint32_t nfe_d:1;
-		uint32_t ce_d:1;
-		uint32_t reserved_15_15:1;
-		uint32_t mrrs:3;
-		uint32_t ns_en:1;
-		uint32_t ap_en:1;
-		uint32_t pf_en:1;
-		uint32_t etf_en:1;
-		uint32_t mps:3;
-		uint32_t ro_en:1;
-		uint32_t ur_en:1;
-		uint32_t fe_en:1;
-		uint32_t nfe_en:1;
-		uint32_t ce_en:1;
-#else
-		uint32_t ce_en:1;
-		uint32_t nfe_en:1;
-		uint32_t fe_en:1;
-		uint32_t ur_en:1;
-		uint32_t ro_en:1;
-		uint32_t mps:3;
-		uint32_t etf_en:1;
-		uint32_t pf_en:1;
-		uint32_t ap_en:1;
-		uint32_t ns_en:1;
-		uint32_t mrrs:3;
-		uint32_t reserved_15_15:1;
-		uint32_t ce_d:1;
-		uint32_t nfe_d:1;
-		uint32_t fe_d:1;
-		uint32_t ur_d:1;
-		uint32_t ap_d:1;
-		uint32_t tp:1;
-		uint32_t reserved_22_31:10;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_22_31:10,
+		__BITFIELD_FIELD(uint32_t tp:1,
+		__BITFIELD_FIELD(uint32_t ap_d:1,
+		__BITFIELD_FIELD(uint32_t ur_d:1,
+		__BITFIELD_FIELD(uint32_t fe_d:1,
+		__BITFIELD_FIELD(uint32_t nfe_d:1,
+		__BITFIELD_FIELD(uint32_t ce_d:1,
+		__BITFIELD_FIELD(uint32_t reserved_15_15:1,
+		__BITFIELD_FIELD(uint32_t mrrs:3,
+		__BITFIELD_FIELD(uint32_t ns_en:1,
+		__BITFIELD_FIELD(uint32_t ap_en:1,
+		__BITFIELD_FIELD(uint32_t pf_en:1,
+		__BITFIELD_FIELD(uint32_t etf_en:1,
+		__BITFIELD_FIELD(uint32_t mps:3,
+		__BITFIELD_FIELD(uint32_t ro_en:1,
+		__BITFIELD_FIELD(uint32_t ur_en:1,
+		__BITFIELD_FIELD(uint32_t fe_en:1,
+		__BITFIELD_FIELD(uint32_t nfe_en:1,
+		__BITFIELD_FIELD(uint32_t ce_en:1,
+		;)))))))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg030_s cn52xx;
-	struct cvmx_pciercx_cfg030_s cn52xxp1;
-	struct cvmx_pciercx_cfg030_s cn56xx;
-	struct cvmx_pciercx_cfg030_s cn56xxp1;
-	struct cvmx_pciercx_cfg030_s cn61xx;
-	struct cvmx_pciercx_cfg030_s cn63xx;
-	struct cvmx_pciercx_cfg030_s cn63xxp1;
-	struct cvmx_pciercx_cfg030_s cn66xx;
-	struct cvmx_pciercx_cfg030_s cn68xx;
-	struct cvmx_pciercx_cfg030_s cn68xxp1;
-	struct cvmx_pciercx_cfg030_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg031 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg031_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t pnum:8;
-		uint32_t reserved_23_23:1;
-		uint32_t aspm:1;
-		uint32_t lbnc:1;
-		uint32_t dllarc:1;
-		uint32_t sderc:1;
-		uint32_t cpm:1;
-		uint32_t l1el:3;
-		uint32_t l0el:3;
-		uint32_t aslpms:2;
-		uint32_t mlw:6;
-		uint32_t mls:4;
-#else
-		uint32_t mls:4;
-		uint32_t mlw:6;
-		uint32_t aslpms:2;
-		uint32_t l0el:3;
-		uint32_t l1el:3;
-		uint32_t cpm:1;
-		uint32_t sderc:1;
-		uint32_t dllarc:1;
-		uint32_t lbnc:1;
-		uint32_t aspm:1;
-		uint32_t reserved_23_23:1;
-		uint32_t pnum:8;
-#endif
+		__BITFIELD_FIELD(uint32_t pnum:8,
+		__BITFIELD_FIELD(uint32_t reserved_23_23:1,
+		__BITFIELD_FIELD(uint32_t aspm:1,
+		__BITFIELD_FIELD(uint32_t lbnc:1,
+		__BITFIELD_FIELD(uint32_t dllarc:1,
+		__BITFIELD_FIELD(uint32_t sderc:1,
+		__BITFIELD_FIELD(uint32_t cpm:1,
+		__BITFIELD_FIELD(uint32_t l1el:3,
+		__BITFIELD_FIELD(uint32_t l0el:3,
+		__BITFIELD_FIELD(uint32_t aslpms:2,
+		__BITFIELD_FIELD(uint32_t mlw:6,
+		__BITFIELD_FIELD(uint32_t mls:4,
+		;))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg031_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t pnum:8;
-		uint32_t reserved_22_23:2;
-		uint32_t lbnc:1;
-		uint32_t dllarc:1;
-		uint32_t sderc:1;
-		uint32_t cpm:1;
-		uint32_t l1el:3;
-		uint32_t l0el:3;
-		uint32_t aslpms:2;
-		uint32_t mlw:6;
-		uint32_t mls:4;
-#else
-		uint32_t mls:4;
-		uint32_t mlw:6;
-		uint32_t aslpms:2;
-		uint32_t l0el:3;
-		uint32_t l1el:3;
-		uint32_t cpm:1;
-		uint32_t sderc:1;
-		uint32_t dllarc:1;
-		uint32_t lbnc:1;
-		uint32_t reserved_22_23:2;
-		uint32_t pnum:8;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg031_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg031_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg031_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg031_s cn61xx;
-	struct cvmx_pciercx_cfg031_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg031_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg031_s cn66xx;
-	struct cvmx_pciercx_cfg031_s cn68xx;
-	struct cvmx_pciercx_cfg031_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg031_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg032 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg032_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t lab:1;
-		uint32_t lbm:1;
-		uint32_t dlla:1;
-		uint32_t scc:1;
-		uint32_t lt:1;
-		uint32_t reserved_26_26:1;
-		uint32_t nlw:6;
-		uint32_t ls:4;
-		uint32_t reserved_12_15:4;
-		uint32_t lab_int_enb:1;
-		uint32_t lbm_int_enb:1;
-		uint32_t hawd:1;
-		uint32_t ecpm:1;
-		uint32_t es:1;
-		uint32_t ccc:1;
-		uint32_t rl:1;
-		uint32_t ld:1;
-		uint32_t rcb:1;
-		uint32_t reserved_2_2:1;
-		uint32_t aslpc:2;
-#else
-		uint32_t aslpc:2;
-		uint32_t reserved_2_2:1;
-		uint32_t rcb:1;
-		uint32_t ld:1;
-		uint32_t rl:1;
-		uint32_t ccc:1;
-		uint32_t es:1;
-		uint32_t ecpm:1;
-		uint32_t hawd:1;
-		uint32_t lbm_int_enb:1;
-		uint32_t lab_int_enb:1;
-		uint32_t reserved_12_15:4;
-		uint32_t ls:4;
-		uint32_t nlw:6;
-		uint32_t reserved_26_26:1;
-		uint32_t lt:1;
-		uint32_t scc:1;
-		uint32_t dlla:1;
-		uint32_t lbm:1;
-		uint32_t lab:1;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg032_s cn52xx;
-	struct cvmx_pciercx_cfg032_s cn52xxp1;
-	struct cvmx_pciercx_cfg032_s cn56xx;
-	struct cvmx_pciercx_cfg032_s cn56xxp1;
-	struct cvmx_pciercx_cfg032_s cn61xx;
-	struct cvmx_pciercx_cfg032_s cn63xx;
-	struct cvmx_pciercx_cfg032_s cn63xxp1;
-	struct cvmx_pciercx_cfg032_s cn66xx;
-	struct cvmx_pciercx_cfg032_s cn68xx;
-	struct cvmx_pciercx_cfg032_s cn68xxp1;
-	struct cvmx_pciercx_cfg032_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg033 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg033_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t ps_num:13;
-		uint32_t nccs:1;
-		uint32_t emip:1;
-		uint32_t sp_ls:2;
-		uint32_t sp_lv:8;
-		uint32_t hp_c:1;
-		uint32_t hp_s:1;
-		uint32_t pip:1;
-		uint32_t aip:1;
-		uint32_t mrlsp:1;
-		uint32_t pcp:1;
-		uint32_t abp:1;
-#else
-		uint32_t abp:1;
-		uint32_t pcp:1;
-		uint32_t mrlsp:1;
-		uint32_t aip:1;
-		uint32_t pip:1;
-		uint32_t hp_s:1;
-		uint32_t hp_c:1;
-		uint32_t sp_lv:8;
-		uint32_t sp_ls:2;
-		uint32_t emip:1;
-		uint32_t nccs:1;
-		uint32_t ps_num:13;
-#endif
+		__BITFIELD_FIELD(uint32_t lab:1,
+		__BITFIELD_FIELD(uint32_t lbm:1,
+		__BITFIELD_FIELD(uint32_t dlla:1,
+		__BITFIELD_FIELD(uint32_t scc:1,
+		__BITFIELD_FIELD(uint32_t lt:1,
+		__BITFIELD_FIELD(uint32_t reserved_26_26:1,
+		__BITFIELD_FIELD(uint32_t nlw:6,
+		__BITFIELD_FIELD(uint32_t ls:4,
+		__BITFIELD_FIELD(uint32_t reserved_12_15:4,
+		__BITFIELD_FIELD(uint32_t lab_int_enb:1,
+		__BITFIELD_FIELD(uint32_t lbm_int_enb:1,
+		__BITFIELD_FIELD(uint32_t hawd:1,
+		__BITFIELD_FIELD(uint32_t ecpm:1,
+		__BITFIELD_FIELD(uint32_t es:1,
+		__BITFIELD_FIELD(uint32_t ccc:1,
+		__BITFIELD_FIELD(uint32_t rl:1,
+		__BITFIELD_FIELD(uint32_t ld:1,
+		__BITFIELD_FIELD(uint32_t rcb:1,
+		__BITFIELD_FIELD(uint32_t reserved_2_2:1,
+		__BITFIELD_FIELD(uint32_t aslpc:2,
+		;))))))))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg033_s cn52xx;
-	struct cvmx_pciercx_cfg033_s cn52xxp1;
-	struct cvmx_pciercx_cfg033_s cn56xx;
-	struct cvmx_pciercx_cfg033_s cn56xxp1;
-	struct cvmx_pciercx_cfg033_s cn61xx;
-	struct cvmx_pciercx_cfg033_s cn63xx;
-	struct cvmx_pciercx_cfg033_s cn63xxp1;
-	struct cvmx_pciercx_cfg033_s cn66xx;
-	struct cvmx_pciercx_cfg033_s cn68xx;
-	struct cvmx_pciercx_cfg033_s cn68xxp1;
-	struct cvmx_pciercx_cfg033_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg034 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg034_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t dlls_c:1;
-		uint32_t emis:1;
-		uint32_t pds:1;
-		uint32_t mrlss:1;
-		uint32_t ccint_d:1;
-		uint32_t pd_c:1;
-		uint32_t mrls_c:1;
-		uint32_t pf_d:1;
-		uint32_t abp_d:1;
-		uint32_t reserved_13_15:3;
-		uint32_t dlls_en:1;
-		uint32_t emic:1;
-		uint32_t pcc:1;
-		uint32_t pic:2;
-		uint32_t aic:2;
-		uint32_t hpint_en:1;
-		uint32_t ccint_en:1;
-		uint32_t pd_en:1;
-		uint32_t mrls_en:1;
-		uint32_t pf_en:1;
-		uint32_t abp_en:1;
-#else
-		uint32_t abp_en:1;
-		uint32_t pf_en:1;
-		uint32_t mrls_en:1;
-		uint32_t pd_en:1;
-		uint32_t ccint_en:1;
-		uint32_t hpint_en:1;
-		uint32_t aic:2;
-		uint32_t pic:2;
-		uint32_t pcc:1;
-		uint32_t emic:1;
-		uint32_t dlls_en:1;
-		uint32_t reserved_13_15:3;
-		uint32_t abp_d:1;
-		uint32_t pf_d:1;
-		uint32_t mrls_c:1;
-		uint32_t pd_c:1;
-		uint32_t ccint_d:1;
-		uint32_t mrlss:1;
-		uint32_t pds:1;
-		uint32_t emis:1;
-		uint32_t dlls_c:1;
-		uint32_t reserved_25_31:7;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_25_31:7,
+		__BITFIELD_FIELD(uint32_t dlls_c:1,
+		__BITFIELD_FIELD(uint32_t emis:1,
+		__BITFIELD_FIELD(uint32_t pds:1,
+		__BITFIELD_FIELD(uint32_t mrlss:1,
+		__BITFIELD_FIELD(uint32_t ccint_d:1,
+		__BITFIELD_FIELD(uint32_t pd_c:1,
+		__BITFIELD_FIELD(uint32_t mrls_c:1,
+		__BITFIELD_FIELD(uint32_t pf_d:1,
+		__BITFIELD_FIELD(uint32_t abp_d:1,
+		__BITFIELD_FIELD(uint32_t reserved_13_15:3,
+		__BITFIELD_FIELD(uint32_t dlls_en:1,
+		__BITFIELD_FIELD(uint32_t emic:1,
+		__BITFIELD_FIELD(uint32_t pcc:1,
+		__BITFIELD_FIELD(uint32_t pic:1,
+		__BITFIELD_FIELD(uint32_t aic:1,
+		__BITFIELD_FIELD(uint32_t hpint_en:1,
+		__BITFIELD_FIELD(uint32_t ccint_en:1,
+		__BITFIELD_FIELD(uint32_t pd_en:1,
+		__BITFIELD_FIELD(uint32_t mrls_en:1,
+		__BITFIELD_FIELD(uint32_t pf_en:1,
+		__BITFIELD_FIELD(uint32_t abp_en:1,
+		;))))))))))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg034_s cn52xx;
-	struct cvmx_pciercx_cfg034_s cn52xxp1;
-	struct cvmx_pciercx_cfg034_s cn56xx;
-	struct cvmx_pciercx_cfg034_s cn56xxp1;
-	struct cvmx_pciercx_cfg034_s cn61xx;
-	struct cvmx_pciercx_cfg034_s cn63xx;
-	struct cvmx_pciercx_cfg034_s cn63xxp1;
-	struct cvmx_pciercx_cfg034_s cn66xx;
-	struct cvmx_pciercx_cfg034_s cn68xx;
-	struct cvmx_pciercx_cfg034_s cn68xxp1;
-	struct cvmx_pciercx_cfg034_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg035 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg035_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_17_31:15;
-		uint32_t crssv:1;
-		uint32_t reserved_5_15:11;
-		uint32_t crssve:1;
-		uint32_t pmeie:1;
-		uint32_t sefee:1;
-		uint32_t senfee:1;
-		uint32_t secee:1;
-#else
-		uint32_t secee:1;
-		uint32_t senfee:1;
-		uint32_t sefee:1;
-		uint32_t pmeie:1;
-		uint32_t crssve:1;
-		uint32_t reserved_5_15:11;
-		uint32_t crssv:1;
-		uint32_t reserved_17_31:15;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg035_s cn52xx;
-	struct cvmx_pciercx_cfg035_s cn52xxp1;
-	struct cvmx_pciercx_cfg035_s cn56xx;
-	struct cvmx_pciercx_cfg035_s cn56xxp1;
-	struct cvmx_pciercx_cfg035_s cn61xx;
-	struct cvmx_pciercx_cfg035_s cn63xx;
-	struct cvmx_pciercx_cfg035_s cn63xxp1;
-	struct cvmx_pciercx_cfg035_s cn66xx;
-	struct cvmx_pciercx_cfg035_s cn68xx;
-	struct cvmx_pciercx_cfg035_s cn68xxp1;
-	struct cvmx_pciercx_cfg035_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg036 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg036_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_18_31:14;
-		uint32_t pme_pend:1;
-		uint32_t pme_stat:1;
-		uint32_t pme_rid:16;
-#else
-		uint32_t pme_rid:16;
-		uint32_t pme_stat:1;
-		uint32_t pme_pend:1;
-		uint32_t reserved_18_31:14;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg036_s cn52xx;
-	struct cvmx_pciercx_cfg036_s cn52xxp1;
-	struct cvmx_pciercx_cfg036_s cn56xx;
-	struct cvmx_pciercx_cfg036_s cn56xxp1;
-	struct cvmx_pciercx_cfg036_s cn61xx;
-	struct cvmx_pciercx_cfg036_s cn63xx;
-	struct cvmx_pciercx_cfg036_s cn63xxp1;
-	struct cvmx_pciercx_cfg036_s cn66xx;
-	struct cvmx_pciercx_cfg036_s cn68xx;
-	struct cvmx_pciercx_cfg036_s cn68xxp1;
-	struct cvmx_pciercx_cfg036_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg037 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg037_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_20_31:12;
-		uint32_t obffs:2;
-		uint32_t reserved_12_17:6;
-		uint32_t ltrs:1;
-		uint32_t noroprpr:1;
-		uint32_t atom128s:1;
-		uint32_t atom64s:1;
-		uint32_t atom32s:1;
-		uint32_t atom_ops:1;
-		uint32_t reserved_5_5:1;
-		uint32_t ctds:1;
-		uint32_t ctrs:4;
-#else
-		uint32_t ctrs:4;
-		uint32_t ctds:1;
-		uint32_t reserved_5_5:1;
-		uint32_t atom_ops:1;
-		uint32_t atom32s:1;
-		uint32_t atom64s:1;
-		uint32_t atom128s:1;
-		uint32_t noroprpr:1;
-		uint32_t ltrs:1;
-		uint32_t reserved_12_17:6;
-		uint32_t obffs:2;
-		uint32_t reserved_20_31:12;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg037_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_5_31:27;
-		uint32_t ctds:1;
-		uint32_t ctrs:4;
-#else
-		uint32_t ctrs:4;
-		uint32_t ctds:1;
-		uint32_t reserved_5_31:27;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg037_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg037_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg037_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg037_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_14_31:18;
-		uint32_t tph:2;
-		uint32_t reserved_11_11:1;
-		uint32_t noroprpr:1;
-		uint32_t atom128s:1;
-		uint32_t atom64s:1;
-		uint32_t atom32s:1;
-		uint32_t atom_ops:1;
-		uint32_t ari_fw:1;
-		uint32_t ctds:1;
-		uint32_t ctrs:4;
-#else
-		uint32_t ctrs:4;
-		uint32_t ctds:1;
-		uint32_t ari_fw:1;
-		uint32_t atom_ops:1;
-		uint32_t atom32s:1;
-		uint32_t atom64s:1;
-		uint32_t atom128s:1;
-		uint32_t noroprpr:1;
-		uint32_t reserved_11_11:1;
-		uint32_t tph:2;
-		uint32_t reserved_14_31:18;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg037_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg037_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg037_cn66xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_14_31:18;
-		uint32_t tph:2;
-		uint32_t reserved_11_11:1;
-		uint32_t noroprpr:1;
-		uint32_t atom128s:1;
-		uint32_t atom64s:1;
-		uint32_t atom32s:1;
-		uint32_t atom_ops:1;
-		uint32_t ari:1;
-		uint32_t ctds:1;
-		uint32_t ctrs:4;
-#else
-		uint32_t ctrs:4;
-		uint32_t ctds:1;
-		uint32_t ari:1;
-		uint32_t atom_ops:1;
-		uint32_t atom32s:1;
-		uint32_t atom64s:1;
-		uint32_t atom128s:1;
-		uint32_t noroprpr:1;
-		uint32_t reserved_11_11:1;
-		uint32_t tph:2;
-		uint32_t reserved_14_31:18;
-#endif
-	} cn66xx;
-	struct cvmx_pciercx_cfg037_cn66xx cn68xx;
-	struct cvmx_pciercx_cfg037_cn66xx cn68xxp1;
-	struct cvmx_pciercx_cfg037_cnf71xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_20_31:12;
-		uint32_t obffs:2;
-		uint32_t reserved_14_17:4;
-		uint32_t tphs:2;
-		uint32_t ltrs:1;
-		uint32_t noroprpr:1;
-		uint32_t atom128s:1;
-		uint32_t atom64s:1;
-		uint32_t atom32s:1;
-		uint32_t atom_ops:1;
-		uint32_t ari_fw:1;
-		uint32_t ctds:1;
-		uint32_t ctrs:4;
-#else
-		uint32_t ctrs:4;
-		uint32_t ctds:1;
-		uint32_t ari_fw:1;
-		uint32_t atom_ops:1;
-		uint32_t atom32s:1;
-		uint32_t atom64s:1;
-		uint32_t atom128s:1;
-		uint32_t noroprpr:1;
-		uint32_t ltrs:1;
-		uint32_t tphs:2;
-		uint32_t reserved_14_17:4;
-		uint32_t obffs:2;
-		uint32_t reserved_20_31:12;
-#endif
-	} cnf71xx;
-};
-
-union cvmx_pciercx_cfg038 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg038_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_15_31:17;
-		uint32_t obffe:2;
-		uint32_t reserved_11_12:2;
-		uint32_t ltre:1;
-		uint32_t id0_cp:1;
-		uint32_t id0_rq:1;
-		uint32_t atom_op_eb:1;
-		uint32_t atom_op:1;
-		uint32_t ari:1;
-		uint32_t ctd:1;
-		uint32_t ctv:4;
-#else
-		uint32_t ctv:4;
-		uint32_t ctd:1;
-		uint32_t ari:1;
-		uint32_t atom_op:1;
-		uint32_t atom_op_eb:1;
-		uint32_t id0_rq:1;
-		uint32_t id0_cp:1;
-		uint32_t ltre:1;
-		uint32_t reserved_11_12:2;
-		uint32_t obffe:2;
-		uint32_t reserved_15_31:17;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_17_31:15,
+		__BITFIELD_FIELD(uint32_t crssv:1,
+		__BITFIELD_FIELD(uint32_t reserved_5_15:11,
+		__BITFIELD_FIELD(uint32_t crssve:1,
+		__BITFIELD_FIELD(uint32_t pmeie:1,
+		__BITFIELD_FIELD(uint32_t sefee:1,
+		__BITFIELD_FIELD(uint32_t senfee:1,
+		__BITFIELD_FIELD(uint32_t secee:1,
+		;))))))))
 	} s;
-	struct cvmx_pciercx_cfg038_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_5_31:27;
-		uint32_t ctd:1;
-		uint32_t ctv:4;
-#else
-		uint32_t ctv:4;
-		uint32_t ctd:1;
-		uint32_t reserved_5_31:27;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg038_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg038_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg038_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg038_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_10_31:22;
-		uint32_t id0_cp:1;
-		uint32_t id0_rq:1;
-		uint32_t atom_op_eb:1;
-		uint32_t atom_op:1;
-		uint32_t ari:1;
-		uint32_t ctd:1;
-		uint32_t ctv:4;
-#else
-		uint32_t ctv:4;
-		uint32_t ctd:1;
-		uint32_t ari:1;
-		uint32_t atom_op:1;
-		uint32_t atom_op_eb:1;
-		uint32_t id0_rq:1;
-		uint32_t id0_cp:1;
-		uint32_t reserved_10_31:22;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg038_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg038_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg038_cn61xx cn66xx;
-	struct cvmx_pciercx_cfg038_cn61xx cn68xx;
-	struct cvmx_pciercx_cfg038_cn61xx cn68xxp1;
-	struct cvmx_pciercx_cfg038_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg039 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg039_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_9_31:23;
-		uint32_t cls:1;
-		uint32_t slsv:7;
-		uint32_t reserved_0_0:1;
-#else
-		uint32_t reserved_0_0:1;
-		uint32_t slsv:7;
-		uint32_t cls:1;
-		uint32_t reserved_9_31:23;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg039_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg039_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg039_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg039_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg039_s cn61xx;
-	struct cvmx_pciercx_cfg039_s cn63xx;
-	struct cvmx_pciercx_cfg039_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg039_s cn66xx;
-	struct cvmx_pciercx_cfg039_s cn68xx;
-	struct cvmx_pciercx_cfg039_s cn68xxp1;
-	struct cvmx_pciercx_cfg039_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg040 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg040_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_17_31:15;
-		uint32_t cdl:1;
-		uint32_t reserved_13_15:3;
-		uint32_t cde:1;
-		uint32_t csos:1;
-		uint32_t emc:1;
-		uint32_t tm:3;
-		uint32_t sde:1;
-		uint32_t hasd:1;
-		uint32_t ec:1;
-		uint32_t tls:4;
-#else
-		uint32_t tls:4;
-		uint32_t ec:1;
-		uint32_t hasd:1;
-		uint32_t sde:1;
-		uint32_t tm:3;
-		uint32_t emc:1;
-		uint32_t csos:1;
-		uint32_t cde:1;
-		uint32_t reserved_13_15:3;
-		uint32_t cdl:1;
-		uint32_t reserved_17_31:15;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg040_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg040_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg040_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg040_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg040_s cn61xx;
-	struct cvmx_pciercx_cfg040_s cn63xx;
-	struct cvmx_pciercx_cfg040_s cn63xxp1;
-	struct cvmx_pciercx_cfg040_s cn66xx;
-	struct cvmx_pciercx_cfg040_s cn68xx;
-	struct cvmx_pciercx_cfg040_s cn68xxp1;
-	struct cvmx_pciercx_cfg040_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg041 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg041_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg041_s cn52xx;
-	struct cvmx_pciercx_cfg041_s cn52xxp1;
-	struct cvmx_pciercx_cfg041_s cn56xx;
-	struct cvmx_pciercx_cfg041_s cn56xxp1;
-	struct cvmx_pciercx_cfg041_s cn61xx;
-	struct cvmx_pciercx_cfg041_s cn63xx;
-	struct cvmx_pciercx_cfg041_s cn63xxp1;
-	struct cvmx_pciercx_cfg041_s cn66xx;
-	struct cvmx_pciercx_cfg041_s cn68xx;
-	struct cvmx_pciercx_cfg041_s cn68xxp1;
-	struct cvmx_pciercx_cfg041_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg042 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg042_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_0_31:32;
-#else
-		uint32_t reserved_0_31:32;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_22_31:10,
+		__BITFIELD_FIELD(uint32_t ler:1,
+		__BITFIELD_FIELD(uint32_t ep3s:1,
+		__BITFIELD_FIELD(uint32_t ep2s:1,
+		__BITFIELD_FIELD(uint32_t ep1s:1,
+		__BITFIELD_FIELD(uint32_t eqc:1,
+		__BITFIELD_FIELD(uint32_t cdl:1,
+		__BITFIELD_FIELD(uint32_t cde:4,
+		__BITFIELD_FIELD(uint32_t csos:1,
+		__BITFIELD_FIELD(uint32_t emc:1,
+		__BITFIELD_FIELD(uint32_t tm:3,
+		__BITFIELD_FIELD(uint32_t sde:1,
+		__BITFIELD_FIELD(uint32_t hasd:1,
+		__BITFIELD_FIELD(uint32_t ec:1,
+		__BITFIELD_FIELD(uint32_t tls:4,
+		;)))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg042_s cn52xx;
-	struct cvmx_pciercx_cfg042_s cn52xxp1;
-	struct cvmx_pciercx_cfg042_s cn56xx;
-	struct cvmx_pciercx_cfg042_s cn56xxp1;
-	struct cvmx_pciercx_cfg042_s cn61xx;
-	struct cvmx_pciercx_cfg042_s cn63xx;
-	struct cvmx_pciercx_cfg042_s cn63xxp1;
-	struct cvmx_pciercx_cfg042_s cn66xx;
-	struct cvmx_pciercx_cfg042_s cn68xx;
-	struct cvmx_pciercx_cfg042_s cn68xxp1;
-	struct cvmx_pciercx_cfg042_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg064 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg064_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t nco:12;
-		uint32_t cv:4;
-		uint32_t pcieec:16;
-#else
-		uint32_t pcieec:16;
-		uint32_t cv:4;
-		uint32_t nco:12;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg064_s cn52xx;
-	struct cvmx_pciercx_cfg064_s cn52xxp1;
-	struct cvmx_pciercx_cfg064_s cn56xx;
-	struct cvmx_pciercx_cfg064_s cn56xxp1;
-	struct cvmx_pciercx_cfg064_s cn61xx;
-	struct cvmx_pciercx_cfg064_s cn63xx;
-	struct cvmx_pciercx_cfg064_s cn63xxp1;
-	struct cvmx_pciercx_cfg064_s cn66xx;
-	struct cvmx_pciercx_cfg064_s cn68xx;
-	struct cvmx_pciercx_cfg064_s cn68xxp1;
-	struct cvmx_pciercx_cfg064_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg065 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg065_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t uatombs:1;
-		uint32_t reserved_23_23:1;
-		uint32_t ucies:1;
-		uint32_t reserved_21_21:1;
-		uint32_t ures:1;
-		uint32_t ecrces:1;
-		uint32_t mtlps:1;
-		uint32_t ros:1;
-		uint32_t ucs:1;
-		uint32_t cas:1;
-		uint32_t cts:1;
-		uint32_t fcpes:1;
-		uint32_t ptlps:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdes:1;
-		uint32_t dlpes:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpes:1;
-		uint32_t sdes:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlps:1;
-		uint32_t fcpes:1;
-		uint32_t cts:1;
-		uint32_t cas:1;
-		uint32_t ucs:1;
-		uint32_t ros:1;
-		uint32_t mtlps:1;
-		uint32_t ecrces:1;
-		uint32_t ures:1;
-		uint32_t reserved_21_21:1;
-		uint32_t ucies:1;
-		uint32_t reserved_23_23:1;
-		uint32_t uatombs:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg065_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_21_31:11;
-		uint32_t ures:1;
-		uint32_t ecrces:1;
-		uint32_t mtlps:1;
-		uint32_t ros:1;
-		uint32_t ucs:1;
-		uint32_t cas:1;
-		uint32_t cts:1;
-		uint32_t fcpes:1;
-		uint32_t ptlps:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdes:1;
-		uint32_t dlpes:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpes:1;
-		uint32_t sdes:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlps:1;
-		uint32_t fcpes:1;
-		uint32_t cts:1;
-		uint32_t cas:1;
-		uint32_t ucs:1;
-		uint32_t ros:1;
-		uint32_t mtlps:1;
-		uint32_t ecrces:1;
-		uint32_t ures:1;
-		uint32_t reserved_21_31:11;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg065_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg065_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg065_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg065_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t uatombs:1;
-		uint32_t reserved_21_23:3;
-		uint32_t ures:1;
-		uint32_t ecrces:1;
-		uint32_t mtlps:1;
-		uint32_t ros:1;
-		uint32_t ucs:1;
-		uint32_t cas:1;
-		uint32_t cts:1;
-		uint32_t fcpes:1;
-		uint32_t ptlps:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdes:1;
-		uint32_t dlpes:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpes:1;
-		uint32_t sdes:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlps:1;
-		uint32_t fcpes:1;
-		uint32_t cts:1;
-		uint32_t cas:1;
-		uint32_t ucs:1;
-		uint32_t ros:1;
-		uint32_t mtlps:1;
-		uint32_t ecrces:1;
-		uint32_t ures:1;
-		uint32_t reserved_21_23:3;
-		uint32_t uatombs:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg065_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg065_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg065_cn61xx cn66xx;
-	struct cvmx_pciercx_cfg065_cn61xx cn68xx;
-	struct cvmx_pciercx_cfg065_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg065_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg066 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg066_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t uatombm:1;
-		uint32_t reserved_23_23:1;
-		uint32_t uciem:1;
-		uint32_t reserved_21_21:1;
-		uint32_t urem:1;
-		uint32_t ecrcem:1;
-		uint32_t mtlpm:1;
-		uint32_t rom:1;
-		uint32_t ucm:1;
-		uint32_t cam:1;
-		uint32_t ctm:1;
-		uint32_t fcpem:1;
-		uint32_t ptlpm:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdem:1;
-		uint32_t dlpem:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpem:1;
-		uint32_t sdem:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlpm:1;
-		uint32_t fcpem:1;
-		uint32_t ctm:1;
-		uint32_t cam:1;
-		uint32_t ucm:1;
-		uint32_t rom:1;
-		uint32_t mtlpm:1;
-		uint32_t ecrcem:1;
-		uint32_t urem:1;
-		uint32_t reserved_21_21:1;
-		uint32_t uciem:1;
-		uint32_t reserved_23_23:1;
-		uint32_t uatombm:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg066_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_21_31:11;
-		uint32_t urem:1;
-		uint32_t ecrcem:1;
-		uint32_t mtlpm:1;
-		uint32_t rom:1;
-		uint32_t ucm:1;
-		uint32_t cam:1;
-		uint32_t ctm:1;
-		uint32_t fcpem:1;
-		uint32_t ptlpm:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdem:1;
-		uint32_t dlpem:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpem:1;
-		uint32_t sdem:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlpm:1;
-		uint32_t fcpem:1;
-		uint32_t ctm:1;
-		uint32_t cam:1;
-		uint32_t ucm:1;
-		uint32_t rom:1;
-		uint32_t mtlpm:1;
-		uint32_t ecrcem:1;
-		uint32_t urem:1;
-		uint32_t reserved_21_31:11;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg066_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg066_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg066_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg066_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t uatombm:1;
-		uint32_t reserved_21_23:3;
-		uint32_t urem:1;
-		uint32_t ecrcem:1;
-		uint32_t mtlpm:1;
-		uint32_t rom:1;
-		uint32_t ucm:1;
-		uint32_t cam:1;
-		uint32_t ctm:1;
-		uint32_t fcpem:1;
-		uint32_t ptlpm:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdem:1;
-		uint32_t dlpem:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpem:1;
-		uint32_t sdem:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlpm:1;
-		uint32_t fcpem:1;
-		uint32_t ctm:1;
-		uint32_t cam:1;
-		uint32_t ucm:1;
-		uint32_t rom:1;
-		uint32_t mtlpm:1;
-		uint32_t ecrcem:1;
-		uint32_t urem:1;
-		uint32_t reserved_21_23:3;
-		uint32_t uatombm:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg066_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg066_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg066_cn61xx cn66xx;
-	struct cvmx_pciercx_cfg066_cn61xx cn68xx;
-	struct cvmx_pciercx_cfg066_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg066_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg067 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg067_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t uatombs:1;
-		uint32_t reserved_23_23:1;
-		uint32_t ucies:1;
-		uint32_t reserved_21_21:1;
-		uint32_t ures:1;
-		uint32_t ecrces:1;
-		uint32_t mtlps:1;
-		uint32_t ros:1;
-		uint32_t ucs:1;
-		uint32_t cas:1;
-		uint32_t cts:1;
-		uint32_t fcpes:1;
-		uint32_t ptlps:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdes:1;
-		uint32_t dlpes:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpes:1;
-		uint32_t sdes:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlps:1;
-		uint32_t fcpes:1;
-		uint32_t cts:1;
-		uint32_t cas:1;
-		uint32_t ucs:1;
-		uint32_t ros:1;
-		uint32_t mtlps:1;
-		uint32_t ecrces:1;
-		uint32_t ures:1;
-		uint32_t reserved_21_21:1;
-		uint32_t ucies:1;
-		uint32_t reserved_23_23:1;
-		uint32_t uatombs:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg067_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_21_31:11;
-		uint32_t ures:1;
-		uint32_t ecrces:1;
-		uint32_t mtlps:1;
-		uint32_t ros:1;
-		uint32_t ucs:1;
-		uint32_t cas:1;
-		uint32_t cts:1;
-		uint32_t fcpes:1;
-		uint32_t ptlps:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdes:1;
-		uint32_t dlpes:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpes:1;
-		uint32_t sdes:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlps:1;
-		uint32_t fcpes:1;
-		uint32_t cts:1;
-		uint32_t cas:1;
-		uint32_t ucs:1;
-		uint32_t ros:1;
-		uint32_t mtlps:1;
-		uint32_t ecrces:1;
-		uint32_t ures:1;
-		uint32_t reserved_21_31:11;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg067_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg067_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg067_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg067_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_25_31:7;
-		uint32_t uatombs:1;
-		uint32_t reserved_21_23:3;
-		uint32_t ures:1;
-		uint32_t ecrces:1;
-		uint32_t mtlps:1;
-		uint32_t ros:1;
-		uint32_t ucs:1;
-		uint32_t cas:1;
-		uint32_t cts:1;
-		uint32_t fcpes:1;
-		uint32_t ptlps:1;
-		uint32_t reserved_6_11:6;
-		uint32_t sdes:1;
-		uint32_t dlpes:1;
-		uint32_t reserved_0_3:4;
-#else
-		uint32_t reserved_0_3:4;
-		uint32_t dlpes:1;
-		uint32_t sdes:1;
-		uint32_t reserved_6_11:6;
-		uint32_t ptlps:1;
-		uint32_t fcpes:1;
-		uint32_t cts:1;
-		uint32_t cas:1;
-		uint32_t ucs:1;
-		uint32_t ros:1;
-		uint32_t mtlps:1;
-		uint32_t ecrces:1;
-		uint32_t ures:1;
-		uint32_t reserved_21_23:3;
-		uint32_t uatombs:1;
-		uint32_t reserved_25_31:7;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg067_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg067_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg067_cn61xx cn66xx;
-	struct cvmx_pciercx_cfg067_cn61xx cn68xx;
-	struct cvmx_pciercx_cfg067_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg067_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg068 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg068_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_15_31:17;
-		uint32_t cies:1;
-		uint32_t anfes:1;
-		uint32_t rtts:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rnrs:1;
-		uint32_t bdllps:1;
-		uint32_t btlps:1;
-		uint32_t reserved_1_5:5;
-		uint32_t res:1;
-#else
-		uint32_t res:1;
-		uint32_t reserved_1_5:5;
-		uint32_t btlps:1;
-		uint32_t bdllps:1;
-		uint32_t rnrs:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rtts:1;
-		uint32_t anfes:1;
-		uint32_t cies:1;
-		uint32_t reserved_15_31:17;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg068_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_14_31:18;
-		uint32_t anfes:1;
-		uint32_t rtts:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rnrs:1;
-		uint32_t bdllps:1;
-		uint32_t btlps:1;
-		uint32_t reserved_1_5:5;
-		uint32_t res:1;
-#else
-		uint32_t res:1;
-		uint32_t reserved_1_5:5;
-		uint32_t btlps:1;
-		uint32_t bdllps:1;
-		uint32_t rnrs:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rtts:1;
-		uint32_t anfes:1;
-		uint32_t reserved_14_31:18;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg068_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg068_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg068_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg068_cn52xx cn61xx;
-	struct cvmx_pciercx_cfg068_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg068_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg068_cn52xx cn66xx;
-	struct cvmx_pciercx_cfg068_cn52xx cn68xx;
-	struct cvmx_pciercx_cfg068_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg068_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg069 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg069_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_15_31:17;
-		uint32_t ciem:1;
-		uint32_t anfem:1;
-		uint32_t rttm:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rnrm:1;
-		uint32_t bdllpm:1;
-		uint32_t btlpm:1;
-		uint32_t reserved_1_5:5;
-		uint32_t rem:1;
-#else
-		uint32_t rem:1;
-		uint32_t reserved_1_5:5;
-		uint32_t btlpm:1;
-		uint32_t bdllpm:1;
-		uint32_t rnrm:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rttm:1;
-		uint32_t anfem:1;
-		uint32_t ciem:1;
-		uint32_t reserved_15_31:17;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg069_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_14_31:18;
-		uint32_t anfem:1;
-		uint32_t rttm:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rnrm:1;
-		uint32_t bdllpm:1;
-		uint32_t btlpm:1;
-		uint32_t reserved_1_5:5;
-		uint32_t rem:1;
-#else
-		uint32_t rem:1;
-		uint32_t reserved_1_5:5;
-		uint32_t btlpm:1;
-		uint32_t bdllpm:1;
-		uint32_t rnrm:1;
-		uint32_t reserved_9_11:3;
-		uint32_t rttm:1;
-		uint32_t anfem:1;
-		uint32_t reserved_14_31:18;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg069_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg069_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg069_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg069_cn52xx cn61xx;
-	struct cvmx_pciercx_cfg069_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg069_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg069_cn52xx cn66xx;
-	struct cvmx_pciercx_cfg069_cn52xx cn68xx;
-	struct cvmx_pciercx_cfg069_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg069_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg070 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg070_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_9_31:23;
-		uint32_t ce:1;
-		uint32_t cc:1;
-		uint32_t ge:1;
-		uint32_t gc:1;
-		uint32_t fep:5;
-#else
-		uint32_t fep:5;
-		uint32_t gc:1;
-		uint32_t ge:1;
-		uint32_t cc:1;
-		uint32_t ce:1;
-		uint32_t reserved_9_31:23;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg070_s cn52xx;
-	struct cvmx_pciercx_cfg070_s cn52xxp1;
-	struct cvmx_pciercx_cfg070_s cn56xx;
-	struct cvmx_pciercx_cfg070_s cn56xxp1;
-	struct cvmx_pciercx_cfg070_s cn61xx;
-	struct cvmx_pciercx_cfg070_s cn63xx;
-	struct cvmx_pciercx_cfg070_s cn63xxp1;
-	struct cvmx_pciercx_cfg070_s cn66xx;
-	struct cvmx_pciercx_cfg070_s cn68xx;
-	struct cvmx_pciercx_cfg070_s cn68xxp1;
-	struct cvmx_pciercx_cfg070_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg071 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg071_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dword1:32;
-#else
-		uint32_t dword1:32;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_12_31:20,
+		__BITFIELD_FIELD(uint32_t tplp:1,
+		__BITFIELD_FIELD(uint32_t reserved_9_10:2,
+		__BITFIELD_FIELD(uint32_t ce:1,
+		__BITFIELD_FIELD(uint32_t cc:1,
+		__BITFIELD_FIELD(uint32_t ge:1,
+		__BITFIELD_FIELD(uint32_t gc:1,
+		__BITFIELD_FIELD(uint32_t fep:5,
+		;))))))))
 	} s;
-	struct cvmx_pciercx_cfg071_s cn52xx;
-	struct cvmx_pciercx_cfg071_s cn52xxp1;
-	struct cvmx_pciercx_cfg071_s cn56xx;
-	struct cvmx_pciercx_cfg071_s cn56xxp1;
-	struct cvmx_pciercx_cfg071_s cn61xx;
-	struct cvmx_pciercx_cfg071_s cn63xx;
-	struct cvmx_pciercx_cfg071_s cn63xxp1;
-	struct cvmx_pciercx_cfg071_s cn66xx;
-	struct cvmx_pciercx_cfg071_s cn68xx;
-	struct cvmx_pciercx_cfg071_s cn68xxp1;
-	struct cvmx_pciercx_cfg071_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg072 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg072_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dword2:32;
-#else
-		uint32_t dword2:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg072_s cn52xx;
-	struct cvmx_pciercx_cfg072_s cn52xxp1;
-	struct cvmx_pciercx_cfg072_s cn56xx;
-	struct cvmx_pciercx_cfg072_s cn56xxp1;
-	struct cvmx_pciercx_cfg072_s cn61xx;
-	struct cvmx_pciercx_cfg072_s cn63xx;
-	struct cvmx_pciercx_cfg072_s cn63xxp1;
-	struct cvmx_pciercx_cfg072_s cn66xx;
-	struct cvmx_pciercx_cfg072_s cn68xx;
-	struct cvmx_pciercx_cfg072_s cn68xxp1;
-	struct cvmx_pciercx_cfg072_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg073 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg073_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dword3:32;
-#else
-		uint32_t dword3:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg073_s cn52xx;
-	struct cvmx_pciercx_cfg073_s cn52xxp1;
-	struct cvmx_pciercx_cfg073_s cn56xx;
-	struct cvmx_pciercx_cfg073_s cn56xxp1;
-	struct cvmx_pciercx_cfg073_s cn61xx;
-	struct cvmx_pciercx_cfg073_s cn63xx;
-	struct cvmx_pciercx_cfg073_s cn63xxp1;
-	struct cvmx_pciercx_cfg073_s cn66xx;
-	struct cvmx_pciercx_cfg073_s cn68xx;
-	struct cvmx_pciercx_cfg073_s cn68xxp1;
-	struct cvmx_pciercx_cfg073_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg074 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg074_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dword4:32;
-#else
-		uint32_t dword4:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg074_s cn52xx;
-	struct cvmx_pciercx_cfg074_s cn52xxp1;
-	struct cvmx_pciercx_cfg074_s cn56xx;
-	struct cvmx_pciercx_cfg074_s cn56xxp1;
-	struct cvmx_pciercx_cfg074_s cn61xx;
-	struct cvmx_pciercx_cfg074_s cn63xx;
-	struct cvmx_pciercx_cfg074_s cn63xxp1;
-	struct cvmx_pciercx_cfg074_s cn66xx;
-	struct cvmx_pciercx_cfg074_s cn68xx;
-	struct cvmx_pciercx_cfg074_s cn68xxp1;
-	struct cvmx_pciercx_cfg074_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg075 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg075_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_3_31:29;
-		uint32_t fere:1;
-		uint32_t nfere:1;
-		uint32_t cere:1;
-#else
-		uint32_t cere:1;
-		uint32_t nfere:1;
-		uint32_t fere:1;
-		uint32_t reserved_3_31:29;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg075_s cn52xx;
-	struct cvmx_pciercx_cfg075_s cn52xxp1;
-	struct cvmx_pciercx_cfg075_s cn56xx;
-	struct cvmx_pciercx_cfg075_s cn56xxp1;
-	struct cvmx_pciercx_cfg075_s cn61xx;
-	struct cvmx_pciercx_cfg075_s cn63xx;
-	struct cvmx_pciercx_cfg075_s cn63xxp1;
-	struct cvmx_pciercx_cfg075_s cn66xx;
-	struct cvmx_pciercx_cfg075_s cn68xx;
-	struct cvmx_pciercx_cfg075_s cn68xxp1;
-	struct cvmx_pciercx_cfg075_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg076 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg076_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t aeimn:5;
-		uint32_t reserved_7_26:20;
-		uint32_t femr:1;
-		uint32_t nfemr:1;
-		uint32_t fuf:1;
-		uint32_t multi_efnfr:1;
-		uint32_t efnfr:1;
-		uint32_t multi_ecr:1;
-		uint32_t ecr:1;
-#else
-		uint32_t ecr:1;
-		uint32_t multi_ecr:1;
-		uint32_t efnfr:1;
-		uint32_t multi_efnfr:1;
-		uint32_t fuf:1;
-		uint32_t nfemr:1;
-		uint32_t femr:1;
-		uint32_t reserved_7_26:20;
-		uint32_t aeimn:5;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_3_31:29,
+		__BITFIELD_FIELD(uint32_t fere:1,
+		__BITFIELD_FIELD(uint32_t nfere:1,
+		__BITFIELD_FIELD(uint32_t cere:1,
+		;))))
 	} s;
-	struct cvmx_pciercx_cfg076_s cn52xx;
-	struct cvmx_pciercx_cfg076_s cn52xxp1;
-	struct cvmx_pciercx_cfg076_s cn56xx;
-	struct cvmx_pciercx_cfg076_s cn56xxp1;
-	struct cvmx_pciercx_cfg076_s cn61xx;
-	struct cvmx_pciercx_cfg076_s cn63xx;
-	struct cvmx_pciercx_cfg076_s cn63xxp1;
-	struct cvmx_pciercx_cfg076_s cn66xx;
-	struct cvmx_pciercx_cfg076_s cn68xx;
-	struct cvmx_pciercx_cfg076_s cn68xxp1;
-	struct cvmx_pciercx_cfg076_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg077 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg077_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t efnfsi:16;
-		uint32_t ecsi:16;
-#else
-		uint32_t ecsi:16;
-		uint32_t efnfsi:16;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg077_s cn52xx;
-	struct cvmx_pciercx_cfg077_s cn52xxp1;
-	struct cvmx_pciercx_cfg077_s cn56xx;
-	struct cvmx_pciercx_cfg077_s cn56xxp1;
-	struct cvmx_pciercx_cfg077_s cn61xx;
-	struct cvmx_pciercx_cfg077_s cn63xx;
-	struct cvmx_pciercx_cfg077_s cn63xxp1;
-	struct cvmx_pciercx_cfg077_s cn66xx;
-	struct cvmx_pciercx_cfg077_s cn68xx;
-	struct cvmx_pciercx_cfg077_s cn68xxp1;
-	struct cvmx_pciercx_cfg077_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg448 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg448_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t rtl:16;
-		uint32_t rtltl:16;
-#else
-		uint32_t rtltl:16;
-		uint32_t rtl:16;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg448_s cn52xx;
-	struct cvmx_pciercx_cfg448_s cn52xxp1;
-	struct cvmx_pciercx_cfg448_s cn56xx;
-	struct cvmx_pciercx_cfg448_s cn56xxp1;
-	struct cvmx_pciercx_cfg448_s cn61xx;
-	struct cvmx_pciercx_cfg448_s cn63xx;
-	struct cvmx_pciercx_cfg448_s cn63xxp1;
-	struct cvmx_pciercx_cfg448_s cn66xx;
-	struct cvmx_pciercx_cfg448_s cn68xx;
-	struct cvmx_pciercx_cfg448_s cn68xxp1;
-	struct cvmx_pciercx_cfg448_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg449 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg449_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t omr:32;
-#else
-		uint32_t omr:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg449_s cn52xx;
-	struct cvmx_pciercx_cfg449_s cn52xxp1;
-	struct cvmx_pciercx_cfg449_s cn56xx;
-	struct cvmx_pciercx_cfg449_s cn56xxp1;
-	struct cvmx_pciercx_cfg449_s cn61xx;
-	struct cvmx_pciercx_cfg449_s cn63xx;
-	struct cvmx_pciercx_cfg449_s cn63xxp1;
-	struct cvmx_pciercx_cfg449_s cn66xx;
-	struct cvmx_pciercx_cfg449_s cn68xx;
-	struct cvmx_pciercx_cfg449_s cn68xxp1;
-	struct cvmx_pciercx_cfg449_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg450 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg450_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t lpec:8;
-		uint32_t reserved_22_23:2;
-		uint32_t link_state:6;
-		uint32_t force_link:1;
-		uint32_t reserved_8_14:7;
-		uint32_t link_num:8;
-#else
-		uint32_t link_num:8;
-		uint32_t reserved_8_14:7;
-		uint32_t force_link:1;
-		uint32_t link_state:6;
-		uint32_t reserved_22_23:2;
-		uint32_t lpec:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg450_s cn52xx;
-	struct cvmx_pciercx_cfg450_s cn52xxp1;
-	struct cvmx_pciercx_cfg450_s cn56xx;
-	struct cvmx_pciercx_cfg450_s cn56xxp1;
-	struct cvmx_pciercx_cfg450_s cn61xx;
-	struct cvmx_pciercx_cfg450_s cn63xx;
-	struct cvmx_pciercx_cfg450_s cn63xxp1;
-	struct cvmx_pciercx_cfg450_s cn66xx;
-	struct cvmx_pciercx_cfg450_s cn68xx;
-	struct cvmx_pciercx_cfg450_s cn68xxp1;
-	struct cvmx_pciercx_cfg450_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg451 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg451_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_31_31:1;
-		uint32_t easpml1:1;
-		uint32_t l1el:3;
-		uint32_t l0el:3;
-		uint32_t n_fts_cc:8;
-		uint32_t n_fts:8;
-		uint32_t ack_freq:8;
-#else
-		uint32_t ack_freq:8;
-		uint32_t n_fts:8;
-		uint32_t n_fts_cc:8;
-		uint32_t l0el:3;
-		uint32_t l1el:3;
-		uint32_t easpml1:1;
-		uint32_t reserved_31_31:1;
-#endif
+		__BITFIELD_FIELD(uint32_t rtl:16,
+		__BITFIELD_FIELD(uint32_t rtltl:16,
+		;))
 	} s;
-	struct cvmx_pciercx_cfg451_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_30_31:2;
-		uint32_t l1el:3;
-		uint32_t l0el:3;
-		uint32_t n_fts_cc:8;
-		uint32_t n_fts:8;
-		uint32_t ack_freq:8;
-#else
-		uint32_t ack_freq:8;
-		uint32_t n_fts:8;
-		uint32_t n_fts_cc:8;
-		uint32_t l0el:3;
-		uint32_t l1el:3;
-		uint32_t reserved_30_31:2;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg451_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg451_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg451_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg451_s cn61xx;
-	struct cvmx_pciercx_cfg451_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg451_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg451_s cn66xx;
-	struct cvmx_pciercx_cfg451_s cn68xx;
-	struct cvmx_pciercx_cfg451_s cn68xxp1;
-	struct cvmx_pciercx_cfg451_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg452 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg452_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_26_31:6;
-		uint32_t eccrc:1;
-		uint32_t reserved_22_24:3;
-		uint32_t lme:6;
-		uint32_t reserved_8_15:8;
-		uint32_t flm:1;
-		uint32_t reserved_6_6:1;
-		uint32_t dllle:1;
-		uint32_t reserved_4_4:1;
-		uint32_t ra:1;
-		uint32_t le:1;
-		uint32_t sd:1;
-		uint32_t omr:1;
-#else
-		uint32_t omr:1;
-		uint32_t sd:1;
-		uint32_t le:1;
-		uint32_t ra:1;
-		uint32_t reserved_4_4:1;
-		uint32_t dllle:1;
-		uint32_t reserved_6_6:1;
-		uint32_t flm:1;
-		uint32_t reserved_8_15:8;
-		uint32_t lme:6;
-		uint32_t reserved_22_24:3;
-		uint32_t eccrc:1;
-		uint32_t reserved_26_31:6;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_26_31:6,
+		__BITFIELD_FIELD(uint32_t eccrc:1,
+		__BITFIELD_FIELD(uint32_t reserved_22_24:3,
+		__BITFIELD_FIELD(uint32_t lme:6,
+		__BITFIELD_FIELD(uint32_t reserved_12_15:4,
+		__BITFIELD_FIELD(uint32_t link_rate:4,
+		__BITFIELD_FIELD(uint32_t flm:1,
+		__BITFIELD_FIELD(uint32_t reserved_6_6:1,
+		__BITFIELD_FIELD(uint32_t dllle:1,
+		__BITFIELD_FIELD(uint32_t reserved_4_4:1,
+		__BITFIELD_FIELD(uint32_t ra:1,
+		__BITFIELD_FIELD(uint32_t le:1,
+		__BITFIELD_FIELD(uint32_t sd:1,
+		__BITFIELD_FIELD(uint32_t omr:1,
+		;))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg452_s cn52xx;
-	struct cvmx_pciercx_cfg452_s cn52xxp1;
-	struct cvmx_pciercx_cfg452_s cn56xx;
-	struct cvmx_pciercx_cfg452_s cn56xxp1;
-	struct cvmx_pciercx_cfg452_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_22_31:10;
-		uint32_t lme:6;
-		uint32_t reserved_8_15:8;
-		uint32_t flm:1;
-		uint32_t reserved_6_6:1;
-		uint32_t dllle:1;
-		uint32_t reserved_4_4:1;
-		uint32_t ra:1;
-		uint32_t le:1;
-		uint32_t sd:1;
-		uint32_t omr:1;
-#else
-		uint32_t omr:1;
-		uint32_t sd:1;
-		uint32_t le:1;
-		uint32_t ra:1;
-		uint32_t reserved_4_4:1;
-		uint32_t dllle:1;
-		uint32_t reserved_6_6:1;
-		uint32_t flm:1;
-		uint32_t reserved_8_15:8;
-		uint32_t lme:6;
-		uint32_t reserved_22_31:10;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg452_s cn63xx;
-	struct cvmx_pciercx_cfg452_s cn63xxp1;
-	struct cvmx_pciercx_cfg452_cn61xx cn66xx;
-	struct cvmx_pciercx_cfg452_cn61xx cn68xx;
-	struct cvmx_pciercx_cfg452_cn61xx cn68xxp1;
-	struct cvmx_pciercx_cfg452_cn61xx cnf71xx;
-};
-
-union cvmx_pciercx_cfg453 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg453_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dlld:1;
-		uint32_t reserved_26_30:5;
-		uint32_t ack_nak:1;
-		uint32_t fcd:1;
-		uint32_t ilst:24;
-#else
-		uint32_t ilst:24;
-		uint32_t fcd:1;
-		uint32_t ack_nak:1;
-		uint32_t reserved_26_30:5;
-		uint32_t dlld:1;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg453_s cn52xx;
-	struct cvmx_pciercx_cfg453_s cn52xxp1;
-	struct cvmx_pciercx_cfg453_s cn56xx;
-	struct cvmx_pciercx_cfg453_s cn56xxp1;
-	struct cvmx_pciercx_cfg453_s cn61xx;
-	struct cvmx_pciercx_cfg453_s cn63xx;
-	struct cvmx_pciercx_cfg453_s cn63xxp1;
-	struct cvmx_pciercx_cfg453_s cn66xx;
-	struct cvmx_pciercx_cfg453_s cn68xx;
-	struct cvmx_pciercx_cfg453_s cn68xxp1;
-	struct cvmx_pciercx_cfg453_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg454 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg454_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t cx_nfunc:3;
-		uint32_t tmfcwt:5;
-		uint32_t tmanlt:5;
-		uint32_t tmrt:5;
-		uint32_t reserved_11_13:3;
-		uint32_t nskps:3;
-		uint32_t reserved_0_7:8;
-#else
-		uint32_t reserved_0_7:8;
-		uint32_t nskps:3;
-		uint32_t reserved_11_13:3;
-		uint32_t tmrt:5;
-		uint32_t tmanlt:5;
-		uint32_t tmfcwt:5;
-		uint32_t cx_nfunc:3;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg454_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_29_31:3;
-		uint32_t tmfcwt:5;
-		uint32_t tmanlt:5;
-		uint32_t tmrt:5;
-		uint32_t reserved_11_13:3;
-		uint32_t nskps:3;
-		uint32_t reserved_4_7:4;
-		uint32_t ntss:4;
-#else
-		uint32_t ntss:4;
-		uint32_t reserved_4_7:4;
-		uint32_t nskps:3;
-		uint32_t reserved_11_13:3;
-		uint32_t tmrt:5;
-		uint32_t tmanlt:5;
-		uint32_t tmfcwt:5;
-		uint32_t reserved_29_31:3;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg454_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg454_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg454_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg454_cn61xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t cx_nfunc:3;
-		uint32_t tmfcwt:5;
-		uint32_t tmanlt:5;
-		uint32_t tmrt:5;
-		uint32_t reserved_8_13:6;
-		uint32_t mfuncn:8;
-#else
-		uint32_t mfuncn:8;
-		uint32_t reserved_8_13:6;
-		uint32_t tmrt:5;
-		uint32_t tmanlt:5;
-		uint32_t tmfcwt:5;
-		uint32_t cx_nfunc:3;
-#endif
-	} cn61xx;
-	struct cvmx_pciercx_cfg454_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg454_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg454_cn61xx cn66xx;
-	struct cvmx_pciercx_cfg454_cn61xx cn68xx;
-	struct cvmx_pciercx_cfg454_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg454_cn61xx cnf71xx;
 };
 
 union cvmx_pciercx_cfg455 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg455_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t m_cfg0_filt:1;
-		uint32_t m_io_filt:1;
-		uint32_t msg_ctrl:1;
-		uint32_t m_cpl_ecrc_filt:1;
-		uint32_t m_ecrc_filt:1;
-		uint32_t m_cpl_len_err:1;
-		uint32_t m_cpl_attr_err:1;
-		uint32_t m_cpl_tc_err:1;
-		uint32_t m_cpl_fun_err:1;
-		uint32_t m_cpl_rid_err:1;
-		uint32_t m_cpl_tag_err:1;
-		uint32_t m_lk_filt:1;
-		uint32_t m_cfg1_filt:1;
-		uint32_t m_bar_match:1;
-		uint32_t m_pois_filt:1;
-		uint32_t m_fun:1;
-		uint32_t dfcwt:1;
-		uint32_t reserved_11_14:4;
-		uint32_t skpiv:11;
-#else
-		uint32_t skpiv:11;
-		uint32_t reserved_11_14:4;
-		uint32_t dfcwt:1;
-		uint32_t m_fun:1;
-		uint32_t m_pois_filt:1;
-		uint32_t m_bar_match:1;
-		uint32_t m_cfg1_filt:1;
-		uint32_t m_lk_filt:1;
-		uint32_t m_cpl_tag_err:1;
-		uint32_t m_cpl_rid_err:1;
-		uint32_t m_cpl_fun_err:1;
-		uint32_t m_cpl_tc_err:1;
-		uint32_t m_cpl_attr_err:1;
-		uint32_t m_cpl_len_err:1;
-		uint32_t m_ecrc_filt:1;
-		uint32_t m_cpl_ecrc_filt:1;
-		uint32_t msg_ctrl:1;
-		uint32_t m_io_filt:1;
-		uint32_t m_cfg0_filt:1;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg455_s cn52xx;
-	struct cvmx_pciercx_cfg455_s cn52xxp1;
-	struct cvmx_pciercx_cfg455_s cn56xx;
-	struct cvmx_pciercx_cfg455_s cn56xxp1;
-	struct cvmx_pciercx_cfg455_s cn61xx;
-	struct cvmx_pciercx_cfg455_s cn63xx;
-	struct cvmx_pciercx_cfg455_s cn63xxp1;
-	struct cvmx_pciercx_cfg455_s cn66xx;
-	struct cvmx_pciercx_cfg455_s cn68xx;
-	struct cvmx_pciercx_cfg455_s cn68xxp1;
-	struct cvmx_pciercx_cfg455_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg456 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg456_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_4_31:28;
-		uint32_t m_handle_flush:1;
-		uint32_t m_dabort_4ucpl:1;
-		uint32_t m_vend1_drp:1;
-		uint32_t m_vend0_drp:1;
-#else
-		uint32_t m_vend0_drp:1;
-		uint32_t m_vend1_drp:1;
-		uint32_t m_dabort_4ucpl:1;
-		uint32_t m_handle_flush:1;
-		uint32_t reserved_4_31:28;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg456_cn52xx {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_2_31:30;
-		uint32_t m_vend1_drp:1;
-		uint32_t m_vend0_drp:1;
-#else
-		uint32_t m_vend0_drp:1;
-		uint32_t m_vend1_drp:1;
-		uint32_t reserved_2_31:30;
-#endif
-	} cn52xx;
-	struct cvmx_pciercx_cfg456_cn52xx cn52xxp1;
-	struct cvmx_pciercx_cfg456_cn52xx cn56xx;
-	struct cvmx_pciercx_cfg456_cn52xx cn56xxp1;
-	struct cvmx_pciercx_cfg456_s cn61xx;
-	struct cvmx_pciercx_cfg456_cn52xx cn63xx;
-	struct cvmx_pciercx_cfg456_cn52xx cn63xxp1;
-	struct cvmx_pciercx_cfg456_s cn66xx;
-	struct cvmx_pciercx_cfg456_s cn68xx;
-	struct cvmx_pciercx_cfg456_cn52xx cn68xxp1;
-	struct cvmx_pciercx_cfg456_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg458 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg458_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dbg_info_l32:32;
-#else
-		uint32_t dbg_info_l32:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg458_s cn52xx;
-	struct cvmx_pciercx_cfg458_s cn52xxp1;
-	struct cvmx_pciercx_cfg458_s cn56xx;
-	struct cvmx_pciercx_cfg458_s cn56xxp1;
-	struct cvmx_pciercx_cfg458_s cn61xx;
-	struct cvmx_pciercx_cfg458_s cn63xx;
-	struct cvmx_pciercx_cfg458_s cn63xxp1;
-	struct cvmx_pciercx_cfg458_s cn66xx;
-	struct cvmx_pciercx_cfg458_s cn68xx;
-	struct cvmx_pciercx_cfg458_s cn68xxp1;
-	struct cvmx_pciercx_cfg458_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg459 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg459_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t dbg_info_u32:32;
-#else
-		uint32_t dbg_info_u32:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg459_s cn52xx;
-	struct cvmx_pciercx_cfg459_s cn52xxp1;
-	struct cvmx_pciercx_cfg459_s cn56xx;
-	struct cvmx_pciercx_cfg459_s cn56xxp1;
-	struct cvmx_pciercx_cfg459_s cn61xx;
-	struct cvmx_pciercx_cfg459_s cn63xx;
-	struct cvmx_pciercx_cfg459_s cn63xxp1;
-	struct cvmx_pciercx_cfg459_s cn66xx;
-	struct cvmx_pciercx_cfg459_s cn68xx;
-	struct cvmx_pciercx_cfg459_s cn68xxp1;
-	struct cvmx_pciercx_cfg459_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg460 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg460_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_20_31:12;
-		uint32_t tphfcc:8;
-		uint32_t tpdfcc:12;
-#else
-		uint32_t tpdfcc:12;
-		uint32_t tphfcc:8;
-		uint32_t reserved_20_31:12;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg460_s cn52xx;
-	struct cvmx_pciercx_cfg460_s cn52xxp1;
-	struct cvmx_pciercx_cfg460_s cn56xx;
-	struct cvmx_pciercx_cfg460_s cn56xxp1;
-	struct cvmx_pciercx_cfg460_s cn61xx;
-	struct cvmx_pciercx_cfg460_s cn63xx;
-	struct cvmx_pciercx_cfg460_s cn63xxp1;
-	struct cvmx_pciercx_cfg460_s cn66xx;
-	struct cvmx_pciercx_cfg460_s cn68xx;
-	struct cvmx_pciercx_cfg460_s cn68xxp1;
-	struct cvmx_pciercx_cfg460_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg461 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg461_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_20_31:12;
-		uint32_t tchfcc:8;
-		uint32_t tcdfcc:12;
-#else
-		uint32_t tcdfcc:12;
-		uint32_t tchfcc:8;
-		uint32_t reserved_20_31:12;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg461_s cn52xx;
-	struct cvmx_pciercx_cfg461_s cn52xxp1;
-	struct cvmx_pciercx_cfg461_s cn56xx;
-	struct cvmx_pciercx_cfg461_s cn56xxp1;
-	struct cvmx_pciercx_cfg461_s cn61xx;
-	struct cvmx_pciercx_cfg461_s cn63xx;
-	struct cvmx_pciercx_cfg461_s cn63xxp1;
-	struct cvmx_pciercx_cfg461_s cn66xx;
-	struct cvmx_pciercx_cfg461_s cn68xx;
-	struct cvmx_pciercx_cfg461_s cn68xxp1;
-	struct cvmx_pciercx_cfg461_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg462 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg462_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_20_31:12;
-		uint32_t tchfcc:8;
-		uint32_t tcdfcc:12;
-#else
-		uint32_t tcdfcc:12;
-		uint32_t tchfcc:8;
-		uint32_t reserved_20_31:12;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg462_s cn52xx;
-	struct cvmx_pciercx_cfg462_s cn52xxp1;
-	struct cvmx_pciercx_cfg462_s cn56xx;
-	struct cvmx_pciercx_cfg462_s cn56xxp1;
-	struct cvmx_pciercx_cfg462_s cn61xx;
-	struct cvmx_pciercx_cfg462_s cn63xx;
-	struct cvmx_pciercx_cfg462_s cn63xxp1;
-	struct cvmx_pciercx_cfg462_s cn66xx;
-	struct cvmx_pciercx_cfg462_s cn68xx;
-	struct cvmx_pciercx_cfg462_s cn68xxp1;
-	struct cvmx_pciercx_cfg462_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg463 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg463_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_3_31:29;
-		uint32_t rqne:1;
-		uint32_t trbne:1;
-		uint32_t rtlpfccnr:1;
-#else
-		uint32_t rtlpfccnr:1;
-		uint32_t trbne:1;
-		uint32_t rqne:1;
-		uint32_t reserved_3_31:29;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg463_s cn52xx;
-	struct cvmx_pciercx_cfg463_s cn52xxp1;
-	struct cvmx_pciercx_cfg463_s cn56xx;
-	struct cvmx_pciercx_cfg463_s cn56xxp1;
-	struct cvmx_pciercx_cfg463_s cn61xx;
-	struct cvmx_pciercx_cfg463_s cn63xx;
-	struct cvmx_pciercx_cfg463_s cn63xxp1;
-	struct cvmx_pciercx_cfg463_s cn66xx;
-	struct cvmx_pciercx_cfg463_s cn68xx;
-	struct cvmx_pciercx_cfg463_s cn68xxp1;
-	struct cvmx_pciercx_cfg463_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg464 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg464_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t wrr_vc3:8;
-		uint32_t wrr_vc2:8;
-		uint32_t wrr_vc1:8;
-		uint32_t wrr_vc0:8;
-#else
-		uint32_t wrr_vc0:8;
-		uint32_t wrr_vc1:8;
-		uint32_t wrr_vc2:8;
-		uint32_t wrr_vc3:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg464_s cn52xx;
-	struct cvmx_pciercx_cfg464_s cn52xxp1;
-	struct cvmx_pciercx_cfg464_s cn56xx;
-	struct cvmx_pciercx_cfg464_s cn56xxp1;
-	struct cvmx_pciercx_cfg464_s cn61xx;
-	struct cvmx_pciercx_cfg464_s cn63xx;
-	struct cvmx_pciercx_cfg464_s cn63xxp1;
-	struct cvmx_pciercx_cfg464_s cn66xx;
-	struct cvmx_pciercx_cfg464_s cn68xx;
-	struct cvmx_pciercx_cfg464_s cn68xxp1;
-	struct cvmx_pciercx_cfg464_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg465 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg465_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t wrr_vc7:8;
-		uint32_t wrr_vc6:8;
-		uint32_t wrr_vc5:8;
-		uint32_t wrr_vc4:8;
-#else
-		uint32_t wrr_vc4:8;
-		uint32_t wrr_vc5:8;
-		uint32_t wrr_vc6:8;
-		uint32_t wrr_vc7:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg465_s cn52xx;
-	struct cvmx_pciercx_cfg465_s cn52xxp1;
-	struct cvmx_pciercx_cfg465_s cn56xx;
-	struct cvmx_pciercx_cfg465_s cn56xxp1;
-	struct cvmx_pciercx_cfg465_s cn61xx;
-	struct cvmx_pciercx_cfg465_s cn63xx;
-	struct cvmx_pciercx_cfg465_s cn63xxp1;
-	struct cvmx_pciercx_cfg465_s cn66xx;
-	struct cvmx_pciercx_cfg465_s cn68xx;
-	struct cvmx_pciercx_cfg465_s cn68xxp1;
-	struct cvmx_pciercx_cfg465_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg466 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg466_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t rx_queue_order:1;
-		uint32_t type_ordering:1;
-		uint32_t reserved_24_29:6;
-		uint32_t queue_mode:3;
-		uint32_t reserved_20_20:1;
-		uint32_t header_credits:8;
-		uint32_t data_credits:12;
-#else
-		uint32_t data_credits:12;
-		uint32_t header_credits:8;
-		uint32_t reserved_20_20:1;
-		uint32_t queue_mode:3;
-		uint32_t reserved_24_29:6;
-		uint32_t type_ordering:1;
-		uint32_t rx_queue_order:1;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg466_s cn52xx;
-	struct cvmx_pciercx_cfg466_s cn52xxp1;
-	struct cvmx_pciercx_cfg466_s cn56xx;
-	struct cvmx_pciercx_cfg466_s cn56xxp1;
-	struct cvmx_pciercx_cfg466_s cn61xx;
-	struct cvmx_pciercx_cfg466_s cn63xx;
-	struct cvmx_pciercx_cfg466_s cn63xxp1;
-	struct cvmx_pciercx_cfg466_s cn66xx;
-	struct cvmx_pciercx_cfg466_s cn68xx;
-	struct cvmx_pciercx_cfg466_s cn68xxp1;
-	struct cvmx_pciercx_cfg466_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg467 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg467_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_24_31:8;
-		uint32_t queue_mode:3;
-		uint32_t reserved_20_20:1;
-		uint32_t header_credits:8;
-		uint32_t data_credits:12;
-#else
-		uint32_t data_credits:12;
-		uint32_t header_credits:8;
-		uint32_t reserved_20_20:1;
-		uint32_t queue_mode:3;
-		uint32_t reserved_24_31:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg467_s cn52xx;
-	struct cvmx_pciercx_cfg467_s cn52xxp1;
-	struct cvmx_pciercx_cfg467_s cn56xx;
-	struct cvmx_pciercx_cfg467_s cn56xxp1;
-	struct cvmx_pciercx_cfg467_s cn61xx;
-	struct cvmx_pciercx_cfg467_s cn63xx;
-	struct cvmx_pciercx_cfg467_s cn63xxp1;
-	struct cvmx_pciercx_cfg467_s cn66xx;
-	struct cvmx_pciercx_cfg467_s cn68xx;
-	struct cvmx_pciercx_cfg467_s cn68xxp1;
-	struct cvmx_pciercx_cfg467_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg468 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg468_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_24_31:8;
-		uint32_t queue_mode:3;
-		uint32_t reserved_20_20:1;
-		uint32_t header_credits:8;
-		uint32_t data_credits:12;
-#else
-		uint32_t data_credits:12;
-		uint32_t header_credits:8;
-		uint32_t reserved_20_20:1;
-		uint32_t queue_mode:3;
-		uint32_t reserved_24_31:8;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg468_s cn52xx;
-	struct cvmx_pciercx_cfg468_s cn52xxp1;
-	struct cvmx_pciercx_cfg468_s cn56xx;
-	struct cvmx_pciercx_cfg468_s cn56xxp1;
-	struct cvmx_pciercx_cfg468_s cn61xx;
-	struct cvmx_pciercx_cfg468_s cn63xx;
-	struct cvmx_pciercx_cfg468_s cn63xxp1;
-	struct cvmx_pciercx_cfg468_s cn66xx;
-	struct cvmx_pciercx_cfg468_s cn68xx;
-	struct cvmx_pciercx_cfg468_s cn68xxp1;
-	struct cvmx_pciercx_cfg468_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg490 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg490_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_26_31:6;
-		uint32_t header_depth:10;
-		uint32_t reserved_14_15:2;
-		uint32_t data_depth:14;
-#else
-		uint32_t data_depth:14;
-		uint32_t reserved_14_15:2;
-		uint32_t header_depth:10;
-		uint32_t reserved_26_31:6;
-#endif
+		__BITFIELD_FIELD(uint32_t m_cfg0_filt:1,
+		__BITFIELD_FIELD(uint32_t m_io_filt:1,
+		__BITFIELD_FIELD(uint32_t msg_ctrl:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_ecrc_filt:1,
+		__BITFIELD_FIELD(uint32_t m_ecrc_filt:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_len_err:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_attr_err:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_tc_err:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_fun_err:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_rid_err:1,
+		__BITFIELD_FIELD(uint32_t m_cpl_tag_err:1,
+		__BITFIELD_FIELD(uint32_t m_lk_filt:1,
+		__BITFIELD_FIELD(uint32_t m_cfg1_filt:1,
+		__BITFIELD_FIELD(uint32_t m_bar_match:1,
+		__BITFIELD_FIELD(uint32_t m_pois_filt:1,
+		__BITFIELD_FIELD(uint32_t m_fun:1,
+		__BITFIELD_FIELD(uint32_t dfcwt:1,
+		__BITFIELD_FIELD(uint32_t reserved_11_14:4,
+		__BITFIELD_FIELD(uint32_t skpiv:11,
+		;)))))))))))))))))))
 	} s;
-	struct cvmx_pciercx_cfg490_s cn52xx;
-	struct cvmx_pciercx_cfg490_s cn52xxp1;
-	struct cvmx_pciercx_cfg490_s cn56xx;
-	struct cvmx_pciercx_cfg490_s cn56xxp1;
-	struct cvmx_pciercx_cfg490_s cn61xx;
-	struct cvmx_pciercx_cfg490_s cn63xx;
-	struct cvmx_pciercx_cfg490_s cn63xxp1;
-	struct cvmx_pciercx_cfg490_s cn66xx;
-	struct cvmx_pciercx_cfg490_s cn68xx;
-	struct cvmx_pciercx_cfg490_s cn68xxp1;
-	struct cvmx_pciercx_cfg490_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg491 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg491_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_26_31:6;
-		uint32_t header_depth:10;
-		uint32_t reserved_14_15:2;
-		uint32_t data_depth:14;
-#else
-		uint32_t data_depth:14;
-		uint32_t reserved_14_15:2;
-		uint32_t header_depth:10;
-		uint32_t reserved_26_31:6;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg491_s cn52xx;
-	struct cvmx_pciercx_cfg491_s cn52xxp1;
-	struct cvmx_pciercx_cfg491_s cn56xx;
-	struct cvmx_pciercx_cfg491_s cn56xxp1;
-	struct cvmx_pciercx_cfg491_s cn61xx;
-	struct cvmx_pciercx_cfg491_s cn63xx;
-	struct cvmx_pciercx_cfg491_s cn63xxp1;
-	struct cvmx_pciercx_cfg491_s cn66xx;
-	struct cvmx_pciercx_cfg491_s cn68xx;
-	struct cvmx_pciercx_cfg491_s cn68xxp1;
-	struct cvmx_pciercx_cfg491_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg492 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg492_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_26_31:6;
-		uint32_t header_depth:10;
-		uint32_t reserved_14_15:2;
-		uint32_t data_depth:14;
-#else
-		uint32_t data_depth:14;
-		uint32_t reserved_14_15:2;
-		uint32_t header_depth:10;
-		uint32_t reserved_26_31:6;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg492_s cn52xx;
-	struct cvmx_pciercx_cfg492_s cn52xxp1;
-	struct cvmx_pciercx_cfg492_s cn56xx;
-	struct cvmx_pciercx_cfg492_s cn56xxp1;
-	struct cvmx_pciercx_cfg492_s cn61xx;
-	struct cvmx_pciercx_cfg492_s cn63xx;
-	struct cvmx_pciercx_cfg492_s cn63xxp1;
-	struct cvmx_pciercx_cfg492_s cn66xx;
-	struct cvmx_pciercx_cfg492_s cn68xx;
-	struct cvmx_pciercx_cfg492_s cn68xxp1;
-	struct cvmx_pciercx_cfg492_s cnf71xx;
 };
 
 union cvmx_pciercx_cfg515 {
 	uint32_t u32;
 	struct cvmx_pciercx_cfg515_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t reserved_21_31:11;
-		uint32_t s_d_e:1;
-		uint32_t ctcrb:1;
-		uint32_t cpyts:1;
-		uint32_t dsc:1;
-		uint32_t le:9;
-		uint32_t n_fts:8;
-#else
-		uint32_t n_fts:8;
-		uint32_t le:9;
-		uint32_t dsc:1;
-		uint32_t cpyts:1;
-		uint32_t ctcrb:1;
-		uint32_t s_d_e:1;
-		uint32_t reserved_21_31:11;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg515_s cn61xx;
-	struct cvmx_pciercx_cfg515_s cn63xx;
-	struct cvmx_pciercx_cfg515_s cn63xxp1;
-	struct cvmx_pciercx_cfg515_s cn66xx;
-	struct cvmx_pciercx_cfg515_s cn68xx;
-	struct cvmx_pciercx_cfg515_s cn68xxp1;
-	struct cvmx_pciercx_cfg515_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg516 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg516_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t phy_stat:32;
-#else
-		uint32_t phy_stat:32;
-#endif
-	} s;
-	struct cvmx_pciercx_cfg516_s cn52xx;
-	struct cvmx_pciercx_cfg516_s cn52xxp1;
-	struct cvmx_pciercx_cfg516_s cn56xx;
-	struct cvmx_pciercx_cfg516_s cn56xxp1;
-	struct cvmx_pciercx_cfg516_s cn61xx;
-	struct cvmx_pciercx_cfg516_s cn63xx;
-	struct cvmx_pciercx_cfg516_s cn63xxp1;
-	struct cvmx_pciercx_cfg516_s cn66xx;
-	struct cvmx_pciercx_cfg516_s cn68xx;
-	struct cvmx_pciercx_cfg516_s cn68xxp1;
-	struct cvmx_pciercx_cfg516_s cnf71xx;
-};
-
-union cvmx_pciercx_cfg517 {
-	uint32_t u32;
-	struct cvmx_pciercx_cfg517_s {
-#ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t phy_ctrl:32;
-#else
-		uint32_t phy_ctrl:32;
-#endif
+		__BITFIELD_FIELD(uint32_t reserved_21_31:11,
+		__BITFIELD_FIELD(uint32_t s_d_e:1,
+		__BITFIELD_FIELD(uint32_t ctcrb:1,
+		__BITFIELD_FIELD(uint32_t cpyts:1,
+		__BITFIELD_FIELD(uint32_t dsc:1,
+		__BITFIELD_FIELD(uint32_t le:9,
+		__BITFIELD_FIELD(uint32_t n_fts:8,
+		;)))))))
 	} s;
-	struct cvmx_pciercx_cfg517_s cn52xx;
-	struct cvmx_pciercx_cfg517_s cn52xxp1;
-	struct cvmx_pciercx_cfg517_s cn56xx;
-	struct cvmx_pciercx_cfg517_s cn56xxp1;
-	struct cvmx_pciercx_cfg517_s cn61xx;
-	struct cvmx_pciercx_cfg517_s cn63xx;
-	struct cvmx_pciercx_cfg517_s cn63xxp1;
-	struct cvmx_pciercx_cfg517_s cn66xx;
-	struct cvmx_pciercx_cfg517_s cn68xx;
-	struct cvmx_pciercx_cfg517_s cn68xxp1;
-	struct cvmx_pciercx_cfg517_s cnf71xx;
 };
 
 #endif
-- 
cgit v1.2.3


From e96b1b0648adf7d80606a99e9596177186fd90ab Mon Sep 17 00:00:00 2001
From: "Steven J. Hill" <Steven.Hill@cavium.com>
Date: Thu, 9 Mar 2017 08:16:03 -0600
Subject: MIPS: Octeon: Clean up platform code.

Remove unused headers and fix warnings from checkpatch.

Signed-off-by: Steven J. Hill <Steven.Hill@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15407/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/octeon-platform.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index e3c77d8a0d56..8505db478904 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -7,8 +7,6 @@
  * Copyright (C) 2008 Wind River Systems
  */
 
-#include <linux/init.h>
-#include <linux/delay.h>
 #include <linux/etherdevice.h>
 #include <linux/of_platform.h>
 #include <linux/of_fdt.h>
@@ -440,7 +438,7 @@ out:
 }
 device_initcall(octeon_rng_device_init);
 
-static struct of_device_id __initdata octeon_ids[] = {
+const struct of_device_id octeon_ids[] __initconst = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "cavium,octeon-6335-uctl", },
 	{ .compatible = "cavium,octeon-5750-usbn", },
@@ -480,6 +478,7 @@ static void __init octeon_fdt_set_phy(int eth, int phy_addr)
 	alt_phy_handle = fdt_getprop(initial_boot_params, eth, "cavium,alt-phy-handle", NULL);
 	if (alt_phy_handle) {
 		u32 alt_phandle = be32_to_cpup(alt_phy_handle);
+
 		alt_phy = fdt_node_offset_by_phandle(initial_boot_params, alt_phandle);
 	} else {
 		alt_phy = -1;
@@ -578,6 +577,7 @@ static void __init octeon_fdt_rm_ethernet(int node)
 	if (phy_handle) {
 		u32 ph = be32_to_cpup(phy_handle);
 		int p = fdt_node_offset_by_phandle(initial_boot_params, ph);
+
 		if (p >= 0)
 			fdt_nop_node(initial_boot_params, p);
 	}
@@ -727,6 +727,7 @@ int __init octeon_prune_device_tree(void)
 
 	for (i = 0; i < 2; i++) {
 		int mgmt;
+
 		snprintf(name_buffer, sizeof(name_buffer),
 			 "mix%d", i);
 		alias_prop = fdt_getprop(initial_boot_params, aliases,
@@ -742,6 +743,7 @@ int __init octeon_prune_device_tree(void)
 						 name_buffer);
 			} else {
 				int phy_addr = cvmx_helper_board_get_mii_address(CVMX_HELPER_BOARD_MGMT_IPD_PORT + i);
+
 				octeon_fdt_set_phy(mgmt, phy_addr);
 			}
 		}
@@ -750,6 +752,7 @@ int __init octeon_prune_device_tree(void)
 	pip_path = fdt_getprop(initial_boot_params, aliases, "pip", NULL);
 	if (pip_path) {
 		int pip = fdt_path_offset(initial_boot_params, pip_path);
+
 		if (pip	 >= 0)
 			for (i = 0; i <= 4; i++)
 				octeon_fdt_pip_iface(pip, i);
@@ -766,6 +769,7 @@ int __init octeon_prune_device_tree(void)
 
 	for (i = 0; i < 2; i++) {
 		int i2c;
+
 		snprintf(name_buffer, sizeof(name_buffer),
 			 "twsi%d", i);
 		alias_prop = fdt_getprop(initial_boot_params, aliases,
@@ -796,11 +800,11 @@ int __init octeon_prune_device_tree(void)
 
 	for (i = 0; i < 2; i++) {
 		int i2c;
+
 		snprintf(name_buffer, sizeof(name_buffer),
 			 "smi%d", i);
 		alias_prop = fdt_getprop(initial_boot_params, aliases,
 					name_buffer, NULL);
-
 		if (alias_prop) {
 			i2c = fdt_path_offset(initial_boot_params, alias_prop);
 			if (i2c < 0)
@@ -823,6 +827,7 @@ int __init octeon_prune_device_tree(void)
 
 	for (i = 0; i < 3; i++) {
 		int uart;
+
 		snprintf(name_buffer, sizeof(name_buffer),
 			 "uart%d", i);
 		alias_prop = fdt_getprop(initial_boot_params, aliases,
@@ -862,6 +867,7 @@ int __init octeon_prune_device_tree(void)
 		int len;
 
 		int cf = fdt_path_offset(initial_boot_params, alias_prop);
+
 		base_ptr = 0;
 		if (octeon_bootinfo->major_version == 1
 			&& octeon_bootinfo->minor_version >= 1) {
@@ -911,6 +917,7 @@ int __init octeon_prune_device_tree(void)
 			fdt_nop_property(initial_boot_params, cf, "cavium,dma-engine-handle");
 			if (!is_16bit) {
 				__be32 width = cpu_to_be32(8);
+
 				fdt_setprop_inplace(initial_boot_params, cf,
 						"cavium,bus-width", &width, sizeof(width));
 			}
@@ -1036,6 +1043,7 @@ end_led:
 		} else  {
 			__be32 new_f[1];
 			enum cvmx_helper_board_usb_clock_types c;
+
 			c = __cvmx_helper_board_usb_get_clock_type();
 			switch (c) {
 			case USB_CLOCK_TYPE_REF_48:
-- 
cgit v1.2.3


From 5373633cc9253ba82547473e899cab141c54133e Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 16 Mar 2017 18:06:12 -0700
Subject: MIPS: Disable Werror when W= is set

Using any value for W= will lead to a ton of warnings which are turned
into fatal errors because MIPS adds -Werror to arch/mips/*.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: james.hogan@imgtec.com
Patchwork: https://patchwork.linux-mips.org/patch/15785/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kbuild | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/Kbuild b/arch/mips/Kbuild
index 5c3f688a5232..5cef58651db0 100644
--- a/arch/mips/Kbuild
+++ b/arch/mips/Kbuild
@@ -1,7 +1,9 @@
 # Fail on warnings - also for files referenced in subdirs
 # -Werror can be disabled for specific files using:
 # CFLAGS_<file.o> := -Wno-error
+ifeq ($(W),)
 subdir-ccflags-y := -Werror
+endif
 
 # platform specific definitions
 include arch/mips/Kbuild.platforms
-- 
cgit v1.2.3


From 296e46db00731b05c796e4f1db4bfe2e0efd80c1 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 21 Mar 2017 22:13:06 +0100
Subject: MIPS: Don't unnecessarily include kmalloc.h into <asm/cache.h>.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cache.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h
index b4db69fbc40c..99d6632c5047 100644
--- a/arch/mips/include/asm/cache.h
+++ b/arch/mips/include/asm/cache.h
@@ -9,8 +9,6 @@
 #ifndef _ASM_CACHE_H
 #define _ASM_CACHE_H
 
-#include <kmalloc.h>
-
 #define L1_CACHE_SHIFT		CONFIG_MIPS_L1_CACHE_SHIFT
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
-- 
cgit v1.2.3


From 4437637c3277fa9aba4e443329b5db3b6d34eedf Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 21 Mar 2017 22:14:30 +0100
Subject: MIPS: Delete unused definition of SMP_CACHE_SHIFT.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cache.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h
index 99d6632c5047..d7e314b6be09 100644
--- a/arch/mips/include/asm/cache.h
+++ b/arch/mips/include/asm/cache.h
@@ -12,7 +12,6 @@
 #define L1_CACHE_SHIFT		CONFIG_MIPS_L1_CACHE_SHIFT
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
-#define SMP_CACHE_SHIFT		L1_CACHE_SHIFT
 #define SMP_CACHE_BYTES		L1_CACHE_BYTES
 
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
-- 
cgit v1.2.3


From 294d62743754f1fefefccccb09f8deec48f00969 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 21 Mar 2017 22:40:43 +0100
Subject: MIPS: Delete redundant definition of SMP_CACHE_BYTES.

<linux/cache.h> already defines SMP_CACHE_BYTES as L1_CACHE_BYTES.

This change results in a build error in <asm/cpu-info.h> which directly
includes <asm/cache.h>.  Fix this by including <linux/cache.h> instead.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cache.h    | 2 --
 arch/mips/include/asm/cpu-info.h | 3 +--
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h
index d7e314b6be09..fc67947ed658 100644
--- a/arch/mips/include/asm/cache.h
+++ b/arch/mips/include/asm/cache.h
@@ -12,8 +12,6 @@
 #define L1_CACHE_SHIFT		CONFIG_MIPS_L1_CACHE_SHIFT
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
-#define SMP_CACHE_BYTES		L1_CACHE_BYTES
-
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
 #endif /* _ASM_CACHE_H */
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index edbe2734a1bf..d43b3045109e 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -12,10 +12,9 @@
 #ifndef __ASM_CPU_INFO_H
 #define __ASM_CPU_INFO_H
 
+#include <linux/cache.h>
 #include <linux/types.h>
 
-#include <asm/cache.h>
-
 /*
  * Descriptor for a cache
  */
-- 
cgit v1.2.3


From 17c99d9421695a0e0de18bf1e7091d859e20ec1d Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Thu, 16 Mar 2017 21:00:28 +0800
Subject: MIPS: Loongson-3: Select MIPS_L1_CACHE_SHIFT_6

Some newer Loongson-3 have 64 bytes cache lines, so select
MIPS_L1_CACHE_SHIFT_6.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J . Hill <Steven.Hill@caviumnetworks.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15755/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f4dd2c322d4b..2afb41c52ba0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1374,6 +1374,7 @@ config CPU_LOONGSON3
 	select WEAK_ORDERING
 	select WEAK_REORDERING_BEYOND_LLSC
 	select MIPS_PGD_C0_CONTEXT
+	select MIPS_L1_CACHE_SHIFT_6
 	select GPIOLIB
 	help
 		The Loongson 3 processor implements the MIPS64R2 instruction
-- 
cgit v1.2.3


From 5bba7aa4958e271c3ffceb70d47d3206524cf489 Mon Sep 17 00:00:00 2001
From: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
Date: Mon, 13 Mar 2017 16:36:35 +0100
Subject: MIPS: r2-on-r6-emu: Fix BLEZL and BGTZL identification

Fix the problem of inaccurate identification of instructions BLEZL and
BGTZL in R2 emulation code by making sure all necessary encoding
specifications are met.

Previously, certain R6 instructions could be identified as BLEZL or
BGTZL. R2 emulation routine didn't take into account that both BLEZL
and BGTZL instructions require their rt field (bits 20 to 16 of
instruction encoding) to be 0, and that, at same time, if the value in
that field is not 0, the encoding may represent a legitimate MIPS R6
instruction.

This means that a problem could occur after emulation optimization,
when emulation routine tried to pipeline emulation, picked up a next
candidate, and subsequently misrecognized an R6 instruction as BLEZL
or BGTZL.

It should be said that for single pass strategy, the problem does not
happen because CPU doesn't trap on branch-compacts which share opcode
space with BLEZL/BGTZL (but have rt field != 0, of course).

Signed-off-by: Leonid Yegoshin <leonid.yegoshin@imgtec.com>
Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtech.com>
Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtech.com>
Reported-by: Douglas Leung <douglas.leung@imgtec.com>
Reviewed-by: Paul Burton <paul.burton@imgtec.com>
Cc: james.hogan@imgtec.com
Cc: petar.jovanovic@imgtec.com
Cc: goran.ferenc@imgtec.com
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15456/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/mips-r2-to-r6-emul.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index d8f1cf1ec370..50558b30a809 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -1096,10 +1096,20 @@ repeat:
 		}
 		break;
 
-	case beql_op:
-	case bnel_op:
 	case blezl_op:
 	case bgtzl_op:
+		/*
+		 * For BLEZL and BGTZL, rt field must be set to 0. If this
+		 * is not the case, this may be an encoding of a MIPS R6
+		 * instruction, so return to CPU execution if this occurs
+		 */
+		if (MIPSInst_RT(inst)) {
+			err = SIGILL;
+			break;
+		}
+		/* fall through */
+	case beql_op:
+	case bnel_op:
 		if (delay_slot(regs)) {
 			err = SIGILL;
 			break;
-- 
cgit v1.2.3


From 411dac79cc2ed80f7e348ccc23eb4d8b0ba9f6d5 Mon Sep 17 00:00:00 2001
From: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Date: Mon, 13 Mar 2017 16:36:36 +0100
Subject: MIPS: r2-on-r6-emu: Clear BLTZALL and BGEZALL debugfs counters

Add missing clearing of BLTZALL and BGEZALL emulation counters in
function mipsr2_stats_clear_show().

Previously, it was not possible to reset BLTZALL and BGEZALL
emulation counters - their value remained the same even after
explicit request via debugfs. As far as other related counters
are concerned, they all seem to be properly cleared.

This change affects debugfs operation only, core R2 emulation
functionality is not affected.

Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Reviewed-by: Paul Burton <paul.burton@imgtec.com>
Cc: james.hogan@imgtec.com
Cc: leonid.yegoshin@imgtec.com
Cc: douglas.leung@imgtec.com
Cc: petar.jovanovic@imgtec.com
Cc: miodrag.dinic@imgtec.com
Cc: goran.ferenc@imgtec.com
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15517/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/mips-r2-to-r6-emul.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index 50558b30a809..e1416f1d2477 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -2339,6 +2339,8 @@ static int mipsr2_stats_clear_show(struct seq_file *s, void *unused)
 	__this_cpu_write((mipsr2bremustats).bgezl, 0);
 	__this_cpu_write((mipsr2bremustats).bltzll, 0);
 	__this_cpu_write((mipsr2bremustats).bgezll, 0);
+	__this_cpu_write((mipsr2bremustats).bltzall, 0);
+	__this_cpu_write((mipsr2bremustats).bgezall, 0);
 	__this_cpu_write((mipsr2bremustats).bltzal, 0);
 	__this_cpu_write((mipsr2bremustats).bgezal, 0);
 	__this_cpu_write((mipsr2bremustats).beql, 0);
-- 
cgit v1.2.3


From 8bcd84a4a37c88d8304ca3a64f0461a51487e239 Mon Sep 17 00:00:00 2001
From: Douglas Leung <douglas.leung@imgtec.com>
Date: Mon, 13 Mar 2017 16:36:37 +0100
Subject: MIPS: math-emu: Fix BC1EQZ and BC1NEZ condition handling

Correct the treatment of branching conditions for BC1EQZ and BC1NEZ
instructions in function isBranchInstr().

Previously, corresponding conditions were swapped, which in turn meant
that, for these two instructions, function isBranchInstr() returned
wrong value in its output parameter contpc.

This change is actually an extension of the fix done by the commit
93583e178ebf ("MIPS: math-emu: Fix BC1{EQ,NE}Z emulation"). That commit
dealt with a similar problem in function cop1Emulate(), while this
commit deals with condition handling in function isBranchInstr().
The code styles of changes in these two commits are kept as
consistent as possible.

Signed-off-by: Douglas Leung <douglas.leung@imgtec.com>
Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com>
Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Reviewed-by: Paul Burton <paul.burton@imgtec.com>
Cc: james.hogan@imgtec.com
Cc: leonid.yegoshin@imgtec.com
Cc: petar.jovanovic@imgtec.com
Cc: goran.ferenc@imgtec.com
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15489/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/math-emu/cp1emu.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index a298ac93edcc..f12fde10c8ad 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -439,6 +439,8 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 	union mips_instruction insn = (union mips_instruction)dec_insn.insn;
 	unsigned int fcr31;
 	unsigned int bit = 0;
+	unsigned int bit0;
+	union fpureg *fpr;
 
 	switch (insn.i_format.opcode) {
 	case spec_op:
@@ -706,14 +708,14 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 		    ((insn.i_format.rs == bc1eqz_op) ||
 		     (insn.i_format.rs == bc1nez_op))) {
 			bit = 0;
+			fpr = &current->thread.fpu.fpr[insn.i_format.rt];
+			bit0 = get_fpr32(fpr, 0) & 0x1;
 			switch (insn.i_format.rs) {
 			case bc1eqz_op:
-				if (get_fpr32(&current->thread.fpu.fpr[insn.i_format.rt], 0) & 0x1)
-				    bit = 1;
+				bit = bit0 == 0;
 				break;
 			case bc1nez_op:
-				if (!(get_fpr32(&current->thread.fpu.fpr[insn.i_format.rt], 0) & 0x1))
-				    bit = 1;
+				bit = bit0 != 0;
 				break;
 			}
 			if (bit)
-- 
cgit v1.2.3


From 835ed7bf12609ce23d42956e74206cc539a0ce5f Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.kw@hitachi.com>
Date: Thu, 23 Mar 2017 08:04:14 +0900
Subject: mtd: spi-nor: Add support for N25Q256A11

Add new Micron N25Q256A (N25Q256A11) 256Mbit NOR Flash in the list
of supported devices. This chip has the same structure as the N25Q256A
but ID and voltage (1V8) to use is different. Therefore, this adds
N25Q256A11 as n25q256ax1.

In the future, for new Micron memories we could use the patterns
"n25q*ax1" for 1V8 and "n25q*ax3" for 3V3 memories.

Signed-off-by: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.kw@hitachi.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index d3cb44b28490..19b0b01be2d3 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1037,6 +1037,7 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_QUAD_READ) },
+	{ "n25q256ax1",  INFO(0x20bb19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
 	{ "n25q512ax3",  INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
 	{ "n25q00",      INFO(0x20ba21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
-- 
cgit v1.2.3


From c3f0d0bc5b01ad90c45276952802455750444b4f Mon Sep 17 00:00:00 2001
From: Mark Charlebois <charlebm@gmail.com>
Date: Fri, 31 Mar 2017 22:38:13 +0200
Subject: kbuild, LLVMLinux: Add -Werror to cc-option to support clang
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Clang will warn about unknown warnings but will not return false
unless -Werror is set. GCC will return false if an unknown
warning is passed.

Adding -Werror make both compiler behave the same.

[arnd: it turns out we need the same patch for testing whether -ffunction-sections
       works right with gcc. I've build tested extensively with this patch
       applied, so let's just merge this one now.]

Signed-off-by: Mark Charlebois <charlebm@gmail.com>
Signed-off-by: Behan Webster <behanw@converseincode.com>
Reviewed-by: Jan-Simon Möller <dl9pf@gmx.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/Kbuild.include | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index d6ca649cb0e9..a70fd26204de 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -116,12 +116,12 @@ CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
 # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
 
 cc-option = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+	$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
 
 # cc-option-yn
 # Usage: flag := $(call cc-option-yn,-march=winchip-c6)
 cc-option-yn = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
+	$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
 
 # cc-option-align
 # Prefix align with either -falign or -malign
@@ -131,7 +131,7 @@ cc-option-align = $(subst -functions=0,,\
 # cc-disable-warning
 # Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
 cc-disable-warning = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+	$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
 
 # cc-name
 # Expands to either gcc or clang
-- 
cgit v1.2.3


From 6895baa6a64fa9499e4d11696d59b3522c527ed0 Mon Sep 17 00:00:00 2001
From: Lv Zheng <lv.zheng@intel.com>
Date: Tue, 11 Apr 2017 15:48:05 +0800
Subject: ACPICA: Add non-linux host build support

_LINUX: used to detect a target build is a linux kernel/application.
__linux__: used to detect a build is on a linux hosts.

Thus we can see: if a linux kernel build is performed on environments other
than linux hosts, __linux__ may not be defined by the compiler and _LINUX
cannot cover linux kernel resident ACPICA files, as it's only defined in
<linux/acpi.h> and hence only allows non ACPICA kernel files to correctly
include aclinux.h.
As a conclusion, we don't actually support such build.

This patch adds -D_LINUX for ACPICA files so that kernel builds on any
hosts can use unified _LINUX as a linux kernel target indication to
correctly include aclinux.h.

Tested-by: Al Stone <ahs3@redhat.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
index 32d93edbc479..dea65306b687 100644
--- a/drivers/acpi/acpica/Makefile
+++ b/drivers/acpi/acpica/Makefile
@@ -2,7 +2,7 @@
 # Makefile for ACPICA Core interpreter
 #
 
-ccflags-y			:= -Os -DBUILDING_ACPICA
+ccflags-y			:= -Os -D_LINUX -DBUILDING_ACPICA
 ccflags-$(CONFIG_ACPI_DEBUG)	+= -DACPI_DEBUG_OUTPUT
 
 # use acpi.o to put all files here into acpi.o modparam namespace
-- 
cgit v1.2.3


From 1f83f5e42b5b17f844955b49e986c3f1f187dd54 Mon Sep 17 00:00:00 2001
From: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Date: Fri, 7 Apr 2017 13:40:28 +0200
Subject: MIPS: Use common outgoing-CPU-notification code

Replace the open-coded CPU-offline notification with common code.
In particular avoid calling scheduler code using RCU from an offline CPU
that RCU is ignoring.

Signed-off-by: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15921/
Patchwork: https://patchwork.linux-mips.org/patch/15953/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp-cps.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 6d45f05538c8..306b4a64cb8f 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -8,6 +8,7 @@
  * option) any later version.
  */
 
+#include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/irqchip/mips-gic.h>
@@ -408,7 +409,6 @@ static int cps_cpu_disable(void)
 	return 0;
 }
 
-static DECLARE_COMPLETION(cpu_death_chosen);
 static unsigned cpu_death_sibling;
 static enum {
 	CPU_DEATH_HALT,
@@ -444,7 +444,7 @@ void play_dead(void)
 	}
 
 	/* This CPU has chosen its way out */
-	complete(&cpu_death_chosen);
+	(void)cpu_report_death();
 
 	if (cpu_death == CPU_DEATH_HALT) {
 		vpe_id = cpu_vpe_id(&cpu_data[cpu]);
@@ -493,8 +493,7 @@ static void cps_cpu_die(unsigned int cpu)
 	int err;
 
 	/* Wait for the cpu to choose its way out */
-	if (!wait_for_completion_timeout(&cpu_death_chosen,
-					 msecs_to_jiffies(5000))) {
+	if (!cpu_wait_death(cpu, 5)) {
 		pr_err("CPU%u: didn't offline\n", cpu);
 		return;
 	}
-- 
cgit v1.2.3


From 828d1e4e98654e2284496e2fd7f9605ba04ef02a Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@imgtec.com>
Date: Thu, 6 Apr 2017 11:29:06 +0100
Subject: MIPS: Remove dead define of ST_OFF

Commit 1a3d59579b9f ("MIPS: Tidy up FPU context switching") removed the
last usage of the macro ST_OFF. Remove the dead code.

Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: trivial@kernel.org
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15898/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/r4k_switch.S | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 758577861523..7b386d54fd65 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -25,12 +25,6 @@
 /* preprocessor replaces the fp in ".set fp=64" with $30 otherwise */
 #undef fp
 
-/*
- * Offset to the current process status flags, the first 32 bytes of the
- * stack are not used.
- */
-#define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS)
-
 #ifndef USE_ALTERNATE_RESUME_IMPL
 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
-- 
cgit v1.2.3


From 33679a50370db9aa1a3f0cdf5f70c1c07236a4b2 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 30 Mar 2017 14:52:15 -0700
Subject: MIPS: uasm: Remove needless ISA abstraction

We always either target MIPS32/MIPS64 or microMIPS, and always include
one & only one of uasm-mips.c or uasm-micromips.c. Therefore the
abstraction of the ISA in asm/uasm.h declaring functions for either ISA
is redundant & needless. Remove it to simplify the code.

This is largely the result of the following:

  :%s/ISAOPC(\(.\{-}\))/uasm_i##\1/
  :%s/ISAFUNC(\(.\{-}\))/\1/

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: Paul Burton <paul.burton@imgtec.com>
Patchwork: https://patchwork.linux-mips.org/patch/15844/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/uasm.h |  87 ++++++++----------------
 arch/mips/mm/uasm.c          | 156 +++++++++++++++++++++----------------------
 2 files changed, 106 insertions(+), 137 deletions(-)

diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index d91ed5b506ed..3748f4d120a5 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -21,77 +21,46 @@
 #define UASM_EXPORT_SYMBOL(sym)
 #endif
 
-#define _UASM_ISA_CLASSIC	0
-#define _UASM_ISA_MICROMIPS	1
-
-#ifndef UASM_ISA
-#ifdef CONFIG_CPU_MICROMIPS
-#define UASM_ISA	_UASM_ISA_MICROMIPS
-#else
-#define UASM_ISA	_UASM_ISA_CLASSIC
-#endif
-#endif
-
-#if (UASM_ISA == _UASM_ISA_CLASSIC)
-#ifdef CONFIG_CPU_MICROMIPS
-#define ISAOPC(op)	CL_uasm_i##op
-#define ISAFUNC(x)	CL_##x
-#else
-#define ISAOPC(op)	uasm_i##op
-#define ISAFUNC(x)	x
-#endif
-#elif (UASM_ISA == _UASM_ISA_MICROMIPS)
-#ifdef CONFIG_CPU_MICROMIPS
-#define ISAOPC(op)	uasm_i##op
-#define ISAFUNC(x)	x
-#else
-#define ISAOPC(op)	MM_uasm_i##op
-#define ISAFUNC(x)	MM_##x
-#endif
-#else
-#error Unsupported micro-assembler ISA!!!
-#endif
-
 #define Ip_u1u2u3(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
 
 #define Ip_u2u1u3(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
 
 #define Ip_u3u2u1(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
 
 #define Ip_u3u1u2(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, unsigned int c)
 
 #define Ip_u1u2s3(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, signed int c)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, signed int c)
 
 #define Ip_u2s3u1(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, signed int b, unsigned int c)
+void uasm_i##op(u32 **buf, unsigned int a, signed int b, unsigned int c)
 
 #define Ip_s3s1s2(op)							\
-void ISAOPC(op)(u32 **buf, int a, int b, int c)
+void uasm_i##op(u32 **buf, int a, int b, int c)
 
 #define Ip_u2u1s3(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, signed int c)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, signed int c)
 
 #define Ip_u2u1msbu3(op)						\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, unsigned int c, \
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b, unsigned int c, \
 	   unsigned int d)
 
 #define Ip_u1u2(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b)
 
 #define Ip_u2u1(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b)
+void uasm_i##op(u32 **buf, unsigned int a, unsigned int b)
 
 #define Ip_u1s2(op)							\
-void ISAOPC(op)(u32 **buf, unsigned int a, signed int b)
+void uasm_i##op(u32 **buf, unsigned int a, signed int b)
 
-#define Ip_u1(op) void ISAOPC(op)(u32 **buf, unsigned int a)
+#define Ip_u1(op) void uasm_i##op(u32 **buf, unsigned int a)
 
-#define Ip_0(op) void ISAOPC(op)(u32 **buf)
+#define Ip_0(op) void uasm_i##op(u32 **buf)
 
 Ip_u2u1s3(_addiu);
 Ip_u3u1u2(_addu);
@@ -191,20 +160,20 @@ struct uasm_label {
 	int lab;
 };
 
-void ISAFUNC(uasm_build_label)(struct uasm_label **lab, u32 *addr,
+void uasm_build_label(struct uasm_label **lab, u32 *addr,
 			int lid);
 #ifdef CONFIG_64BIT
-int ISAFUNC(uasm_in_compat_space_p)(long addr);
+int uasm_in_compat_space_p(long addr);
 #endif
-int ISAFUNC(uasm_rel_hi)(long val);
-int ISAFUNC(uasm_rel_lo)(long val);
-void ISAFUNC(UASM_i_LA_mostly)(u32 **buf, unsigned int rs, long addr);
-void ISAFUNC(UASM_i_LA)(u32 **buf, unsigned int rs, long addr);
+int uasm_rel_hi(long val);
+int uasm_rel_lo(long val);
+void UASM_i_LA_mostly(u32 **buf, unsigned int rs, long addr);
+void UASM_i_LA(u32 **buf, unsigned int rs, long addr);
 
 #define UASM_L_LA(lb)							\
-static inline void ISAFUNC(uasm_l##lb)(struct uasm_label **lab, u32 *addr) \
+static inline void uasm_l##lb(struct uasm_label **lab, u32 *addr)	\
 {									\
-	ISAFUNC(uasm_build_label)(lab, addr, label##lb);		\
+	uasm_build_label(lab, addr, label##lb);				\
 }
 
 /* convenience macros for instructions */
@@ -256,27 +225,27 @@ static inline void uasm_i_drotr_safe(u32 **p, unsigned int a1,
 				     unsigned int a2, unsigned int a3)
 {
 	if (a3 < 32)
-		ISAOPC(_drotr)(p, a1, a2, a3);
+		uasm_i_drotr(p, a1, a2, a3);
 	else
-		ISAOPC(_drotr32)(p, a1, a2, a3 - 32);
+		uasm_i_drotr32(p, a1, a2, a3 - 32);
 }
 
 static inline void uasm_i_dsll_safe(u32 **p, unsigned int a1,
 				    unsigned int a2, unsigned int a3)
 {
 	if (a3 < 32)
-		ISAOPC(_dsll)(p, a1, a2, a3);
+		uasm_i_dsll(p, a1, a2, a3);
 	else
-		ISAOPC(_dsll32)(p, a1, a2, a3 - 32);
+		uasm_i_dsll32(p, a1, a2, a3 - 32);
 }
 
 static inline void uasm_i_dsrl_safe(u32 **p, unsigned int a1,
 				    unsigned int a2, unsigned int a3)
 {
 	if (a3 < 32)
-		ISAOPC(_dsrl)(p, a1, a2, a3);
+		uasm_i_dsrl(p, a1, a2, a3);
 	else
-		ISAOPC(_dsrl32)(p, a1, a2, a3 - 32);
+		uasm_i_dsrl32(p, a1, a2, a3 - 32);
 }
 
 /* Handle relocations. */
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index 7f400c8d4645..730363b59bac 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -350,7 +350,7 @@ I_u2u1u3(_lddir)
 
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
 #include <asm/octeon/octeon.h>
-void ISAFUNC(uasm_i_pref)(u32 **buf, unsigned int a, signed int b,
+void uasm_i_pref(u32 **buf, unsigned int a, signed int b,
 			    unsigned int c)
 {
 	if (CAVIUM_OCTEON_DCACHE_PREFETCH_WAR && a <= 24 && a != 5)
@@ -362,26 +362,26 @@ void ISAFUNC(uasm_i_pref)(u32 **buf, unsigned int a, signed int b,
 	else
 		build_insn(buf, insn_pref, c, a, b);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_i_pref));
+UASM_EXPORT_SYMBOL(uasm_i_pref);
 #else
 I_u2s3u1(_pref)
 #endif
 
 /* Handle labels. */
-void ISAFUNC(uasm_build_label)(struct uasm_label **lab, u32 *addr, int lid)
+void uasm_build_label(struct uasm_label **lab, u32 *addr, int lid)
 {
 	(*lab)->addr = addr;
 	(*lab)->lab = lid;
 	(*lab)++;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_build_label));
+UASM_EXPORT_SYMBOL(uasm_build_label);
 
-int ISAFUNC(uasm_in_compat_space_p)(long addr)
+int uasm_in_compat_space_p(long addr)
 {
 	/* Is this address in 32bit compat space? */
 	return addr == (int)addr;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_in_compat_space_p));
+UASM_EXPORT_SYMBOL(uasm_in_compat_space_p);
 
 static int uasm_rel_highest(long val)
 {
@@ -401,64 +401,64 @@ static int uasm_rel_higher(long val)
 #endif
 }
 
-int ISAFUNC(uasm_rel_hi)(long val)
+int uasm_rel_hi(long val)
 {
 	return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_rel_hi));
+UASM_EXPORT_SYMBOL(uasm_rel_hi);
 
-int ISAFUNC(uasm_rel_lo)(long val)
+int uasm_rel_lo(long val)
 {
 	return ((val & 0xffff) ^ 0x8000) - 0x8000;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_rel_lo));
+UASM_EXPORT_SYMBOL(uasm_rel_lo);
 
-void ISAFUNC(UASM_i_LA_mostly)(u32 **buf, unsigned int rs, long addr)
+void UASM_i_LA_mostly(u32 **buf, unsigned int rs, long addr)
 {
-	if (!ISAFUNC(uasm_in_compat_space_p)(addr)) {
-		ISAFUNC(uasm_i_lui)(buf, rs, uasm_rel_highest(addr));
+	if (!uasm_in_compat_space_p(addr)) {
+		uasm_i_lui(buf, rs, uasm_rel_highest(addr));
 		if (uasm_rel_higher(addr))
-			ISAFUNC(uasm_i_daddiu)(buf, rs, rs, uasm_rel_higher(addr));
-		if (ISAFUNC(uasm_rel_hi(addr))) {
-			ISAFUNC(uasm_i_dsll)(buf, rs, rs, 16);
-			ISAFUNC(uasm_i_daddiu)(buf, rs, rs,
-					ISAFUNC(uasm_rel_hi)(addr));
-			ISAFUNC(uasm_i_dsll)(buf, rs, rs, 16);
+			uasm_i_daddiu(buf, rs, rs, uasm_rel_higher(addr));
+		if (uasm_rel_hi(addr)) {
+			uasm_i_dsll(buf, rs, rs, 16);
+			uasm_i_daddiu(buf, rs, rs,
+					uasm_rel_hi(addr));
+			uasm_i_dsll(buf, rs, rs, 16);
 		} else
-			ISAFUNC(uasm_i_dsll32)(buf, rs, rs, 0);
+			uasm_i_dsll32(buf, rs, rs, 0);
 	} else
-		ISAFUNC(uasm_i_lui)(buf, rs, ISAFUNC(uasm_rel_hi(addr)));
+		uasm_i_lui(buf, rs, uasm_rel_hi(addr));
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(UASM_i_LA_mostly));
+UASM_EXPORT_SYMBOL(UASM_i_LA_mostly);
 
-void ISAFUNC(UASM_i_LA)(u32 **buf, unsigned int rs, long addr)
+void UASM_i_LA(u32 **buf, unsigned int rs, long addr)
 {
-	ISAFUNC(UASM_i_LA_mostly)(buf, rs, addr);
-	if (ISAFUNC(uasm_rel_lo(addr))) {
-		if (!ISAFUNC(uasm_in_compat_space_p)(addr))
-			ISAFUNC(uasm_i_daddiu)(buf, rs, rs,
-					ISAFUNC(uasm_rel_lo(addr)));
+	UASM_i_LA_mostly(buf, rs, addr);
+	if (uasm_rel_lo(addr)) {
+		if (!uasm_in_compat_space_p(addr))
+			uasm_i_daddiu(buf, rs, rs,
+					uasm_rel_lo(addr));
 		else
-			ISAFUNC(uasm_i_addiu)(buf, rs, rs,
-					ISAFUNC(uasm_rel_lo(addr)));
+			uasm_i_addiu(buf, rs, rs,
+					uasm_rel_lo(addr));
 	}
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(UASM_i_LA));
+UASM_EXPORT_SYMBOL(UASM_i_LA);
 
 /* Handle relocations. */
-void ISAFUNC(uasm_r_mips_pc16)(struct uasm_reloc **rel, u32 *addr, int lid)
+void uasm_r_mips_pc16(struct uasm_reloc **rel, u32 *addr, int lid)
 {
 	(*rel)->addr = addr;
 	(*rel)->type = R_MIPS_PC16;
 	(*rel)->lab = lid;
 	(*rel)++;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_r_mips_pc16));
+UASM_EXPORT_SYMBOL(uasm_r_mips_pc16);
 
 static inline void __resolve_relocs(struct uasm_reloc *rel,
 				    struct uasm_label *lab);
 
-void ISAFUNC(uasm_resolve_relocs)(struct uasm_reloc *rel,
+void uasm_resolve_relocs(struct uasm_reloc *rel,
 				  struct uasm_label *lab)
 {
 	struct uasm_label *l;
@@ -468,39 +468,39 @@ void ISAFUNC(uasm_resolve_relocs)(struct uasm_reloc *rel,
 			if (rel->lab == l->lab)
 				__resolve_relocs(rel, l);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_resolve_relocs));
+UASM_EXPORT_SYMBOL(uasm_resolve_relocs);
 
-void ISAFUNC(uasm_move_relocs)(struct uasm_reloc *rel, u32 *first, u32 *end,
+void uasm_move_relocs(struct uasm_reloc *rel, u32 *first, u32 *end,
 			       long off)
 {
 	for (; rel->lab != UASM_LABEL_INVALID; rel++)
 		if (rel->addr >= first && rel->addr < end)
 			rel->addr += off;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_move_relocs));
+UASM_EXPORT_SYMBOL(uasm_move_relocs);
 
-void ISAFUNC(uasm_move_labels)(struct uasm_label *lab, u32 *first, u32 *end,
+void uasm_move_labels(struct uasm_label *lab, u32 *first, u32 *end,
 			       long off)
 {
 	for (; lab->lab != UASM_LABEL_INVALID; lab++)
 		if (lab->addr >= first && lab->addr < end)
 			lab->addr += off;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_move_labels));
+UASM_EXPORT_SYMBOL(uasm_move_labels);
 
-void ISAFUNC(uasm_copy_handler)(struct uasm_reloc *rel, struct uasm_label *lab,
+void uasm_copy_handler(struct uasm_reloc *rel, struct uasm_label *lab,
 				u32 *first, u32 *end, u32 *target)
 {
 	long off = (long)(target - first);
 
 	memcpy(target, first, (end - first) * sizeof(u32));
 
-	ISAFUNC(uasm_move_relocs(rel, first, end, off));
-	ISAFUNC(uasm_move_labels(lab, first, end, off));
+	uasm_move_relocs(rel, first, end, off);
+	uasm_move_labels(lab, first, end, off);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_copy_handler));
+UASM_EXPORT_SYMBOL(uasm_copy_handler);
 
-int ISAFUNC(uasm_insn_has_bdelay)(struct uasm_reloc *rel, u32 *addr)
+int uasm_insn_has_bdelay(struct uasm_reloc *rel, u32 *addr)
 {
 	for (; rel->lab != UASM_LABEL_INVALID; rel++) {
 		if (rel->addr == addr
@@ -511,92 +511,92 @@ int ISAFUNC(uasm_insn_has_bdelay)(struct uasm_reloc *rel, u32 *addr)
 
 	return 0;
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_insn_has_bdelay));
+UASM_EXPORT_SYMBOL(uasm_insn_has_bdelay);
 
 /* Convenience functions for labeled branches. */
-void ISAFUNC(uasm_il_bltz)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_bltz(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			   int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bltz)(p, reg, 0);
+	uasm_i_bltz(p, reg, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bltz));
+UASM_EXPORT_SYMBOL(uasm_il_bltz);
 
-void ISAFUNC(uasm_il_b)(u32 **p, struct uasm_reloc **r, int lid)
+void uasm_il_b(u32 **p, struct uasm_reloc **r, int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_b)(p, 0);
+	uasm_i_b(p, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_b));
+UASM_EXPORT_SYMBOL(uasm_il_b);
 
-void ISAFUNC(uasm_il_beq)(u32 **p, struct uasm_reloc **r, unsigned int r1,
+void uasm_il_beq(u32 **p, struct uasm_reloc **r, unsigned int r1,
 			  unsigned int r2, int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_beq)(p, r1, r2, 0);
+	uasm_i_beq(p, r1, r2, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beq));
+UASM_EXPORT_SYMBOL(uasm_il_beq);
 
-void ISAFUNC(uasm_il_beqz)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_beqz(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			   int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_beqz)(p, reg, 0);
+	uasm_i_beqz(p, reg, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beqz));
+UASM_EXPORT_SYMBOL(uasm_il_beqz);
 
-void ISAFUNC(uasm_il_beqzl)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			    int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_beqzl)(p, reg, 0);
+	uasm_i_beqzl(p, reg, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beqzl));
+UASM_EXPORT_SYMBOL(uasm_il_beqzl);
 
-void ISAFUNC(uasm_il_bne)(u32 **p, struct uasm_reloc **r, unsigned int reg1,
+void uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1,
 			  unsigned int reg2, int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bne)(p, reg1, reg2, 0);
+	uasm_i_bne(p, reg1, reg2, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bne));
+UASM_EXPORT_SYMBOL(uasm_il_bne);
 
-void ISAFUNC(uasm_il_bnez)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			   int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bnez)(p, reg, 0);
+	uasm_i_bnez(p, reg, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bnez));
+UASM_EXPORT_SYMBOL(uasm_il_bnez);
 
-void ISAFUNC(uasm_il_bgezl)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_bgezl(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			    int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bgezl)(p, reg, 0);
+	uasm_i_bgezl(p, reg, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bgezl));
+UASM_EXPORT_SYMBOL(uasm_il_bgezl);
 
-void ISAFUNC(uasm_il_bgez)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_bgez(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			   int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bgez)(p, reg, 0);
+	uasm_i_bgez(p, reg, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bgez));
+UASM_EXPORT_SYMBOL(uasm_il_bgez);
 
-void ISAFUNC(uasm_il_bbit0)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_bbit0(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			    unsigned int bit, int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bbit0)(p, reg, bit, 0);
+	uasm_i_bbit0(p, reg, bit, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bbit0));
+UASM_EXPORT_SYMBOL(uasm_il_bbit0);
 
-void ISAFUNC(uasm_il_bbit1)(u32 **p, struct uasm_reloc **r, unsigned int reg,
+void uasm_il_bbit1(u32 **p, struct uasm_reloc **r, unsigned int reg,
 			    unsigned int bit, int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
-	ISAFUNC(uasm_i_bbit1)(p, reg, bit, 0);
+	uasm_i_bbit1(p, reg, bit, 0);
 }
-UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bbit1));
+UASM_EXPORT_SYMBOL(uasm_il_bbit1);
-- 
cgit v1.2.3


From 759f534e93ace6a94a3c0718611fcf70dd5f7809 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 4 Apr 2017 17:49:57 +0200
Subject: CPUFREQ: Loongson2: drop set_cpus_allowed_ptr()

It is pure mystery to me why we need to be on a specific CPU while
looking up a value in an array.
My best shot at this is that before commit d4019f0a92ab ("cpufreq: move
freq change notifications to cpufreq core") it was required to invoke
cpufreq_notify_transition() on a special CPU.

Since it looks like a waste, remove it.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: tglx@linutronix.de
Cc: linux-pm@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15888/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 drivers/cpufreq/loongson2_cpufreq.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index 6bbdac1065ff..9ac27b22476c 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -51,19 +51,12 @@ static int loongson2_cpu_freq_notifier(struct notifier_block *nb,
 static int loongson2_cpufreq_target(struct cpufreq_policy *policy,
 				     unsigned int index)
 {
-	unsigned int cpu = policy->cpu;
-	cpumask_t cpus_allowed;
 	unsigned int freq;
 
-	cpus_allowed = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, cpumask_of(cpu));
-
 	freq =
 	    ((cpu_clock_freq / 1000) *
 	     loongson2_clockmod_table[index].driver_data) / 8;
 
-	set_cpus_allowed_ptr(current, &cpus_allowed);
-
 	/* setting the cpu frequency */
 	clk_set_rate(policy->clk, freq * 1000);
 
-- 
cgit v1.2.3


From f9c4e3a6dae1a3a15055ea478438cdc0352e1af2 Mon Sep 17 00:00:00 2001
From: James Cowgill <James.Cowgill@imgtec.com>
Date: Fri, 31 Mar 2017 17:09:58 +0100
Subject: MIPS: Opt into HAVE_COPY_THREAD_TLS

This the mips version of commit c1bd55f922a2d ("x86: opt into
HAVE_COPY_THREAD_TLS, for both 32-bit and 64-bit").

Simply use the tls system call argument instead of extracting the tls
argument by magic from the pt_regs structure.

See commit 3033f14ab78c3 ("clone: support passing tls argument via C
rather than pt_regs magic") for more background.

Signed-off-by: James Cowgill <James.Cowgill@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15855/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig          | 1 +
 arch/mips/kernel/process.c | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 2afb41c52ba0..52d81ca6f74e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -70,6 +70,7 @@ config MIPS
 	select HAVE_EXIT_THREAD
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_ARCH_HARDENED_USERCOPY
+	select HAVE_COPY_THREAD_TLS
 
 menu "Machine selection"
 
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index b68e10fc453d..918d4c73e951 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -114,8 +114,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 /*
  * Copy architecture-specific thread state
  */
-int copy_thread(unsigned long clone_flags, unsigned long usp,
-	unsigned long kthread_arg, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+	unsigned long kthread_arg, struct task_struct *p, unsigned long tls)
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct pt_regs *childregs, *regs = current_pt_regs();
@@ -176,7 +176,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE);
 
 	if (clone_flags & CLONE_SETTLS)
-		ti->tp_value = regs->regs[7];
+		ti->tp_value = tls;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 1e548da5cef1f7148ce625f0bb0272ae2657cb1f Mon Sep 17 00:00:00 2001
From: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Date: Mon, 13 Mar 2017 14:33:37 +0100
Subject: MIPS: mach-rm: Remove recursive include of cpu-feature-overrides.h

cpu-feautre-overrides.h in mach-rm unnecessarily includes itself, so
drop the pointless include

Signed-off-by: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15462/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mach-rm/cpu-feature-overrides.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h b/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h
index 98cf40417c5d..d38be668e338 100644
--- a/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h
@@ -10,8 +10,6 @@
 #ifndef __ASM_MACH_RM200_CPU_FEATURE_OVERRIDES_H
 #define __ASM_MACH_RM200_CPU_FEATURE_OVERRIDES_H
 
-#include <cpu-feature-overrides.h>
-
 #define cpu_has_tlb		1
 #define cpu_has_4kex		1
 #define cpu_has_4k_cache	1
-- 
cgit v1.2.3


From 90241fb9b55a36edd9dafb8de679f66836e84369 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Fri, 7 Apr 2017 15:03:59 +0530
Subject: pwm: tegra: Use DIV_ROUND_CLOSEST_ULL() instead of local
 implementation

Use macro DIV_ROUND_CLOSEST_ULL() for 64-bit division to closest one
instead of implementing the same locally. This increase readability.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-tegra.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
index e4647840cd6e..0a688dabd670 100644
--- a/drivers/pwm/pwm-tegra.c
+++ b/drivers/pwm/pwm-tegra.c
@@ -85,8 +85,7 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 	 * nearest integer during division.
 	 */
 	c *= (1 << PWM_DUTY_WIDTH);
-	c += period_ns / 2;
-	do_div(c, period_ns);
+	c = DIV_ROUND_CLOSEST_ULL(c, period_ns);
 
 	val = (u32)c << PWM_DUTY_SHIFT;
 
-- 
cgit v1.2.3


From 250b76f43f57d578ebff5e7211eb2c73aa5cd6ca Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Fri, 7 Apr 2017 15:04:00 +0530
Subject: pwm: tegra: Increase precision in PWM rate calculation

The rate of the PWM calculated as follows:

	hz = NSEC_PER_SEC / period_ns;
 	rate = (rate + (hz / 2)) / hz;

This has the precision loss in lower PWM rate.

Change this to have more precision as:

	hz = DIV_ROUND_CLOSEST_ULL(NSEC_PER_SEC * 100, period_ns);
	rate = DIV_ROUND_CLOSEST(rate * 100, hz)

Example:

1. period_ns = 16672000, PWM clock rate is 200 KHz.

	Based on old formula
		hz = NSEC_PER_SEC / period_ns
		   = 1000000000ul/16672000
		   = 59 (59.98)
		rate = (200K + 59/2)/59 = 3390

	Based on new method:
		hz = 5998
		rate = DIV_ROUND_CLOSE(200000*100, 5998) = 3334

	If we measure the PWM signal rate, we will get more accurate
	period with rate value of 3334 instead of 3390.

2.  period_ns = 16803898, PWM clock rate is 200 KHz.

	Based on old formula:
		hz = 59, rate = 3390

	Based on new formula:
		hz = 5951, rate = 3360

	The PWM signal rate of 3360 is more near to requested period
	than 3333.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-tegra.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
index 0a688dabd670..21518bea00c6 100644
--- a/drivers/pwm/pwm-tegra.c
+++ b/drivers/pwm/pwm-tegra.c
@@ -76,6 +76,7 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 	struct tegra_pwm_chip *pc = to_tegra_pwm_chip(chip);
 	unsigned long long c = duty_ns;
 	unsigned long rate, hz;
+	unsigned long long ns100 = NSEC_PER_SEC;
 	u32 val = 0;
 	int err;
 
@@ -94,9 +95,11 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 	 * cycles at the PWM clock rate will take period_ns nanoseconds.
 	 */
 	rate = clk_get_rate(pc->clk) >> PWM_DUTY_WIDTH;
-	hz = NSEC_PER_SEC / period_ns;
 
-	rate = (rate + (hz / 2)) / hz;
+	/* Consider precision in PWM_SCALE_WIDTH rate calculation */
+	ns100 *= 100;
+	hz = DIV_ROUND_CLOSEST_ULL(ns100, period_ns);
+	rate = DIV_ROUND_CLOSEST(rate * 100, hz);
 
 	/*
 	 * Since the actual PWM divider is the register's frequency divider
-- 
cgit v1.2.3


From 0dafdd168b0f441668e0d96a508af35a986374bf Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Fri, 7 Apr 2017 15:04:01 +0530
Subject: pwm: tegra: Add DT binding details to configure pin in
 suspends/resume

In some of NVIDIA Tegra's platform, PWM controller is used to
control the PWM controlled regulators. PWM signal is connected to
the VID pin of the regulator where duty cycle of PWM signal decide
the voltage level of the regulator output.

When system enters suspend, some PWM client/slave regulator devices
require the PWM output to be tristated.

Add DT binding details to provide the pin configuration state
from PWM and pinctrl DT node in suspend and active state of
the system.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 .../devicetree/bindings/pwm/nvidia,tegra20-pwm.txt | 45 ++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt b/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt
index b4e73778dda3..c57e11b8d937 100644
--- a/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt
@@ -19,6 +19,19 @@ Required properties:
 - reset-names: Must include the following entries:
   - pwm
 
+Optional properties:
+============================
+In some of the interface like PWM based regulator device, it is required
+to configure the pins differently in different states, especially in suspend
+state of the system. The configuration of pin is provided via the pinctrl
+DT node as detailed in the pinctrl DT binding document
+	Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+
+The PWM node will have following optional properties.
+pinctrl-names:	Pin state names. Must be "default" and "sleep".
+pinctrl-0:	phandle for the default/active state of pin configurations.
+pinctrl-1:	phandle for the sleep state of pin configurations.
+
 Example:
 
 	pwm: pwm@7000a000 {
@@ -29,3 +42,35 @@ Example:
 		resets = <&tegra_car 17>;
 		reset-names = "pwm";
 	};
+
+
+Example with the pin configuration for suspend and resume:
+=========================================================
+Suppose pin PE7 (On Tegra210) interfaced with the regulator device and
+it requires PWM output to be tristated when system enters suspend.
+Following will be DT binding to achieve this:
+
+#include <dt-bindings/pinctrl/pinctrl-tegra.h>
+
+	pinmux@700008d4 {
+		pwm_active_state: pwm_active_state {
+                        pe7 {
+                                nvidia,pins = "pe7";
+                                nvidia,tristate = <TEGRA_PIN_DISABLE>;
+			};
+		};
+
+		pwm_sleep_state: pwm_sleep_state {
+                        pe7 {
+                                nvidia,pins = "pe7";
+                                nvidia,tristate = <TEGRA_PIN_ENABLE>;
+			};
+		};
+	};
+
+	pwm@7000a000 {
+		/* Mandatory PWM properties */
+		pinctrl-names = "default", "sleep";
+		pinctrl-0 = <&pwm_active_state>;
+		pinctrl-1 = <&pwm_sleep_state>;
+	};
-- 
cgit v1.2.3


From 4a813b262f85ac34c9f3e557d9be79f846a381df Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Fri, 7 Apr 2017 15:04:02 +0530
Subject: pwm: tegra: Add support to configure pin state in suspends/resume

In some of NVIDIA Tegra's platform, PWM controller is used to
control the PWM controlled regulators. PWM signal is connected to
the VID pin of the regulator where duty cycle of PWM signal decide
the voltage level of the regulator output.

When system enters suspend, some PWM client/slave regulator devices
require the PWM output to be tristated.

Add support to configure the pin state via pinctrl frameworks in
suspend and active state of the system.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-tegra.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
index 21518bea00c6..9c7f180b9f3a 100644
--- a/drivers/pwm/pwm-tegra.c
+++ b/drivers/pwm/pwm-tegra.c
@@ -29,6 +29,7 @@
 #include <linux/of_device.h>
 #include <linux/pwm.h>
 #include <linux/platform_device.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/slab.h>
 #include <linux/reset.h>
 
@@ -255,6 +256,18 @@ static int tegra_pwm_remove(struct platform_device *pdev)
 	return pwmchip_remove(&pc->chip);
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int tegra_pwm_suspend(struct device *dev)
+{
+	return pinctrl_pm_select_sleep_state(dev);
+}
+
+static int tegra_pwm_resume(struct device *dev)
+{
+	return pinctrl_pm_select_default_state(dev);
+}
+#endif
+
 static const struct tegra_pwm_soc tegra20_pwm_soc = {
 	.num_channels = 4,
 };
@@ -271,10 +284,15 @@ static const struct of_device_id tegra_pwm_of_match[] = {
 
 MODULE_DEVICE_TABLE(of, tegra_pwm_of_match);
 
+static const struct dev_pm_ops tegra_pwm_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(tegra_pwm_suspend, tegra_pwm_resume)
+};
+
 static struct platform_driver tegra_pwm_driver = {
 	.driver = {
 		.name = "tegra-pwm",
 		.of_match_table = tegra_pwm_of_match,
+		.pm = &tegra_pwm_pm_ops,
 	},
 	.probe = tegra_pwm_probe,
 	.remove = tegra_pwm_remove,
-- 
cgit v1.2.3


From f1387d770527b11c5467ed6b6b3d9c3e5aa12dd4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 15 Jan 2017 15:18:22 -0800
Subject: doc: Synchronous RCU grace periods are now legal throughout boot

This commit updates the "Early Boot" section of the RCU requirements
to describe how synchronous RCU grace periods are now legal throughout
the boot process.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 .../RCU/Design/Requirements/Requirements.html      | 81 +++++++++++++---------
 1 file changed, 47 insertions(+), 34 deletions(-)

diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index 21593496aca6..999b3ed3444e 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -2154,7 +2154,8 @@ as will <tt>rcu_assign_pointer()</tt>.
 <p>
 Although <tt>call_rcu()</tt> may be invoked at any
 time during boot, callbacks are not guaranteed to be invoked until after
-the scheduler is fully up and running.
+all of RCU's kthreads have been spawned, which occurs at
+<tt>early_initcall()</tt> time.
 This delay in callback invocation is due to the fact that RCU does not
 invoke callbacks until it is fully initialized, and this full initialization
 cannot occur until after the scheduler has initialized itself to the
@@ -2167,8 +2168,10 @@ on what operations those callbacks could invoke.
 Perhaps surprisingly, <tt>synchronize_rcu()</tt>,
 <a href="#Bottom-Half Flavor"><tt>synchronize_rcu_bh()</tt></a>
 (<a href="#Bottom-Half Flavor">discussed below</a>),
-and
-<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>
+<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>,
+<tt>synchronize_rcu_expedited()</tt>,
+<tt>synchronize_rcu_bh_expedited()</tt>, and
+<tt>synchronize_sched_expedited()</tt>
 will all operate normally
 during very early boot, the reason being that there is only one CPU
 and preemption is disabled.
@@ -2178,45 +2181,55 @@ state and thus a grace period, so the early-boot implementation can
 be a no-op.
 
 <p>
-Both <tt>synchronize_rcu_bh()</tt> and <tt>synchronize_sched()</tt>
-continue to operate normally through the remainder of boot, courtesy
-of the fact that preemption is disabled across their RCU read-side
-critical sections and also courtesy of the fact that there is still
-only one CPU.
-However, once the scheduler starts initializing, preemption is enabled.
-There is still only a single CPU, but the fact that preemption is enabled
-means that the no-op implementation of <tt>synchronize_rcu()</tt> no
-longer works in <tt>CONFIG_PREEMPT=y</tt> kernels.
-Therefore, as soon as the scheduler starts initializing, the early-boot
-fastpath is disabled.
-This means that <tt>synchronize_rcu()</tt> switches to its runtime
-mode of operation where it posts callbacks, which in turn means that
-any call to <tt>synchronize_rcu()</tt> will block until the corresponding
-callback is invoked.
-Unfortunately, the callback cannot be invoked until RCU's runtime
-grace-period machinery is up and running, which cannot happen until
-the scheduler has initialized itself sufficiently to allow RCU's
-kthreads to be spawned.
-Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
-initialization can result in deadlock.
+However, once the scheduler has spawned its first kthread, this early
+boot trick fails for <tt>synchronize_rcu()</tt> (as well as for
+<tt>synchronize_rcu_expedited()</tt>) in <tt>CONFIG_PREEMPT=y</tt>
+kernels.
+The reason is that an RCU read-side critical section might be preempted,
+which means that a subsequent <tt>synchronize_rcu()</tt> really does have
+to wait for something, as opposed to simply returning immediately.
+Unfortunately, <tt>synchronize_rcu()</tt> can't do this until all of
+its kthreads are spawned, which doesn't happen until some time during
+<tt>early_initcalls()</tt> time.
+But this is no excuse:  RCU is nevertheless required to correctly handle
+synchronous grace periods during this time period, which it currently does.
+Once all of its kthreads are up and running, RCU starts running
+normally.
 
 <table>
 <tr><th>&nbsp;</th></tr>
 <tr><th align="left">Quick Quiz:</th></tr>
 <tr><td>
-	So what happens with <tt>synchronize_rcu()</tt> during
-	scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-	kernels?
+	How can RCU possibly handle grace periods before all of its
+	kthreads have been spawned???
 </td></tr>
 <tr><th align="left">Answer:</th></tr>
 <tr><td bgcolor="#ffffff"><font color="ffffff">
-	In <tt>CONFIG_PREEMPT=n</tt> kernel, <tt>synchronize_rcu()</tt>
-	maps directly to <tt>synchronize_sched()</tt>.
-	Therefore, <tt>synchronize_rcu()</tt> works normally throughout
-	boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
-	However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
-	so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
-	during scheduler initialization.
+	Very carefully!
+
+	<p>During the &ldquo;dead zone&rdquo; between the time that the
+	scheduler spawns the first task and the time that all of RCU's
+	kthreads have been spawned, all synchronous grace periods are
+	handled by the expedited grace-period mechanism.
+	At runtime, this expedited mechanism relies on workqueues, but
+	during the dead zone the requesting task itself drives the
+	desired expedited grace period.
+	Because dead-zone execution takes place within task context,
+	everything works.
+	Once the dead zone ends, expedited grace periods go back to
+	using workqueues, as is required to avoid problems that would
+	otherwise occur when a user task received a POSIX signal while
+	driving an expedited grace period.
+
+	<p>And yes, this does mean that it is unhelpful to send POSIX
+	signals to random tasks between the time that the scheduler
+	spawns its first kthread and the time that RCU's kthreads
+	have all been spawned.
+	If there ever turns out to be a good reason for sending POSIX
+	signals during that time, appropriate adjustments will be made.
+	(If it turns out that POSIX signals are sent during this time for
+	no good reason, other adjustments will be made, appropriate
+	or otherwise.)
 </font></td></tr>
 <tr><td>&nbsp;</td></tr>
 </table>
-- 
cgit v1.2.3


From b4553f0cfea5ab5a02967e482bcafe7db6407afd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 15 Jan 2017 16:12:09 -0800
Subject: doc: Add mid-boot operation to expedited grace periods

This commit adds a description of how expedited grace periods operate
during the mid-boot "dead zone", which starts when the scheduler spawns
the first kthread and ends when all of RCU's kthreads have been spawned.
In short, before mid-boot, synchronous grace periods can be a no-op.
After the end of mid-boot, workqueues may be used.  During mid-boot,
the requesting task drivees the expedited grace period.

For more detail, see https://lwn.net/Articles/716148/.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 .../Expedited-Grace-Periods.html                   | 47 ++++++++++++++++++++--
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
index 7a3194c5559a..e5d0bbd0230b 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
@@ -284,6 +284,7 @@ Expedited Grace Period Refinements</a></h2>
 	Funnel locking and wait/wakeup</a>.
 <li>	<a href="#Use of Workqueues">Use of Workqueues</a>.
 <li>	<a href="#Stall Warnings">Stall warnings</a>.
+<li>	<a href="#Mid-Boot Operation">Mid-boot operation</a>.
 </ol>
 
 <h3><a name="Idle-CPU Checks">Idle-CPU Checks</a></h3>
@@ -524,7 +525,7 @@ their grace periods and carrying out their wakeups.
 In earlier implementations, the task requesting the expedited
 grace period also drove it to completion.
 This straightforward approach had the disadvantage of needing to
-account for signals sent to user tasks,
+account for POSIX signals sent to user tasks,
 so more recent implemementations use the Linux kernel's
 <a href="https://www.kernel.org/doc/Documentation/workqueue.txt">workqueues</a>.
 
@@ -533,8 +534,8 @@ The requesting task still does counter snapshotting and funnel-lock
 processing, but the task reaching the top of the funnel lock
 does a <tt>schedule_work()</tt> (from <tt>_synchronize_rcu_expedited()</tt>
 so that a workqueue kthread does the actual grace-period processing.
-Because workqueue kthreads do not accept signals, grace-period-wait
-processing need not allow for signals.
+Because workqueue kthreads do not accept POSIX signals, grace-period-wait
+processing need not allow for POSIX signals.
 
 In addition, this approach allows wakeups for the previous expedited
 grace period to be overlapped with processing for the next expedited
@@ -586,6 +587,46 @@ blocking the current grace period are printed.
 Each stall warning results in another pass through the loop, but the
 second and subsequent passes use longer stall times.
 
+<h3><a name="Mid-Boot Operation">Mid-boot operation</a></h3>
+
+<p>
+The use of workqueues has the advantage that the expedited
+grace-period code need not worry about POSIX signals.
+Unfortunately, it has the
+corresponding disadvantage that workqueues cannot be used until
+they are initialized, which does not happen until some time after
+the scheduler spawns the first task.
+Given that there are parts of the kernel that really do want to
+execute grace periods during this mid-boot &ldquo;dead zone&rdquo;,
+expedited grace periods must do something else during thie time.
+
+<p>
+What they do is to fall back to the old practice of requiring that the
+requesting task drive the expedited grace period, as was the case
+before the use of workqueues.
+However, the requesting task is only required to drive the grace period
+during the mid-boot dead zone.
+Before mid-boot, a synchronous grace period is a no-op.
+Some time after mid-boot, workqueues are used.
+
+<p>
+Non-expedited non-SRCU synchronous grace periods must also operate
+normally during mid-boot.
+This is handled by causing non-expedited grace periods to take the
+expedited code path during mid-boot.
+
+<p>
+The current code assumes that there are no POSIX signals during
+the mid-boot dead zone.
+However, if an overwhelming need for POSIX signals somehow arises,
+appropriate adjustments can be made to the expedited stall-warning code.
+One such adjustment would reinstate the pre-workqueue stall-warning
+checks, but only during the mid-boot dead zone.
+
+<p>
+With this refinement, synchronous grace periods can now be used from
+task context pretty much any time during the life of the kernel.
+
 <h3><a name="Summary">
 Summary</a></h3>
 
-- 
cgit v1.2.3


From 8e2a439753b1b708c6aa58249ab3cab8015597b1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 8 Feb 2017 14:30:15 -0800
Subject: doc: Update stallwarn.txt to make causes more prominent

This commit rearranges the Documentation/RCU/stallwarn.txt file to
put the list of issues that can cause RCU CPU stall warnings near
the beginning of the document.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/RCU/stallwarn.txt | 190 +++++++++++++++++++++-------------------
 1 file changed, 100 insertions(+), 90 deletions(-)

diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index e93d04133fe7..96a3d81837e1 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -1,9 +1,102 @@
 Using RCU's CPU Stall Detector
 
-The rcu_cpu_stall_suppress module parameter enables RCU's CPU stall
-detector, which detects conditions that unduly delay RCU grace periods.
-This module parameter enables CPU stall detection by default, but
-may be overridden via boot-time parameter or at runtime via sysfs.
+This document first discusses what sorts of issues RCU's CPU stall
+detector can locate, and then discusses kernel parameters and Kconfig
+options that can be used to fine-tune the detector's operation.  Finally,
+this document explains the stall detector's "splat" format.
+
+
+What Causes RCU CPU Stall Warnings?
+
+So your kernel printed an RCU CPU stall warning.  The next question is
+"What caused it?"  The following problems can result in RCU CPU stall
+warnings:
+
+o	A CPU looping in an RCU read-side critical section.
+
+o	A CPU looping with interrupts disabled.
+
+o	A CPU looping with preemption disabled.  This condition can
+	result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh
+	stalls.
+
+o	A CPU looping with bottom halves disabled.  This condition can
+	result in RCU-sched and RCU-bh stalls.
+
+o	For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the
+	kernel without invoking schedule().  Note that cond_resched()
+	does not necessarily prevent RCU CPU stall warnings.  Therefore,
+	if the looping in the kernel is really expected and desirable
+	behavior, you might need to replace some of the cond_resched()
+	calls with calls to cond_resched_rcu_qs().
+
+o	Booting Linux using a console connection that is too slow to
+	keep up with the boot-time console-message rate.  For example,
+	a 115Kbaud serial console can be -way- too slow to keep up
+	with boot-time message rates, and will frequently result in
+	RCU CPU stall warning messages.  Especially if you have added
+	debug printk()s.
+
+o	Anything that prevents RCU's grace-period kthreads from running.
+	This can result in the "All QSes seen" console-log message.
+	This message will include information on when the kthread last
+	ran and how often it should be expected to run.
+
+o	A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
+	happen to preempt a low-priority task in the middle of an RCU
+	read-side critical section.   This is especially damaging if
+	that low-priority task is not permitted to run on any other CPU,
+	in which case the next RCU grace period can never complete, which
+	will eventually cause the system to run out of memory and hang.
+	While the system is in the process of running itself out of
+	memory, you might see stall-warning messages.
+
+o	A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
+	is running at a higher priority than the RCU softirq threads.
+	This will prevent RCU callbacks from ever being invoked,
+	and in a CONFIG_PREEMPT_RCU kernel will further prevent
+	RCU grace periods from ever completing.  Either way, the
+	system will eventually run out of memory and hang.  In the
+	CONFIG_PREEMPT_RCU case, you might see stall-warning
+	messages.
+
+o	A hardware or software issue shuts off the scheduler-clock
+	interrupt on a CPU that is not in dyntick-idle mode.  This
+	problem really has happened, and seems to be most likely to
+	result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels.
+
+o	A bug in the RCU implementation.
+
+o	A hardware failure.  This is quite unlikely, but has occurred
+	at least once in real life.  A CPU failed in a running system,
+	becoming unresponsive, but not causing an immediate crash.
+	This resulted in a series of RCU CPU stall warnings, eventually
+	leading the realization that the CPU had failed.
+
+The RCU, RCU-sched, RCU-bh, and RCU-tasks implementations have CPU stall
+warning.  Note that SRCU does -not- have CPU stall warnings.  Please note
+that RCU only detects CPU stalls when there is a grace period in progress.
+No grace period, no CPU stall warnings.
+
+To diagnose the cause of the stall, inspect the stack traces.
+The offending function will usually be near the top of the stack.
+If you have a series of stall warnings from a single extended stall,
+comparing the stack traces can often help determine where the stall
+is occurring, which will usually be in the function nearest the top of
+that portion of the stack which remains the same from trace to trace.
+If you can reliably trigger the stall, ftrace can be quite helpful.
+
+RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE
+and with RCU's event tracing.  For information on RCU's event tracing,
+see include/trace/events/rcu.h.
+
+
+Fine-Tuning the RCU CPU Stall Detector
+
+The rcuupdate.rcu_cpu_stall_suppress module parameter disables RCU's
+CPU stall detector, which detects conditions that unduly delay RCU grace
+periods.  This module parameter enables CPU stall detection by default,
+but may be overridden via boot-time parameter or at runtime via sysfs.
 The stall detector's idea of what constitutes "unduly delayed" is
 controlled by a set of kernel configuration variables and cpp macros:
 
@@ -56,6 +149,9 @@ rcupdate.rcu_task_stall_timeout
 	And continues with the output of sched_show_task() for each
 	task stalling the current RCU-tasks grace period.
 
+
+Interpreting RCU's CPU Stall-Detector "Splats"
+
 For non-RCU-tasks flavors of RCU, when a CPU detects that it is stalling,
 it will print a message similar to the following:
 
@@ -178,89 +274,3 @@ grace period is in flight.
 
 It is entirely possible to see stall warnings from normal and from
 expedited grace periods at about the same time from the same run.
-
-
-What Causes RCU CPU Stall Warnings?
-
-So your kernel printed an RCU CPU stall warning.  The next question is
-"What caused it?"  The following problems can result in RCU CPU stall
-warnings:
-
-o	A CPU looping in an RCU read-side critical section.
-	
-o	A CPU looping with interrupts disabled.  This condition can
-	result in RCU-sched and RCU-bh stalls.
-
-o	A CPU looping with preemption disabled.  This condition can
-	result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh
-	stalls.
-
-o	A CPU looping with bottom halves disabled.  This condition can
-	result in RCU-sched and RCU-bh stalls.
-
-o	For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the
-	kernel without invoking schedule().  Note that cond_resched()
-	does not necessarily prevent RCU CPU stall warnings.  Therefore,
-	if the looping in the kernel is really expected and desirable
-	behavior, you might need to replace some of the cond_resched()
-	calls with calls to cond_resched_rcu_qs().
-
-o	Booting Linux using a console connection that is too slow to
-	keep up with the boot-time console-message rate.  For example,
-	a 115Kbaud serial console can be -way- too slow to keep up
-	with boot-time message rates, and will frequently result in
-	RCU CPU stall warning messages.  Especially if you have added
-	debug printk()s.
-
-o	Anything that prevents RCU's grace-period kthreads from running.
-	This can result in the "All QSes seen" console-log message.
-	This message will include information on when the kthread last
-	ran and how often it should be expected to run.
-
-o	A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
-	happen to preempt a low-priority task in the middle of an RCU
-	read-side critical section.   This is especially damaging if
-	that low-priority task is not permitted to run on any other CPU,
-	in which case the next RCU grace period can never complete, which
-	will eventually cause the system to run out of memory and hang.
-	While the system is in the process of running itself out of
-	memory, you might see stall-warning messages.
-
-o	A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
-	is running at a higher priority than the RCU softirq threads.
-	This will prevent RCU callbacks from ever being invoked,
-	and in a CONFIG_PREEMPT_RCU kernel will further prevent
-	RCU grace periods from ever completing.  Either way, the
-	system will eventually run out of memory and hang.  In the
-	CONFIG_PREEMPT_RCU case, you might see stall-warning
-	messages.
-
-o	A hardware or software issue shuts off the scheduler-clock
-	interrupt on a CPU that is not in dyntick-idle mode.  This
-	problem really has happened, and seems to be most likely to
-	result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels.
-
-o	A bug in the RCU implementation.
-
-o	A hardware failure.  This is quite unlikely, but has occurred
-	at least once in real life.  A CPU failed in a running system,
-	becoming unresponsive, but not causing an immediate crash.
-	This resulted in a series of RCU CPU stall warnings, eventually
-	leading the realization that the CPU had failed.
-
-The RCU, RCU-sched, RCU-bh, and RCU-tasks implementations have CPU stall
-warning.  Note that SRCU does -not- have CPU stall warnings.  Please note
-that RCU only detects CPU stalls when there is a grace period in progress.
-No grace period, no CPU stall warnings.
-
-To diagnose the cause of the stall, inspect the stack traces.
-The offending function will usually be near the top of the stack.
-If you have a series of stall warnings from a single extended stall,
-comparing the stack traces can often help determine where the stall
-is occurring, which will usually be in the function nearest the top of
-that portion of the stack which remains the same from trace to trace.
-If you can reliably trigger the stall, ftrace can be quite helpful.
-
-RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE
-and with RCU's event tracing.  For information on RCU's event tracing,
-see include/trace/events/rcu.h.
-- 
cgit v1.2.3


From aa123a748ea552b18f0d4add823c29ddbddaf7b4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 11 Apr 2017 09:17:08 -0700
Subject: doc: Update RCU data-structure documentation for rcu_segcblist

The rcu_segcblist data structure, which contains segmented lists
of RCU callbacks, was recently added.  This commit updates the
documentation accordingly.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 .../Design/Data-Structures/Data-Structures.html    | 207 ++++++++++++++-------
 .../RCU/Design/Data-Structures/nxtlist.svg         |  34 ++--
 2 files changed, 156 insertions(+), 85 deletions(-)

diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index d583c653a703..2ab38ee420c5 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -19,6 +19,8 @@ to each other.
 	The <tt>rcu_state</tt> Structure</a>
 <li>	<a href="#The rcu_node Structure">
 	The <tt>rcu_node</tt> Structure</a>
+<li>	<a href="#The rcu_segcblist Structure">
+	The <tt>rcu_segcblist</tt> Structure</a>
 <li>	<a href="#The rcu_data Structure">
 	The <tt>rcu_data</tt> Structure</a>
 <li>	<a href="#The rcu_dynticks Structure">
@@ -841,6 +843,134 @@ for lockdep lock-class names.
 Finally, lines&nbsp;64-66 produce an error if the maximum number of
 CPUs is too large for the specified fanout.
 
+<h3><a name="The rcu_segcblist Structure">
+The <tt>rcu_segcblist</tt> Structure</a></h3>
+
+The <tt>rcu_segcblist</tt> structure maintains a segmented list of
+callbacks as follows:
+
+<pre>
+ 1 #define RCU_DONE_TAIL        0
+ 2 #define RCU_WAIT_TAIL        1
+ 3 #define RCU_NEXT_READY_TAIL  2
+ 4 #define RCU_NEXT_TAIL        3
+ 5 #define RCU_CBLIST_NSEGS     4
+ 6
+ 7 struct rcu_segcblist {
+ 8   struct rcu_head *head;
+ 9   struct rcu_head **tails[RCU_CBLIST_NSEGS];
+10   unsigned long gp_seq[RCU_CBLIST_NSEGS];
+11   long len;
+12   long len_lazy;
+13 };
+</pre>
+
+<p>
+The segments are as follows:
+
+<ol>
+<li>	<tt>RCU_DONE_TAIL</tt>: Callbacks whose grace periods have elapsed.
+	These callbacks are ready to be invoked.
+<li>	<tt>RCU_WAIT_TAIL</tt>: Callbacks that are waiting for the
+	current grace period.
+	Note that different CPUs can have different ideas about which
+	grace period is current, hence the <tt>-&gt;gp_seq</tt> field.
+<li>	<tt>RCU_NEXT_READY_TAIL</tt>: Callbacks waiting for the next
+	grace period to start.
+<li>	<tt>RCU_NEXT_TAIL</tt>: Callbacks that have not yet been
+	associated with a grace period.
+</ol>
+
+<p>
+The <tt>-&gt;head</tt> pointer references the first callback or
+is <tt>NULL</tt> if the list contains no callbacks (which is
+<i>not</i> the same as being empty).
+Each element of the <tt>-&gt;tails[]</tt> array references the
+<tt>-&gt;next</tt> pointer of the last callback in the corresponding
+segment of the list, or the list's <tt>-&gt;head</tt> pointer if
+that segment and all previous segments are empty.
+If the corresponding segment is empty but some previous segment is
+not empty, then the array element is identical to its predecessor.
+Older callbacks are closer to the head of the list, and new callbacks
+are added at the tail.
+This relationship between the <tt>-&gt;head</tt> pointer, the
+<tt>-&gt;tails[]</tt> array, and the callbacks is shown in this
+diagram:
+
+</p><p><img src="nxtlist.svg" alt="nxtlist.svg" width="40%">
+
+</p><p>In this figure, the <tt>-&gt;head</tt> pointer references the
+first
+RCU callback in the list.
+The <tt>-&gt;tails[RCU_DONE_TAIL]</tt> array element references
+the <tt>-&gt;head</tt> pointer itself, indicating that none
+of the callbacks is ready to invoke.
+The <tt>-&gt;tails[RCU_WAIT_TAIL]</tt> array element references callback
+CB&nbsp;2's <tt>-&gt;next</tt> pointer, which indicates that
+CB&nbsp;1 and CB&nbsp;2 are both waiting on the current grace period,
+give or take possible disagreements about exactly which grace period
+is the current one.
+The <tt>-&gt;tails[RCU_NEXT_READY_TAIL]</tt> array element
+references the same RCU callback that <tt>-&gt;tails[RCU_WAIT_TAIL]</tt>
+does, which indicates that there are no callbacks waiting on the next
+RCU grace period.
+The <tt>-&gt;tails[RCU_NEXT_TAIL]</tt> array element references
+CB&nbsp;4's <tt>-&gt;next</tt> pointer, indicating that all the
+remaining RCU callbacks have not yet been assigned to an RCU grace
+period.
+Note that the <tt>-&gt;tails[RCU_NEXT_TAIL]</tt> array element
+always references the last RCU callback's <tt>-&gt;next</tt> pointer
+unless the callback list is empty, in which case it references
+the <tt>-&gt;head</tt> pointer.
+
+<p>
+There is one additional important special case for the
+<tt>-&gt;tails[RCU_NEXT_TAIL]</tt> array element: It can be <tt>NULL</tt>
+when this list is <i>disabled</i>.
+Lists are disabled when the corresponding CPU is offline or when
+the corresponding CPU's callbacks are offloaded to a kthread,
+both of which are described elsewhere.
+
+</p><p>CPUs advance their callbacks from the
+<tt>RCU_NEXT_TAIL</tt> to the <tt>RCU_NEXT_READY_TAIL</tt> to the
+<tt>RCU_WAIT_TAIL</tt> to the <tt>RCU_DONE_TAIL</tt> list segments
+as grace periods advance.
+
+</p><p>The <tt>-&gt;gp_seq[]</tt> array records grace-period
+numbers corresponding to the list segments.
+This is what allows different CPUs to have different ideas as to
+which is the current grace period while still avoiding premature
+invocation of their callbacks.
+In particular, this allows CPUs that go idle for extended periods
+to determine which of their callbacks are ready to be invoked after
+reawakening.
+
+</p><p>The <tt>-&gt;len</tt> counter contains the number of
+callbacks in <tt>-&gt;head</tt>, and the
+<tt>-&gt;len_lazy</tt> contains the number of those callbacks that
+are known to only free memory, and whose invocation can therefore
+be safely deferred.
+
+<p><b>Important note</b>: It is the <tt>-&gt;len</tt> field that
+determines whether or not there are callbacks associated with
+this <tt>rcu_segcblist</tt> structure, <i>not</i> the <tt>-&gt;head</tt>
+pointer.
+The reason for this is that all the ready-to-invoke callbacks
+(that is, those in the <tt>RCU_DONE_TAIL</tt> segment) are extracted
+all at once at callback-invocation time.
+If callback invocation must be postponed, for example, because a
+high-priority process just woke up on this CPU, then the remaining
+callbacks are placed back on the <tt>RCU_DONE_TAIL</tt> segment.
+Either way, the <tt>-&gt;len</tt> and <tt>-&gt;len_lazy</tt> counts
+are adjusted after the corresponding callbacks have been invoked, and so
+again it is the <tt>-&gt;len</tt> count that accurately reflects whether
+or not there are callbacks associated with this <tt>rcu_segcblist</tt>
+structure.
+Of course, off-CPU sampling of the <tt>-&gt;len</tt> count requires
+the use of appropriate synchronization, for example, memory barriers.
+This synchronization can be a bit subtle, particularly in the case
+of <tt>rcu_barrier()</tt>.
+
 <h3><a name="The rcu_data Structure">
 The <tt>rcu_data</tt> Structure</a></h3>
 
@@ -983,62 +1113,18 @@ choice.
 as follows:
 
 <pre>
- 1 struct rcu_head *nxtlist;
- 2 struct rcu_head **nxttail[RCU_NEXT_SIZE];
- 3 unsigned long nxtcompleted[RCU_NEXT_SIZE];
- 4 long qlen_lazy;
- 5 long qlen;
- 6 long qlen_last_fqs_check;
+ 1 struct rcu_segcblist cblist;
+ 2 long qlen_last_fqs_check;
+ 3 unsigned long n_cbs_invoked;
+ 4 unsigned long n_nocbs_invoked;
+ 5 unsigned long n_cbs_orphaned;
+ 6 unsigned long n_cbs_adopted;
  7 unsigned long n_force_qs_snap;
- 8 unsigned long n_cbs_invoked;
- 9 unsigned long n_cbs_orphaned;
-10 unsigned long n_cbs_adopted;
-11 long blimit;
+ 8 long blimit;
 </pre>
 
-<p>The <tt>-&gt;nxtlist</tt> pointer and the
-<tt>-&gt;nxttail[]</tt> array form a four-segment list with
-older callbacks near the head and newer ones near the tail.
-Each segment contains callbacks with the corresponding relationship
-to the current grace period.
-The pointer out of the end of each of the four segments is referenced
-by the element of the <tt>-&gt;nxttail[]</tt> array indexed by
-<tt>RCU_DONE_TAIL</tt> (for callbacks handled by a prior grace period),
-<tt>RCU_WAIT_TAIL</tt> (for callbacks waiting on the current grace period),
-<tt>RCU_NEXT_READY_TAIL</tt> (for callbacks that will wait on the next
-grace period), and
-<tt>RCU_NEXT_TAIL</tt> (for callbacks that are not yet associated
-with a specific grace period)
-respectively, as shown in the following figure.
-
-</p><p><img src="nxtlist.svg" alt="nxtlist.svg" width="40%">
-
-</p><p>In this figure, the <tt>-&gt;nxtlist</tt> pointer references the
-first
-RCU callback in the list.
-The <tt>-&gt;nxttail[RCU_DONE_TAIL]</tt> array element references
-the <tt>-&gt;nxtlist</tt> pointer itself, indicating that none
-of the callbacks is ready to invoke.
-The <tt>-&gt;nxttail[RCU_WAIT_TAIL]</tt> array element references callback
-CB&nbsp;2's <tt>-&gt;next</tt> pointer, which indicates that
-CB&nbsp;1 and CB&nbsp;2 are both waiting on the current grace period.
-The <tt>-&gt;nxttail[RCU_NEXT_READY_TAIL]</tt> array element
-references the same RCU callback that <tt>-&gt;nxttail[RCU_WAIT_TAIL]</tt>
-does, which indicates that there are no callbacks waiting on the next
-RCU grace period.
-The <tt>-&gt;nxttail[RCU_NEXT_TAIL]</tt> array element references
-CB&nbsp;4's <tt>-&gt;next</tt> pointer, indicating that all the
-remaining RCU callbacks have not yet been assigned to an RCU grace
-period.
-Note that the <tt>-&gt;nxttail[RCU_NEXT_TAIL]</tt> array element
-always references the last RCU callback's <tt>-&gt;next</tt> pointer
-unless the callback list is empty, in which case it references
-the <tt>-&gt;nxtlist</tt> pointer.
-
-</p><p>CPUs advance their callbacks from the
-<tt>RCU_NEXT_TAIL</tt> to the <tt>RCU_NEXT_READY_TAIL</tt> to the
-<tt>RCU_WAIT_TAIL</tt> to the <tt>RCU_DONE_TAIL</tt> list segments
-as grace periods advance.
+<p>The <tt>-&gt;cblist</tt> structure is the segmented callback list
+described earlier.
 The CPU advances the callbacks in its <tt>rcu_data</tt> structure
 whenever it notices that another RCU grace period has completed.
 The CPU detects the completion of an RCU grace period by noticing
@@ -1049,16 +1135,7 @@ Recall that each <tt>rcu_node</tt> structure's
 <tt>-&gt;completed</tt> field is updated at the end of each
 grace period.
 
-</p><p>The <tt>-&gt;nxtcompleted[]</tt> array records grace-period
-numbers corresponding to the list segments.
-This allows CPUs that go idle for extended periods to determine
-which of their callbacks are ready to be invoked after reawakening.
-
-</p><p>The <tt>-&gt;qlen</tt> counter contains the number of
-callbacks in <tt>-&gt;nxtlist</tt>, and the
-<tt>-&gt;qlen_lazy</tt> contains the number of those callbacks that
-are known to only free memory, and whose invocation can therefore
-be safely deferred.
+<p>
 The <tt>-&gt;qlen_last_fqs_check</tt> and
 <tt>-&gt;n_force_qs_snap</tt> coordinate the forcing of quiescent
 states from <tt>call_rcu()</tt> and friends when callback
@@ -1069,6 +1146,10 @@ lists grow excessively long.
 fields count the number of callbacks invoked,
 sent to other CPUs when this CPU goes offline,
 and received from other CPUs when those other CPUs go offline.
+The <tt>-&gt;n_nocbs_invoked</tt> is used when the CPU's callbacks
+are offloaded to a kthread.
+
+<p>
 Finally, the <tt>-&gt;blimit</tt> counter is the maximum number of
 RCU callbacks that may be invoked at a given time.
 
diff --git a/Documentation/RCU/Design/Data-Structures/nxtlist.svg b/Documentation/RCU/Design/Data-Structures/nxtlist.svg
index abc4cc73a097..0223e79c38e0 100644
--- a/Documentation/RCU/Design/Data-Structures/nxtlist.svg
+++ b/Documentation/RCU/Design/Data-Structures/nxtlist.svg
@@ -19,7 +19,7 @@
    id="svg2"
    version="1.1"
    inkscape:version="0.48.4 r9939"
-   sodipodi:docname="nxtlist.fig">
+   sodipodi:docname="segcblist.svg">
   <metadata
      id="metadata94">
     <rdf:RDF>
@@ -28,7 +28,7 @@
         <dc:format>image/svg+xml</dc:format>
         <dc:type
            rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title></dc:title>
+        <dc:title />
       </cc:Work>
     </rdf:RDF>
   </metadata>
@@ -241,61 +241,51 @@
        xml:space="preserve"
        x="225"
        y="675"
-       fill="#000000"
-       font-family="Courier"
        font-style="normal"
        font-weight="bold"
        font-size="324"
-       text-anchor="start"
-       id="text64">nxtlist</text>
+       id="text64"
+       style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;head</text>
     <!-- Text -->
     <text
        xml:space="preserve"
        x="225"
        y="1800"
-       fill="#000000"
-       font-family="Courier"
        font-style="normal"
        font-weight="bold"
        font-size="324"
-       text-anchor="start"
-       id="text66">nxttail[RCU_DONE_TAIL]</text>
+       id="text66"
+       style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;tails[RCU_DONE_TAIL]</text>
     <!-- Text -->
     <text
        xml:space="preserve"
        x="225"
        y="2925"
-       fill="#000000"
-       font-family="Courier"
        font-style="normal"
        font-weight="bold"
        font-size="324"
-       text-anchor="start"
-       id="text68">nxttail[RCU_WAIT_TAIL]</text>
+       id="text68"
+       style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;tails[RCU_WAIT_TAIL]</text>
     <!-- Text -->
     <text
        xml:space="preserve"
        x="225"
        y="4050"
-       fill="#000000"
-       font-family="Courier"
        font-style="normal"
        font-weight="bold"
        font-size="324"
-       text-anchor="start"
-       id="text70">nxttail[RCU_NEXT_READY_TAIL]</text>
+       id="text70"
+       style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;tails[RCU_NEXT_READY_TAIL]</text>
     <!-- Text -->
     <text
        xml:space="preserve"
        x="225"
        y="5175"
-       fill="#000000"
-       font-family="Courier"
        font-style="normal"
        font-weight="bold"
        font-size="324"
-       text-anchor="start"
-       id="text72">nxttail[RCU_NEXT_TAIL]</text>
+       id="text72"
+       style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;tails[RCU_NEXT_TAIL]</text>
     <!-- Text -->
     <text
        xml:space="preserve"
-- 
cgit v1.2.3


From 6771853bce3f0f7f17cab257b47f5b77b1c625b1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 11 Apr 2017 10:17:23 -0700
Subject: doc: Update requirements based on recent changes

These changes include lighter-weight expedited grace periods, the fact
that expedited grace periods and rcu_barrier() no longer block CPU
hotplug, some HTML font fixups, noting that rcu_barrier() need not wait
for a grace period (even if callbacks are posted), the fact that SRCU
read-side critical sections can be used from offline CPUs, and the fact
that SRCU now maintains per-CPU callback lists.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 .../RCU/Design/Requirements/Requirements.html      | 120 ++++++++++++++++-----
 1 file changed, 94 insertions(+), 26 deletions(-)

diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index 999b3ed3444e..f60adf112663 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -659,8 +659,9 @@ systems with more than one CPU:
 	In other words, a given instance of <tt>synchronize_rcu()</tt>
 	can avoid waiting on a given RCU read-side critical section only
 	if it can prove that <tt>synchronize_rcu()</tt> started first.
+	</font>
 
-	<p>
+	<p><font color="ffffff">
 	A related question is &ldquo;When <tt>rcu_read_lock()</tt>
 	doesn't generate any code, why does it matter how it relates
 	to a grace period?&rdquo;
@@ -675,8 +676,9 @@ systems with more than one CPU:
 	within the critical section, in which case none of the accesses
 	within the critical section may observe the effects of any
 	access following the grace period.
+	</font>
 
-	<p>
+	<p><font color="ffffff">
 	As of late 2016, mathematical models of RCU take this
 	viewpoint, for example, see slides&nbsp;62 and&nbsp;63
 	of the
@@ -1616,8 +1618,8 @@ CPUs should at least make reasonable forward progress.
 In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt>
 is permitted to impose modest degradation of real-time latency
 on non-idle online CPUs.
-That said, it will likely be necessary to take further steps to reduce this
-degradation, hopefully to roughly that of a scheduling-clock interrupt.
+Here, &ldquo;modest&rdquo; means roughly the same latency
+degradation as a scheduling-clock interrupt.
 
 <p>
 There are a number of situations where even
@@ -1913,12 +1915,9 @@ This requirement is another factor driving batching of grace periods,
 but it is also the driving force behind the checks for large numbers
 of queued RCU callbacks in the <tt>call_rcu()</tt> code path.
 Finally, high update rates should not delay RCU read-side critical
-sections, although some read-side delays can occur when using
+sections, although some small read-side delays can occur when using
 <tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use
-of <tt>try_stop_cpus()</tt>.
-(In the future, <tt>synchronize_rcu_expedited()</tt> will be
-converted to use lighter-weight inter-processor interrupts (IPIs),
-but this will still disturb readers, though to a much smaller degree.)
+of <tt>smp_call_function_single()</tt>.
 
 <p>
 Although all three of these corner cases were understood in the early
@@ -2192,7 +2191,7 @@ Unfortunately, <tt>synchronize_rcu()</tt> can't do this until all of
 its kthreads are spawned, which doesn't happen until some time during
 <tt>early_initcalls()</tt> time.
 But this is no excuse:  RCU is nevertheless required to correctly handle
-synchronous grace periods during this time period, which it currently does.
+synchronous grace periods during this time period.
 Once all of its kthreads are up and running, RCU starts running
 normally.
 
@@ -2206,8 +2205,10 @@ normally.
 <tr><th align="left">Answer:</th></tr>
 <tr><td bgcolor="#ffffff"><font color="ffffff">
 	Very carefully!
+	</font>
 
-	<p>During the &ldquo;dead zone&rdquo; between the time that the
+	<p><font color="ffffff">
+	During the &ldquo;dead zone&rdquo; between the time that the
 	scheduler spawns the first task and the time that all of RCU's
 	kthreads have been spawned, all synchronous grace periods are
 	handled by the expedited grace-period mechanism.
@@ -2220,8 +2221,10 @@ normally.
 	using workqueues, as is required to avoid problems that would
 	otherwise occur when a user task received a POSIX signal while
 	driving an expedited grace period.
+	</font>
 
-	<p>And yes, this does mean that it is unhelpful to send POSIX
+	<p><font color="ffffff">
+	And yes, this does mean that it is unhelpful to send POSIX
 	signals to random tasks between the time that the scheduler
 	spawns its first kthread and the time that RCU's kthreads
 	have all been spawned.
@@ -2308,12 +2311,61 @@ situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU.
 The need for <tt>rcu_barrier()</tt> for module unloading became
 apparent later.
 
+<p>
+<b>Important note</b>: The <tt>rcu_barrier()</tt> function is not,
+repeat, <i>not</i>, obligated to wait for a grace period.
+It is instead only required to wait for RCU callbacks that have
+already been posted.
+Therefore, if there are no RCU callbacks posted anywhere in the system,
+<tt>rcu_barrier()</tt> is within its rights to return immediately.
+Even if there are callbacks posted, <tt>rcu_barrier()</tt> does not
+necessarily need to wait for a grace period.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Wait a minute!
+	Each RCU callbacks must wait for a grace period to complete,
+	and <tt>rcu_barrier()</tt> must wait for each pre-existing
+	callback to be invoked.
+	Doesn't <tt>rcu_barrier()</tt> therefore need to wait for
+	a full grace period if there is even one callback posted anywhere
+	in the system?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Absolutely not!!!
+	</font>
+
+	<p><font color="ffffff">
+	Yes, each RCU callbacks must wait for a grace period to complete,
+	but it might well be partly (or even completely) finished waiting
+	by the time <tt>rcu_barrier()</tt> is invoked.
+	In that case, <tt>rcu_barrier()</tt> need only wait for the
+	remaining portion of the grace period to elapse.
+	So even if there are quite a few callbacks posted,
+	<tt>rcu_barrier()</tt> might well return quite quickly.
+	</font>
+
+	<p><font color="ffffff">
+	So if you need to wait for a grace period as well as for all
+	pre-existing callbacks, you will need to invoke both
+	<tt>synchronize_rcu()</tt> and <tt>rcu_barrier()</tt>.
+	If latency is a concern, you can always use workqueues
+	to invoke them concurrently.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
 <h3><a name="Hotplug CPU">Hotplug CPU</a></h3>
 
 <p>
 The Linux kernel supports CPU hotplug, which means that CPUs
 can come and go.
-It is of course illegal to use any RCU API member from an offline CPU.
+It is of course illegal to use any RCU API member from an offline CPU,
+with the exception of <a href="#Sleepable RCU">SRCU</a> read-side
+critical sections.
 This requirement was present from day one in DYNIX/ptx, but
 on the other hand, the Linux kernel's CPU-hotplug implementation
 is &ldquo;interesting.&rdquo;
@@ -2323,19 +2375,18 @@ The Linux-kernel CPU-hotplug implementation has notifiers that
 are used to allow the various kernel subsystems (including RCU)
 to respond appropriately to a given CPU-hotplug operation.
 Most RCU operations may be invoked from CPU-hotplug notifiers,
-including even normal synchronous grace-period operations
-such as <tt>synchronize_rcu()</tt>.
-However, expedited grace-period operations such as
-<tt>synchronize_rcu_expedited()</tt> are not supported,
-due to the fact that current implementations block CPU-hotplug
-operations, which could result in deadlock.
+including even synchronous grace-period operations such as
+<tt>synchronize_rcu()</tt> and <tt>synchronize_rcu_expedited()</tt>.
 
 <p>
-In addition, all-callback-wait operations such as
+However, all-callback-wait operations such as
 <tt>rcu_barrier()</tt> are also not supported, due to the
 fact that there are phases of CPU-hotplug operations where
 the outgoing CPU's callbacks will not be invoked until after
 the CPU-hotplug operation ends, which could also result in deadlock.
+Furthermore, <tt>rcu_barrier()</tt> blocks CPU-hotplug operations
+during its execution, which results in another type of deadlock
+when invoked from a CPU-hotplug notifier.
 
 <h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3>
 
@@ -2876,6 +2927,27 @@ It also motivates the <tt>smp_mb__after_srcu_read_unlock()</tt>
 API, which, in combination with <tt>srcu_read_unlock()</tt>,
 guarantees a full memory barrier.
 
+<p>
+Also unlike other RCU flavors, SRCU's callbacks-wait function
+<tt>srcu_barrier()</tt> may be invoked from CPU-hotplug notifiers,
+though this is not necessarily a good idea.
+The reason that this is possible is that SRCU is insensitive
+to whether or not a CPU is online, which means that <tt>srcu_barrier()</tt>
+need not exclude CPU-hotplug operations.
+
+<p>
+As of v4.12, SRCU's callbacks are maintained per-CPU, eliminating
+a locking bottleneck present in prior kernel versions.
+Although this will allow users to put much heavier stress on
+<tt>call_srcu()</tt>, it is important to note that SRCU does not
+yet take any special steps to deal with callback flooding.
+So if you are posting (say) 10,000 SRCU callbacks per second per CPU,
+you are probably totally OK, but if you intend to post (say) 1,000,000
+SRCU callbacks per second per CPU, please run some tests first.
+SRCU just might need a few adjustment to deal with that sort of load.
+Of course, your mileage may vary based on the speed of your CPUs and
+the size of your memory.
+
 <p>
 The
 <a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a>
@@ -3034,8 +3106,8 @@ to do some redesign to avoid this scalability problem.
 
 <p>
 RCU disables CPU hotplug in a few places, perhaps most notably in the
-expedited grace-period and <tt>rcu_barrier()</tt> operations.
-If there is a strong reason to use expedited grace periods in CPU-hotplug
+<tt>rcu_barrier()</tt> operations.
+If there is a strong reason to use <tt>rcu_barrier()</tt> in CPU-hotplug
 notifiers, it will be necessary to avoid disabling CPU hotplug.
 This would introduce some complexity, so there had better be a <i>very</i>
 good reason.
@@ -3109,9 +3181,5 @@ Andy Lutomirski for their help in rendering
 this article human readable, and to Michelle Rankin for her support
 of this effort.
 Other contributions are acknowledged in the Linux kernel's git archive.
-The cartoon is copyright (c) 2013 by Melissa Broussard,
-and is provided
-under the terms of the Creative Commons Attribution-Share Alike 3.0
-United States license.
 
 </body></html>
-- 
cgit v1.2.3


From 066bb1c84aa430d15f36070471cbfe8976631cce Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 7 Mar 2017 07:30:58 -0800
Subject: doc: Update rcu_assign_pointer() definition in whatisRCU.txt

The rcu_assign_pointer() macro has changed over time, and the version
in Documentation/RCU/whatisRCU.txt has not kept up.  This commit brings
it into 2017, albeit in a simplified fashion.

Reported-by: Andrea Parri <parri.andrea@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/RCU/whatisRCU.txt | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 5cbd8b2395b8..6b0337008f9c 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -587,20 +587,21 @@ It is extremely simple:
 		write_unlock(&rcu_gp_mutex);
 	}
 
-[You can ignore rcu_assign_pointer() and rcu_dereference() without
-missing much.  But here they are anyway.  And whatever you do, don't
-forget about them when submitting patches making use of RCU!]
-
-	#define rcu_assign_pointer(p, v)	({ \
-							smp_wmb(); \
-							(p) = (v); \
-						})
-
-	#define rcu_dereference(p)     ({ \
-					typeof(p) _________p1 = p; \
-					smp_read_barrier_depends(); \
-					(_________p1); \
-					})
+[You can ignore rcu_assign_pointer() and rcu_dereference() without missing
+much.  But here are simplified versions anyway.  And whatever you do,
+don't forget about them when submitting patches making use of RCU!]
+
+	#define rcu_assign_pointer(p, v) \
+	({ \
+		smp_store_release(&(p), (v)); \
+	})
+
+	#define rcu_dereference(p) \
+	({ \
+		typeof(p) _________p1 = p; \
+		smp_read_barrier_depends(); \
+		(_________p1); \
+	})
 
 
 The rcu_read_lock() and rcu_read_unlock() primitive read-acquire
-- 
cgit v1.2.3


From 93728af0a1f63e13d6f7f56a434965b05b8b2abd Mon Sep 17 00:00:00 2001
From: Michalis Kokologiannakis <mixaskok@gmail.com>
Date: Mon, 20 Mar 2017 22:38:35 +0100
Subject: doc: Update the comparisons rule in rcu_dereference.txt

When an RCU-protected pointer is fetched but never dereferenced
rcu_access_pointer() should be used in place of rcu_dereference().
This commit explicitly records this very fact in Documentation/
RCU/rcu_dereference.txt, in order to prevent the usage of
rcu_dereference() in comparisons.

Signed-off-by: Michalis Kokologiannakis <mixaskok@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/RCU/rcu_dereference.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt
index c0bf2441a2ba..b2a613f16d74 100644
--- a/Documentation/RCU/rcu_dereference.txt
+++ b/Documentation/RCU/rcu_dereference.txt
@@ -138,6 +138,15 @@ o	Be very careful about comparing pointers obtained from
 		This sort of comparison occurs frequently when scanning
 		RCU-protected circular linked lists.
 
+		Note that if checks for being within an RCU read-side
+		critical section are not required and the pointer is never
+		dereferenced, rcu_access_pointer() should be used in place
+		of rcu_dereference(). The rcu_access_pointer() primitive
+		does not require an enclosing read-side critical section,
+		and also omits the smp_read_barrier_depends() included in
+		rcu_dereference(), which in turn should provide a small
+		performance gain in some CPUs (e.g., the DEC Alpha).
+
 	o	The comparison is against a pointer that references memory
 		that was initialized "a long time ago."  The reason
 		this is safe is that even if misordering occurs, the
-- 
cgit v1.2.3


From d3d3a3ccc4a8f1f254fb6788081f35bebe374174 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 28 Mar 2017 19:57:45 -0700
Subject: doc: Emphasize that "toy" RCU requires recursive rwlock

Reported-by: "yangzc@uit.com.cn" <yangzc@uit.com.cn>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/RCU/whatisRCU.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 6b0337008f9c..8c131a1c62ea 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -562,7 +562,9 @@ This section presents a "toy" RCU implementation that is based on
 familiar locking primitives.  Its overhead makes it a non-starter for
 real-life use, as does its lack of scalability.  It is also unsuitable
 for realtime use, since it allows scheduling latency to "bleed" from
-one read-side critical section to another.
+one read-side critical section to another.  It also assumes recursive
+reader-writer locks:  If you try this with non-recursive locks, and
+you allow nested rcu_read_lock() calls, you can deadlock.
 
 However, it is probably the easiest implementation to relate to, so is
 a good starting point.
-- 
cgit v1.2.3


From b26cfc48e3e03126c183f1f3960e6d69460bb852 Mon Sep 17 00:00:00 2001
From: pierre Kuo <vichy.kuo@gmail.com>
Date: Fri, 7 Apr 2017 14:37:36 +0800
Subject: doc: Update control-dependencies section of memory-barriers.txt

In the following example, if MAX is defined to be 1, then the compiler
knows (Q % MAX) is equal to zero.  The compiler can therefore throw
away the "then" branch (and the "if"), retaining only the "else" branch.

	q = READ_ONCE(a);
	if (q % MAX) {
		WRITE_ONCE(b, 1);
		do_something();
	} else {
		WRITE_ONCE(b, 2);
		do_something_else();
	}

It is therefore necessary to modify the example like this:

        q = READ_ONCE(a);
-       WRITE_ONCE(b, 1);
+       WRITE_ONCE(b, 2);
        do_something_else();

Signed-off-by: pierre Kuo <vichy.kuo@gmail.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/memory-barriers.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index d2b0a8d81258..08329cb857ed 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -768,7 +768,7 @@ equal to zero, in which case the compiler is within its rights to
 transform the above code into the following:
 
 	q = READ_ONCE(a);
-	WRITE_ONCE(b, 1);
+	WRITE_ONCE(b, 2);
 	do_something_else();
 
 Given this transformation, the CPU is not required to respect the ordering
-- 
cgit v1.2.3


From b0459491ca2dcda4223b3f3c80a635ae12218580 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Sun, 26 Mar 2017 02:23:15 +0800
Subject: clk: hi6220: add debug APB clock

The debug APB clock is absent in hi6220 driver, so this patch is to add
support for it.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/hisilicon/clk-hi6220.c       | 1 +
 include/dt-bindings/clock/hi6220-clock.h | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/hisilicon/clk-hi6220.c b/drivers/clk/hisilicon/clk-hi6220.c
index c0e8e1f196aa..2ae151ce623a 100644
--- a/drivers/clk/hisilicon/clk-hi6220.c
+++ b/drivers/clk/hisilicon/clk-hi6220.c
@@ -134,6 +134,7 @@ static struct hisi_gate_clock hi6220_separated_gate_clks_sys[] __initdata = {
 	{ HI6220_UART4_PCLK,    "uart4_pclk",    "uart4_src",      CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x230, 8,  0, },
 	{ HI6220_SPI_CLK,       "spi_clk",       "clk_150m",       CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x230, 9,  0, },
 	{ HI6220_TSENSOR_CLK,   "tsensor_clk",   "clk_bus",        CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x230, 12, 0, },
+	{ HI6220_DAPB_CLK,      "dapb_clk",      "cs_dapb",        CLK_SET_RATE_PARENT|CLK_IS_CRITICAL,   0x230, 18, 0, },
 	{ HI6220_MMU_CLK,       "mmu_clk",       "ddrc_axi1",      CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x240, 11, 0, },
 	{ HI6220_HIFI_SEL,      "hifi_sel",      "hifi_src",       CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 0,  0, },
 	{ HI6220_MMC0_SYSPLL,   "mmc0_syspll",   "syspll",         CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 1,  0, },
diff --git a/include/dt-bindings/clock/hi6220-clock.h b/include/dt-bindings/clock/hi6220-clock.h
index 6b03c84f4278..b8ba665aab7b 100644
--- a/include/dt-bindings/clock/hi6220-clock.h
+++ b/include/dt-bindings/clock/hi6220-clock.h
@@ -124,7 +124,10 @@
 #define HI6220_CS_DAPB		57
 #define HI6220_CS_ATB_DIV	58
 
-#define HI6220_SYS_NR_CLKS	59
+/* gate clock */
+#define HI6220_DAPB_CLK		59
+
+#define HI6220_SYS_NR_CLKS	60
 
 /* clk in Hi6220 media controller */
 /* gate clocks */
-- 
cgit v1.2.3


From ef18910479b7b4c5e8132f8a40a2a8176ef076f1 Mon Sep 17 00:00:00 2001
From: Gabriel Fernandez <gabriel.fernandez@st.com>
Date: Thu, 16 Mar 2017 09:16:40 +0100
Subject: clk: stm32f4: fix: exclude values 0 and 1 for PLLQ

0000: PLLQ = 0, wrong configuration
0001: PLLQ = 1, wrong configuration
...
0010: PLLQ = 2
0011: PLLQ = 3
0100: PLLQ = 4
...
1111: PLLQ = 1

Use divider table to exclude 0 and 1 values.

Fixes: 83135ad3c517 ("clk: stm32f4: Add PLL_I2S & PLL_SAI for STM32F429/469 boards")

Signed-off-by: Gabriel Fernandez <gabriel.fernandez@st.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/clk-stm32f4.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c
index ab609a76706f..cf9449b3dbd9 100644
--- a/drivers/clk/clk-stm32f4.c
+++ b/drivers/clk/clk-stm32f4.c
@@ -429,6 +429,13 @@ static const struct clk_div_table pll_divp_table[] = {
 	{ 0, 2 }, { 1, 4 }, { 2, 6 }, { 3, 8 }, { 0 }
 };
 
+static const struct clk_div_table pll_divq_table[] = {
+	{ 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 },
+	{ 8, 8 }, { 9, 9 }, { 10, 10 }, { 11, 11 }, { 12, 12 }, { 13, 13 },
+	{ 14, 14 }, { 15, 15 },
+	{ 0 }
+};
+
 static const struct clk_div_table pll_divr_table[] = {
 	{ 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 }, { 0 }
 };
@@ -496,9 +503,9 @@ struct stm32f4_div_data {
 
 #define MAX_PLL_DIV 3
 static const struct stm32f4_div_data  div_data[MAX_PLL_DIV] = {
-	{ 16, 2, 0,			pll_divp_table	},
-	{ 24, 4, CLK_DIVIDER_ONE_BASED, NULL		},
-	{ 28, 3, 0,			pll_divr_table	},
+	{ 16, 2, 0, pll_divp_table },
+	{ 24, 4, 0, pll_divq_table },
+	{ 28, 3, 0, pll_divr_table },
 };
 
 struct stm32f4_pll_data {
-- 
cgit v1.2.3


From d5a0945fdf89ad293ccaa2be588635f4bfc0cd62 Mon Sep 17 00:00:00 2001
From: Ray Jui <ray.jui@broadcom.com>
Date: Wed, 5 Apr 2017 12:53:37 -0700
Subject: clk: iproc: Remove redundant check

Remove the redundant check of 'rate' in the if statement of the
'pll_set_rate' function

Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Ray Jui <ray.jui@broadcom.com>
Fixes: 5fe225c105fd ("clk: iproc: add initial common clock support")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/bcm/clk-iproc-pll.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c
index e04634c46395..2d61893da024 100644
--- a/drivers/clk/bcm/clk-iproc-pll.c
+++ b/drivers/clk/bcm/clk-iproc-pll.c
@@ -277,7 +277,7 @@ static int pll_set_rate(struct iproc_clk *clk, unsigned int rate_index,
 	if (rate >= VCO_LOW && rate < VCO_HIGH) {
 		ki = 4;
 		kp_index = KP_BAND_MID;
-	} else if (rate >= VCO_HIGH && rate && rate < VCO_HIGH_HIGH) {
+	} else if (rate >= VCO_HIGH && rate < VCO_HIGH_HIGH) {
 		ki = 3;
 		kp_index = KP_BAND_HIGH;
 	} else if (rate >= VCO_HIGH_HIGH && rate < VCO_MAX) {
-- 
cgit v1.2.3


From ac03d8b3a592a0b562fce2376030baf9a572f7c1 Mon Sep 17 00:00:00 2001
From: Gabriel Fernandez <gabriel.fernandez@st.com>
Date: Thu, 16 Mar 2017 09:16:41 +0100
Subject: clk: stm32f4: fix timeout management for pll and ready gate

Use a classic polling to test bit ready.
And the shift of the bit ready of LSE & LSI were wrongs.

Fixes: 861adc44d290 ("clk: stm32f4: Add LSI & LSE clocks")
Signed-off-by: Gabriel Fernandez <gabriel.fernandez@st.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/clk-stm32f4.c | 43 +++++++++++++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c
index cf9449b3dbd9..68e2a4e499f1 100644
--- a/drivers/clk/clk-stm32f4.c
+++ b/drivers/clk/clk-stm32f4.c
@@ -531,19 +531,26 @@ static int stm32f4_pll_is_enabled(struct clk_hw *hw)
 	return clk_gate_ops.is_enabled(hw);
 }
 
+#define PLL_TIMEOUT 10000
+
 static int stm32f4_pll_enable(struct clk_hw *hw)
 {
 	struct clk_gate *gate = to_clk_gate(hw);
 	struct stm32f4_pll *pll = to_stm32f4_pll(gate);
-	int ret = 0;
-	unsigned long reg;
+	int bit_status;
+	unsigned int timeout = PLL_TIMEOUT;
 
-	ret = clk_gate_ops.enable(hw);
+	if (clk_gate_ops.is_enabled(hw))
+		return 0;
+
+	clk_gate_ops.enable(hw);
 
-	ret = readl_relaxed_poll_timeout_atomic(base + STM32F4_RCC_CR, reg,
-			reg & (1 << pll->bit_rdy_idx), 0, 10000);
+	do {
+		bit_status = !(readl(gate->reg) & BIT(pll->bit_rdy_idx));
 
-	return ret;
+	} while (bit_status && --timeout);
+
+	return bit_status;
 }
 
 static void stm32f4_pll_disable(struct clk_hw *hw)
@@ -834,24 +841,32 @@ struct stm32_rgate {
 	u8	bit_rdy_idx;
 };
 
-#define RTC_TIMEOUT 1000000
+#define RGATE_TIMEOUT 50000
 
 static int rgclk_enable(struct clk_hw *hw)
 {
 	struct clk_gate *gate = to_clk_gate(hw);
 	struct stm32_rgate *rgate = to_rgclk(gate);
-	u32 reg;
-	int ret;
+	int bit_status;
+	unsigned int timeout = RGATE_TIMEOUT;
+
+	if (clk_gate_ops.is_enabled(hw))
+		return 0;
 
 	disable_power_domain_write_protection();
 
 	clk_gate_ops.enable(hw);
 
-	ret = readl_relaxed_poll_timeout_atomic(gate->reg, reg,
-			reg & rgate->bit_rdy_idx, 1000, RTC_TIMEOUT);
+	do {
+		bit_status = !(readl(gate->reg) & BIT(rgate->bit_rdy_idx));
+		if (bit_status)
+			udelay(100);
+
+	} while (bit_status && --timeout);
 
 	enable_power_domain_write_protection();
-	return ret;
+
+	return bit_status;
 }
 
 static void rgclk_disable(struct clk_hw *hw)
@@ -1533,7 +1548,7 @@ static void __init stm32f4_rcc_init(struct device_node *np)
 	}
 
 	clks[CLK_LSI] = clk_register_rgate(NULL, "lsi", "clk-lsi", 0,
-			base + STM32F4_RCC_CSR, 0, 2, 0, &stm32f4_clk_lock);
+			base + STM32F4_RCC_CSR, 0, 1, 0, &stm32f4_clk_lock);
 
 	if (IS_ERR(clks[CLK_LSI])) {
 		pr_err("Unable to register lsi clock\n");
@@ -1541,7 +1556,7 @@ static void __init stm32f4_rcc_init(struct device_node *np)
 	}
 
 	clks[CLK_LSE] = clk_register_rgate(NULL, "lse", "clk-lse", 0,
-			base + STM32F4_RCC_BDCR, 0, 2, 0, &stm32f4_clk_lock);
+			base + STM32F4_RCC_BDCR, 0, 1, 0, &stm32f4_clk_lock);
 
 	if (IS_ERR(clks[CLK_LSE])) {
 		pr_err("Unable to register lse clock\n");
-- 
cgit v1.2.3


From a62ca337b36e31621b582cbe8f17d9404a48e120 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@codeaurora.org>
Date: Thu, 23 Mar 2017 13:13:40 +0530
Subject: clk: qcom: msm8996: Fix the vfe1 powerdomain name

Fix a typo which caused both vfe0 and vfe1 powerdomains to be
named as vfe0.

Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
Fixes: 7e824d507909 ("clk: qcom: gdsc: Add mmcc gdscs for msm8996 family")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/qcom/mmcc-msm8996.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/qcom/mmcc-msm8996.c b/drivers/clk/qcom/mmcc-msm8996.c
index 9b97246287a7..20d8fea32a99 100644
--- a/drivers/clk/qcom/mmcc-msm8996.c
+++ b/drivers/clk/qcom/mmcc-msm8996.c
@@ -2986,7 +2986,7 @@ static struct gdsc vfe1_gdsc = {
 	.cxcs = (unsigned int []){ 0x36ac },
 	.cxc_count = 1,
 	.pd = {
-		.name = "vfe0",
+		.name = "vfe1",
 	},
 	.parent = &camss_gdsc.pd,
 	.pwrsts = PWRSTS_OFF_ON,
-- 
cgit v1.2.3


From 1f9dfd7ac787ab8caa19a7bf463f617d97c9f66c Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 5 Apr 2017 14:27:21 +0900
Subject: cs-2000-cp: keep Reserved bit on each register

Thus CS2000 datasheet is indicating below, this patch
follows it.

WARNING: All "Reserved" registers must maintain their default
         state to ensure proper functional operation.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/clk-cs2000-cp.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/clk-cs2000-cp.c b/drivers/clk/clk-cs2000-cp.c
index 3fca0526d940..f5915ff1d397 100644
--- a/drivers/clk/clk-cs2000-cp.c
+++ b/drivers/clk/clk-cs2000-cp.c
@@ -36,15 +36,27 @@
 
 /* DEVICE_CTRL */
 #define PLL_UNLOCK	(1 << 7)
+#define AUXOUTDIS	(1 << 1)
+#define CLKOUTDIS	(1 << 0)
 
 /* DEVICE_CFG1 */
 #define RSEL(x)		(((x) & 0x3) << 3)
 #define RSEL_MASK	RSEL(0x3)
 #define ENDEV1		(0x1)
 
+/* DEVICE_CFG2 */
+#define AUTORMOD	(1 << 3)
+#define LOCKCLK(x)	(((x) & 0x3) << 1)
+#define LOCKCLK_MASK	LOCKCLK(0x3)
+#define FRACNSRC	(1 << 0)
+
 /* GLOBAL_CFG */
 #define ENDEV2		(0x1)
 
+/* FUNC_CFG1 */
+#define REFCLKDIV(x)	(((x) & 0x3) << 3)
+#define REFCLKDIV_MASK	REFCLKDIV(0x3)
+
 #define CH_SIZE_ERR(ch)		((ch < 0) || (ch >= CH_MAX))
 #define hw_to_priv(_hw)		container_of(_hw, struct cs2000_priv, hw)
 #define priv_to_client(priv)	(priv->client)
@@ -127,7 +139,9 @@ static int cs2000_clk_in_bound_rate(struct cs2000_priv *priv,
 	else
 		return -EINVAL;
 
-	return cs2000_bset(priv, FUNC_CFG1, 0x3 << 3, val << 3);
+	return cs2000_bset(priv, FUNC_CFG1,
+			   REFCLKDIV_MASK,
+			   REFCLKDIV(val));
 }
 
 static int cs2000_wait_pll_lock(struct cs2000_priv *priv)
@@ -153,7 +167,10 @@ static int cs2000_wait_pll_lock(struct cs2000_priv *priv)
 static int cs2000_clk_out_enable(struct cs2000_priv *priv, bool enable)
 {
 	/* enable both AUX_OUT, CLK_OUT */
-	return cs2000_write(priv, DEVICE_CTRL, enable ? 0 : 0x3);
+	return cs2000_bset(priv, DEVICE_CTRL,
+			   (AUXOUTDIS | CLKOUTDIS),
+			   enable ? 0 :
+			   (AUXOUTDIS | CLKOUTDIS));
 }
 
 static u32 cs2000_rate_to_ratio(u32 rate_in, u32 rate_out)
@@ -243,7 +260,9 @@ static int cs2000_ratio_select(struct cs2000_priv *priv, int ch)
 	if (ret < 0)
 		return ret;
 
-	ret = cs2000_write(priv, DEVICE_CFG2, 0x0);
+	ret = cs2000_bset(priv, DEVICE_CFG2,
+			  (AUTORMOD | LOCKCLK_MASK | FRACNSRC),
+			  0);
 	if (ret < 0)
 		return ret;
 
-- 
cgit v1.2.3


From cf091ee994ec266e33c3a305d6fc8c5ccb1f8acd Mon Sep 17 00:00:00 2001
From: Robin van der Gracht <robin@protonic.nl>
Date: Fri, 3 Mar 2017 15:14:05 +0100
Subject: clk: imx: clk-imx6ul: The i.mx6ul has no aips_tz3 clock

The clock was mapped on CG15 (gpio2_clocks) in the CCRG0 register.

Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Robin van der Gracht <robin@protonic.nl>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/imx/clk-imx6ul.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
index 75c35fb12b60..dbd6e59377c3 100644
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c
@@ -73,7 +73,7 @@ static struct clk *clks[IMX6UL_CLK_END];
 static struct clk_onecell_data clk_data;
 
 static int const clks_init_on[] __initconst = {
-	IMX6UL_CLK_AIPSTZ1, IMX6UL_CLK_AIPSTZ2, IMX6UL_CLK_AIPSTZ3,
+	IMX6UL_CLK_AIPSTZ1, IMX6UL_CLK_AIPSTZ2,
 	IMX6UL_CLK_AXI, IMX6UL_CLK_ARM, IMX6UL_CLK_ROM,
 	IMX6UL_CLK_MMDC_P0_FAST, IMX6UL_CLK_MMDC_P0_IPG,
 };
@@ -341,9 +341,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_GPT2_SERIAL]	= imx_clk_gate2("gpt2_serial",	"perclk",	base + 0x68,	26);
 	clks[IMX6UL_CLK_UART2_IPG]	= imx_clk_gate2("uart2_ipg",	"ipg",		base + 0x68,	28);
 	clks[IMX6UL_CLK_UART2_SERIAL]	= imx_clk_gate2("uart2_serial",	"uart_podf",	base + 0x68,	28);
-	if (clk_on_imx6ul())
-		clks[IMX6UL_CLK_AIPSTZ3]	= imx_clk_gate2("aips_tz3",	"ahb",		base + 0x68,	30);
-	else if (clk_on_imx6ull())
+	if (clk_on_imx6ull())
 		clks[IMX6UL_CLK_AIPSTZ3]	= imx_clk_gate2("aips_tz3",	"ahb",		 base + 0x80,	18);
 
 	/* CCGR1 */
@@ -482,6 +480,9 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	for (i = 0; i < ARRAY_SIZE(clks_init_on); i++)
 		clk_prepare_enable(clks[clks_init_on[i]]);
 
+	if (clk_on_imx6ull())
+		clk_prepare_enable(clks[IMX6UL_CLK_AIPSTZ3]);
+
 	if (IS_ENABLED(CONFIG_USB_MXS_PHY)) {
 		clk_prepare_enable(clks[IMX6UL_CLK_USBPHY1_GATE]);
 		clk_prepare_enable(clks[IMX6UL_CLK_USBPHY2_GATE]);
-- 
cgit v1.2.3


From 5790d801762c588c63b41fbdbdb8295cfd6036e6 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 21 Mar 2017 16:38:21 +0800
Subject: clk: zte: set CLK_SET_RATE_PARENT for a few zx296718 clocks

To support VOU VGA display driver with different modes, we need to set
flag for a few clocks, so that clk_set_rate() call in VOU driver can get
VGA device desired pixel rate.

While at it, the divider between pll_vga and clk_vga gets corrected, as
it's 1:1 instead of 1:2.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Reviewed-by: Jun Nie <jun.nie@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/zte/clk-zx296718.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/zte/clk-zx296718.c b/drivers/clk/zte/clk-zx296718.c
index 2f7c668643fe..f2e1e8b3a8d5 100644
--- a/drivers/clk/zte/clk-zx296718.c
+++ b/drivers/clk/zte/clk-zx296718.c
@@ -409,7 +409,7 @@ static struct zx_clk_fixed_factor top_ffactor_clk[] = {
 	FFACTOR(0, "clk54m",		"pll_mm1", 1, 24, 0),
 	/* vga */
 	FFACTOR(0, "pll_vga_1800m",	"pll_vga", 1, 1, 0),
-	FFACTOR(0, "clk_vga",		"pll_vga", 1, 2, 0),
+	FFACTOR(0, "clk_vga",		"pll_vga", 1, 1, CLK_SET_RATE_PARENT),
 	/* pll ddr */
 	FFACTOR(0, "clk466m",		"pll_ddr", 1, 2, 0),
 
@@ -458,8 +458,8 @@ static struct zx_clk_mux top_mux_clk[] = {
 	MUX(0, "sappu_a_mux",	 sappu_aclk_p,	  TOP_CLK_MUX5,  4, 2),
 	MUX(0, "sappu_w_mux",	 sappu_wclk_p,	  TOP_CLK_MUX5,  8, 3),
 	MUX(0, "vou_a_mux",	 vou_aclk_p,	  TOP_CLK_MUX7,  0, 3),
-	MUX(0, "vou_main_w_mux", vou_main_wclk_p, TOP_CLK_MUX7,  4, 3),
-	MUX(0, "vou_aux_w_mux",	 vou_aux_wclk_p,  TOP_CLK_MUX7,  8, 3),
+	MUX_F(0, "vou_main_w_mux", vou_main_wclk_p, TOP_CLK_MUX7,  4, 3, CLK_SET_RATE_PARENT, 0),
+	MUX_F(0, "vou_aux_w_mux",  vou_aux_wclk_p,  TOP_CLK_MUX7,  8, 3, CLK_SET_RATE_PARENT, 0),
 	MUX(0, "vou_ppu_w_mux",	 vou_ppu_wclk_p,  TOP_CLK_MUX7, 12, 3),
 	MUX(0, "vga_i2c_mux",	 vga_i2c_wclk_p,  TOP_CLK_MUX7, 16, 1),
 	MUX(0, "viu_m0_a_mux",	 viu_m0_aclk_p,	  TOP_CLK_MUX6,  0, 3),
-- 
cgit v1.2.3


From ee249cbe42f19a7edac0e8cbb95064845e2e5218 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 21 Mar 2017 16:38:22 +0800
Subject: clk: zte: pd_bit is not 0 on zx296718

The bit 0 of PLL_CFG0 register is not powerdown on zx296718, but part of
of postdiv2 field.  The consequence is that functions like hw_to_idx()
and zx_pll_enable() will end up tampering the postdiv2 of the PLL.

Let's fix it by defining pd_bit 0xff which is obviously invalid for a
bit position and having PLL driver check the validity before operating
on the bit.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Reviewed-by: Jun Nie <jun.nie@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/zte/clk.c | 12 +++++++++++-
 drivers/clk/zte/clk.h |  6 +++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/zte/clk.c b/drivers/clk/zte/clk.c
index 878d879b23ff..b82031766ffa 100644
--- a/drivers/clk/zte/clk.c
+++ b/drivers/clk/zte/clk.c
@@ -52,7 +52,10 @@ static int hw_to_idx(struct clk_zx_pll *zx_pll)
 
 	/* For matching the value in lookup table */
 	hw_cfg0 &= ~BIT(zx_pll->lock_bit);
-	hw_cfg0 |= BIT(zx_pll->pd_bit);
+
+	/* Check availability of pd_bit */
+	if (zx_pll->pd_bit < 32)
+		hw_cfg0 |= BIT(zx_pll->pd_bit);
 
 	for (i = 0; i < zx_pll->count; i++) {
 		if (hw_cfg0 == config[i].cfg0 && hw_cfg1 == config[i].cfg1)
@@ -108,6 +111,10 @@ static int zx_pll_enable(struct clk_hw *hw)
 	struct clk_zx_pll *zx_pll = to_clk_zx_pll(hw);
 	u32 reg;
 
+	/* If pd_bit is not available, simply return success. */
+	if (zx_pll->pd_bit > 31)
+		return 0;
+
 	reg = readl_relaxed(zx_pll->reg_base);
 	writel_relaxed(reg & ~BIT(zx_pll->pd_bit), zx_pll->reg_base);
 
@@ -120,6 +127,9 @@ static void zx_pll_disable(struct clk_hw *hw)
 	struct clk_zx_pll *zx_pll = to_clk_zx_pll(hw);
 	u32 reg;
 
+	if (zx_pll->pd_bit > 31)
+		return;
+
 	reg = readl_relaxed(zx_pll->reg_base);
 	writel_relaxed(reg | BIT(zx_pll->pd_bit), zx_pll->reg_base);
 }
diff --git a/drivers/clk/zte/clk.h b/drivers/clk/zte/clk.h
index 84a55a3e2bd4..4df0f121b56d 100644
--- a/drivers/clk/zte/clk.h
+++ b/drivers/clk/zte/clk.h
@@ -66,8 +66,12 @@ struct clk_zx_pll {
 				CLK_GET_RATE_NOCACHE),			\
 }
 
+/*
+ * The pd_bit is not available on ZX296718, so let's pass something
+ * bigger than 31, e.g. 0xff, to indicate that.
+ */
 #define ZX296718_PLL(_name, _parent, _reg, _table)			\
-ZX_PLL(_name, _parent, _reg, _table, 0, 30)
+ZX_PLL(_name, _parent, _reg, _table, 0xff, 30)
 
 struct zx_clk_gate {
 	struct clk_gate gate;
-- 
cgit v1.2.3


From a90099da3cd0921e853f7b7e0c74f140a78baeed Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 21 Mar 2017 16:38:23 +0800
Subject: clk: zte: add pll_vga clock for zx296718

It adds zx296718 pll_vga clock for VGA support, so that VGA device can
get required pixel rate from clock driver for different display mode.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Reviewed-by: Jun Nie <jun.nie@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/zte/clk-zx296718.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/clk/zte/clk-zx296718.c b/drivers/clk/zte/clk-zx296718.c
index f2e1e8b3a8d5..8db0119bc6f7 100644
--- a/drivers/clk/zte/clk-zx296718.c
+++ b/drivers/clk/zte/clk-zx296718.c
@@ -101,6 +101,29 @@ static struct zx_pll_config pll_cpu_table[] = {
 	PLL_RATE(1600000000, 0x00104221, 0x04aaaaaa),
 };
 
+static struct zx_pll_config pll_vga_table[] = {
+	PLL_RATE(36000000,  0x00102464, 0x04000000), /* 800x600@56 */
+	PLL_RATE(40000000,  0x00102864, 0x04000000), /* 800x600@60 */
+	PLL_RATE(49500000,  0x00103164, 0x04800000), /* 800x600@75 */
+	PLL_RATE(50000000,  0x00103264, 0x04000000), /* 800x600@72 */
+	PLL_RATE(56250000,  0x00103864, 0x04400000), /* 800x600@85 */
+	PLL_RATE(65000000,  0x00104164, 0x04000000), /* 1024x768@60 */
+	PLL_RATE(74375000,  0x00104a64, 0x04600000), /* 1280x720@60 */
+	PLL_RATE(75000000,  0x00104b64, 0x04800000), /* 1024x768@70 */
+	PLL_RATE(78750000,  0x00104e64, 0x04c00000), /* 1024x768@75 */
+	PLL_RATE(85500000,  0x00105564, 0x04800000), /* 1360x768@60 */
+	PLL_RATE(106500000, 0x00106a64, 0x04800000), /* 1440x900@60 */
+	PLL_RATE(108000000, 0x00106c64, 0x04000000), /* 1280x1024@60 */
+	PLL_RATE(110000000, 0x00106e64, 0x04000000), /* 1024x768@85 */
+	PLL_RATE(135000000, 0x00105a44, 0x04000000), /* 1280x1024@75 */
+	PLL_RATE(136750000, 0x00104462, 0x04600000), /* 1440x900@75 */
+	PLL_RATE(148500000, 0x00104a62, 0x04400000), /* 1920x1080@60 */
+	PLL_RATE(157000000, 0x00104e62, 0x04800000), /* 1440x900@85 */
+	PLL_RATE(157500000, 0x00104e62, 0x04c00000), /* 1280x1024@85 */
+	PLL_RATE(162000000, 0x00105162, 0x04000000), /* 1600x1200@60 */
+	PLL_RATE(193250000, 0x00106062, 0x04a00000), /* 1920x1200@60 */
+};
+
 PNAME(osc) = {
 	"osc24m",
 	"osc32k",
@@ -369,6 +392,7 @@ PNAME(wdt_ares_p) = {
 
 static struct clk_zx_pll zx296718_pll_clk[] = {
 	ZX296718_PLL("pll_cpu",	"osc24m",	PLL_CPU_REG,	pll_cpu_table),
+	ZX296718_PLL("pll_vga",	"osc24m",	PLL_VGA_REG,	pll_vga_table),
 };
 
 static struct zx_clk_fixed_factor top_ffactor_clk[] = {
-- 
cgit v1.2.3


From 15a2a14b23024ab79fd22de28ba7c006be46aa53 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Fri, 7 Apr 2017 12:21:33 -0700
Subject: clk: zte: Mark pll config tables as const

These should be const.

Cc: Shawn Guo <shawn.guo@linaro.org>
Cc: Jun Nie <jun.nie@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/zte/clk-zx296718.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/zte/clk-zx296718.c b/drivers/clk/zte/clk-zx296718.c
index 8db0119bc6f7..a10962988ba8 100644
--- a/drivers/clk/zte/clk-zx296718.c
+++ b/drivers/clk/zte/clk-zx296718.c
@@ -94,14 +94,14 @@
 
 static DEFINE_SPINLOCK(clk_lock);
 
-static struct zx_pll_config pll_cpu_table[] = {
+static const struct zx_pll_config pll_cpu_table[] = {
 	PLL_RATE(1312000000, 0x00103621, 0x04aaaaaa),
 	PLL_RATE(1407000000, 0x00103a21, 0x04aaaaaa),
 	PLL_RATE(1503000000, 0x00103e21, 0x04aaaaaa),
 	PLL_RATE(1600000000, 0x00104221, 0x04aaaaaa),
 };
 
-static struct zx_pll_config pll_vga_table[] = {
+static const struct zx_pll_config pll_vga_table[] = {
 	PLL_RATE(36000000,  0x00102464, 0x04000000), /* 800x600@56 */
 	PLL_RATE(40000000,  0x00102864, 0x04000000), /* 800x600@60 */
 	PLL_RATE(49500000,  0x00103164, 0x04800000), /* 800x600@75 */
-- 
cgit v1.2.3


From 1905719556c077bc5619821e7840cac4f743ac73 Mon Sep 17 00:00:00 2001
From: Robin van der Gracht <robin@protonic.nl>
Date: Mon, 6 Mar 2017 09:13:43 +0100
Subject: clk: imx: correct uart4_serial clock name in driver for i.MX6UL

Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Robin van der Gracht <robin@protonic.nl>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/imx/clk-imx6ul.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
index dbd6e59377c3..b4e0dff3c8c2 100644
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c
@@ -358,7 +358,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_GPT1_BUS]	= imx_clk_gate2("gpt1_bus",	"perclk",	base + 0x6c,	20);
 	clks[IMX6UL_CLK_GPT1_SERIAL]	= imx_clk_gate2("gpt1_serial",	"perclk",	base + 0x6c,	22);
 	clks[IMX6UL_CLK_UART4_IPG]	= imx_clk_gate2("uart4_ipg",	"ipg",		base + 0x6c,	24);
-	clks[IMX6UL_CLK_UART4_SERIAL]	= imx_clk_gate2("uart4_serail",	"uart_podf",	base + 0x6c,	24);
+	clks[IMX6UL_CLK_UART4_SERIAL]	= imx_clk_gate2("uart4_serial",	"uart_podf",	base + 0x6c,	24);
 
 	/* CCGR2 */
 	if (clk_on_imx6ull()) {
-- 
cgit v1.2.3


From 92031575c3e60c2f641aacf752bd7494d65e70f7 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Tue, 21 Mar 2017 15:20:31 +0200
Subject: clk: add clk_possible_parents debugfs file

For validation purposes, it's often useful to be able to retrieve the list
of possible parents in userspace. Add a debugfs file for every clock which
has more than 1 possible parent.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Reviewed-by: Jon Mayo <jmayo@nvidia.com>
[sboyd@codeaurora.org: Remove useless cast from void and extra
newline]
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/clk.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 67201f67a14a..0d8eb5aa477a 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2126,6 +2126,31 @@ static const struct file_operations clk_dump_fops = {
 	.release	= single_release,
 };
 
+static int possible_parents_dump(struct seq_file *s, void *data)
+{
+	struct clk_core *core = s->private;
+	int i;
+
+	for (i = 0; i < core->num_parents - 1; i++)
+		seq_printf(s, "%s ", core->parent_names[i]);
+
+	seq_printf(s, "%s\n", core->parent_names[i]);
+
+	return 0;
+}
+
+static int possible_parents_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, possible_parents_dump, inode->i_private);
+}
+
+static const struct file_operations possible_parents_fops = {
+	.open		= possible_parents_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int clk_debug_create_one(struct clk_core *core, struct dentry *pdentry)
 {
 	struct dentry *d;
@@ -2177,6 +2202,13 @@ static int clk_debug_create_one(struct clk_core *core, struct dentry *pdentry)
 	if (!d)
 		goto err_out;
 
+	if (core->num_parents > 1) {
+		d = debugfs_create_file("clk_possible_parents", S_IRUGO,
+				core->dentry, core, &possible_parents_fops);
+		if (!d)
+			goto err_out;
+	}
+
 	if (core->ops->debug_init) {
 		ret = core->ops->debug_init(core->hw, core->dentry);
 		if (ret)
-- 
cgit v1.2.3


From 17c34c566795ca7529cf888fb33fdb7997d6b202 Mon Sep 17 00:00:00 2001
From: Peter De Schrijver <pdeschrijver@nvidia.com>
Date: Tue, 21 Mar 2017 12:16:26 +0200
Subject: clk: aggregate return codes of notify chains

In case there are multiple notify chains for the same clocks (because they
were registered by different users), we need to propagate potential failure
of any single one of them to the caller. Otherwise we eg risk violating the
V/f curve when a notifier is used for DVFS.

Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/clk.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 0d8eb5aa477a..cddddbe46d9d 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -966,6 +966,8 @@ static int __clk_notify(struct clk_core *core, unsigned long msg,
 			cnd.clk = cn->clk;
 			ret = srcu_notifier_call_chain(&cn->notifier_head, msg,
 					&cnd);
+			if (ret & NOTIFY_STOP_MASK)
+				return ret;
 		}
 	}
 
-- 
cgit v1.2.3


From 83dd720da666a2af6debbb2d8aab289e3b6cb8f5 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 11 Apr 2017 00:35:49 +0000
Subject: clk: cs2000: use existing priv_to_dev() to getting struct device

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/clk-cs2000-cp.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/clk/clk-cs2000-cp.c b/drivers/clk/clk-cs2000-cp.c
index f5915ff1d397..4df38c5ff96c 100644
--- a/drivers/clk/clk-cs2000-cp.c
+++ b/drivers/clk/clk-cs2000-cp.c
@@ -370,8 +370,7 @@ static const struct clk_ops cs2000_ops = {
 
 static int cs2000_clk_get(struct cs2000_priv *priv)
 {
-	struct i2c_client *client = priv_to_client(priv);
-	struct device *dev = &client->dev;
+	struct device *dev = priv_to_dev(priv);
 	struct clk *clk_in, *ref_clk;
 
 	clk_in = devm_clk_get(dev, "clk_in");
@@ -439,8 +438,7 @@ static int cs2000_clk_register(struct cs2000_priv *priv)
 
 static int cs2000_version_print(struct cs2000_priv *priv)
 {
-	struct i2c_client *client = priv_to_client(priv);
-	struct device *dev = &client->dev;
+	struct device *dev = priv_to_dev(priv);
 	s32 val;
 	const char *revision;
 
@@ -471,7 +469,7 @@ static int cs2000_version_print(struct cs2000_priv *priv)
 static int cs2000_remove(struct i2c_client *client)
 {
 	struct cs2000_priv *priv = i2c_get_clientdata(client);
-	struct device *dev = &client->dev;
+	struct device *dev = priv_to_dev(priv);
 	struct device_node *np = dev->of_node;
 
 	of_clk_del_provider(np);
-- 
cgit v1.2.3


From f110cc4f9b2b456501c839d02412544c9656c2cc Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@imgtec.com>
Date: Fri, 31 Mar 2017 16:21:24 +0100
Subject: MIPS: generic: Enable Root FS on NFS in generic_defconfig

The generic_defconfig is used for platforms like SEAD3 which do not
usually have fixed storage available, therefore NFS is the preferred
location of the RFS.
When the upstream kernel defconfig is built & tested on platforms such
as SEAD3 this leads to essentially false failures when the RFS fails to
mount.

There is little harm in having this feature enabled by default, so
enable it in the defconfig. Kernel autoconfiguration & DHCP must also be
selected to allow RFS on NFS.

Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15853/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/configs/generic_defconfig | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/mips/configs/generic_defconfig b/arch/mips/configs/generic_defconfig
index c95d94c7838b..91aacf2ef26d 100644
--- a/arch/mips/configs/generic_defconfig
+++ b/arch/mips/configs/generic_defconfig
@@ -36,6 +36,8 @@ CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
 CONFIG_NETFILTER=y
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
@@ -80,6 +82,7 @@ CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_REDUCED=y
-- 
cgit v1.2.3


From ca452b95e3be1cbd86ee60165de640d27ddca8b7 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 30 Mar 2017 13:51:59 -0700
Subject: MIPS: Remove CONFIG_ARCH_HAS_ILOG2_U{32,64}

We declare CONFIG_ARCH_HAS_ILOG2_U32 & CONFIG_ARCH_HAS_ILOG2_U64 in
Kconfig, but they are always false since nothing ever selects them. The
generic fls-based implementation is efficient for MIPS anyway. Remove
the redundant Kconfig entries.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: trivial@kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15840/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 52d81ca6f74e..1f8ebdf75734 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1042,14 +1042,6 @@ config RWSEM_GENERIC_SPINLOCK
 config RWSEM_XCHGADD_ALGORITHM
 	bool
 
-config ARCH_HAS_ILOG2_U32
-	bool
-	default n
-
-config ARCH_HAS_ILOG2_U64
-	bool
-	default n
-
 config GENERIC_HWEIGHT
 	bool
 	default y
-- 
cgit v1.2.3


From 6748cb3c299de1ffbe56733647b01dbcc398c419 Mon Sep 17 00:00:00 2001
From: Behan Webster <behanw@converseincode.com>
Date: Mon, 27 Mar 2017 18:19:09 -0700
Subject: kbuild: use -Oz instead of -Os when using clang

This generates smaller resulting object code when compiled with clang.

Signed-off-by: Behan Webster <behanw@converseincode.com>
Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ae49fef98ca3..d26618608633 100644
--- a/Makefile
+++ b/Makefile
@@ -638,7 +638,8 @@ KBUILD_CFLAGS	+= $(call cc-option,-fdata-sections,)
 endif
 
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-KBUILD_CFLAGS	+= -Os $(call cc-disable-warning,maybe-uninitialized,)
+KBUILD_CFLAGS	+= $(call cc-option,-Oz,-Os)
+KBUILD_CFLAGS	+= $(call cc-disable-warning,maybe-uninitialized,)
 else
 ifdef CONFIG_PROFILE_ALL_BRANCHES
 KBUILD_CFLAGS	+= -O2 $(call cc-disable-warning,maybe-uninitialized,)
-- 
cgit v1.2.3


From ebf003f0cfb3705e60d40dedc3ec949176c741af Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Wed, 12 Apr 2017 12:43:52 -0700
Subject: kbuild: Consolidate header generation from ASM offset information

Largely redundant code is used in different places to generate C headers
from offset information extracted from assembly language output.
Consolidate the code in Makefile.lib and use this instead.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Kbuild                    | 25 -------------------------
 arch/ia64/kernel/Makefile | 26 ++------------------------
 scripts/Makefile.lib      | 28 ++++++++++++++++++++++++++++
 scripts/mod/Makefile      | 28 ++--------------------------
 4 files changed, 32 insertions(+), 75 deletions(-)

diff --git a/Kbuild b/Kbuild
index 3d0ae152af7c..94c752762bc2 100644
--- a/Kbuild
+++ b/Kbuild
@@ -7,31 +7,6 @@
 # 4) Check for missing system calls
 # 5) Generate constants.py (may need bounds.h)
 
-# Default sed regexp - multiline due to syntax constraints
-define sed-y
-	"/^->/{s:->#\(.*\):/* \1 */:; \
-	s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \
-	s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
-	s:->::; p;}"
-endef
-
-# Use filechk to avoid rebuilds when a header changes, but the resulting file
-# does not
-define filechk_offsets
-	(set -e; \
-	 echo "#ifndef $2"; \
-	 echo "#define $2"; \
-	 echo "/*"; \
-	 echo " * DO NOT MODIFY."; \
-	 echo " *"; \
-	 echo " * This file was generated by Kbuild"; \
-	 echo " */"; \
-	 echo ""; \
-	 sed -ne $(sed-y); \
-	 echo ""; \
-	 echo "#endif" )
-endef
-
 #####
 # 1) Generate bounds.h
 
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 3686d6abafde..9edda5466020 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -50,32 +50,10 @@ CFLAGS_traps.o  += -mfixed-range=f2-f5,f16-f31
 # The gate DSO image is built using a special linker script.
 include $(src)/Makefile.gate
 
-# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config
-define sed-y
-	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
-endef
-quiet_cmd_nr_irqs = GEN     $@
-define cmd_nr_irqs
-	(set -e; \
-	 echo "#ifndef __ASM_NR_IRQS_H__"; \
-	 echo "#define __ASM_NR_IRQS_H__"; \
-	 echo "/*"; \
-	 echo " * DO NOT MODIFY."; \
-	 echo " *"; \
-	 echo " * This file was generated by Kbuild"; \
-	 echo " *"; \
-	 echo " */"; \
-	 echo ""; \
-	 sed -ne $(sed-y) $<; \
-	 echo ""; \
-	 echo "#endif" ) > $@
-endef
-
 # We use internal kbuild rules to avoid the "is up to date" message from make
 arch/$(SRCARCH)/kernel/nr-irqs.s: arch/$(SRCARCH)/kernel/nr-irqs.c
 	$(Q)mkdir -p $(dir $@)
 	$(call if_changed_dep,cc_s_c)
 
-include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s
-	$(Q)mkdir -p $(dir $@)
-	$(call cmd,nr_irqs)
+include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s FORCE
+	$(call filechk,offsets,__ASM_NR_IRQS_H__)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 0a07f9014944..dd567e5d59e0 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -408,3 +408,31 @@ quiet_cmd_xzmisc = XZMISC  $@
 cmd_xzmisc = (cat $(filter-out FORCE,$^) | \
 	xz --check=crc32 --lzma2=dict=1MiB) > $@ || \
 	(rm -f $@ ; false)
+
+# ASM offsets
+# ---------------------------------------------------------------------------
+
+# Default sed regexp - multiline due to syntax constraints
+define sed-offsets
+	"/^->/{s:->#\(.*\):/* \1 */:; \
+	s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \
+	s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
+	s:->::; p;}"
+endef
+
+# Use filechk to avoid rebuilds when a header changes, but the resulting file
+# does not
+define filechk_offsets
+	(set -e; \
+	 echo "#ifndef $2"; \
+	 echo "#define $2"; \
+	 echo "/*"; \
+	 echo " * DO NOT MODIFY."; \
+	 echo " *"; \
+	 echo " * This file was generated by Kbuild"; \
+	 echo " */"; \
+	 echo ""; \
+	 sed -ne $(sed-offsets); \
+	 echo ""; \
+	 echo "#endif" )
+endef
diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index 19d9bcadc0cc..b497d9764dcf 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -7,32 +7,8 @@ modpost-objs	:= modpost.o file2alias.o sumversion.o
 
 devicetable-offsets-file := devicetable-offsets.h
 
-define sed-y
-	"/^->/{s:->#\(.*\):/* \1 */:; \
-	s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \
-	s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
-	s:->::; p;}"
-endef
-
-quiet_cmd_offsets = GEN     $@
-define cmd_offsets
-	(set -e; \
-	 echo "#ifndef __DEVICETABLE_OFFSETS_H__"; \
-	 echo "#define __DEVICETABLE_OFFSETS_H__"; \
-	 echo "/*"; \
-	 echo " * DO NOT MODIFY."; \
-	 echo " *"; \
-	 echo " * This file was generated by Kbuild"; \
-	 echo " *"; \
-	 echo " */"; \
-	 echo ""; \
-	 sed -ne $(sed-y) $<; \
-	 echo ""; \
-	 echo "#endif" ) > $@
-endef
-
-$(obj)/$(devicetable-offsets-file): $(obj)/devicetable-offsets.s
-	$(call if_changed,offsets)
+$(obj)/$(devicetable-offsets-file): $(obj)/devicetable-offsets.s FORCE
+	$(call filechk,offsets,__DEVICETABLE_OFFSETS_H__)
 
 targets += $(devicetable-offsets-file) devicetable-offsets.s
 
-- 
cgit v1.2.3


From c0cfbe6941085f2ebd58e645c10f8836b6d28fe9 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 30 Mar 2017 12:06:09 -0700
Subject: irqchip: mips-cpu: Replace magic 0x100 with IE_SW0

Replace use of the magic number 0x100 (ie. bit 8) with the more
explanatory IE_SW0 (ie. interrupt enable for software interrupt 0) or
C_SW0 (ie. cause bit for software interrupt 0) as appropriate.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15834/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 drivers/irqchip/irq-mips-cpu.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/irqchip/irq-mips-cpu.c b/drivers/irqchip/irq-mips-cpu.c
index 8c504f562e9d..e6b413669e57 100644
--- a/drivers/irqchip/irq-mips-cpu.c
+++ b/drivers/irqchip/irq-mips-cpu.c
@@ -41,13 +41,13 @@
 
 static inline void unmask_mips_irq(struct irq_data *d)
 {
-	set_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
+	set_c0_status(IE_SW0 << (d->irq - MIPS_CPU_IRQ_BASE));
 	irq_enable_hazard();
 }
 
 static inline void mask_mips_irq(struct irq_data *d)
 {
-	clear_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
+	clear_c0_status(IE_SW0 << (d->irq - MIPS_CPU_IRQ_BASE));
 	irq_disable_hazard();
 }
 
@@ -70,7 +70,7 @@ static unsigned int mips_mt_cpu_irq_startup(struct irq_data *d)
 {
 	unsigned int vpflags = dvpe();
 
-	clear_c0_cause(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
+	clear_c0_cause(C_SW0 << (d->irq - MIPS_CPU_IRQ_BASE));
 	evpe(vpflags);
 	unmask_mips_irq(d);
 	return 0;
@@ -83,7 +83,7 @@ static unsigned int mips_mt_cpu_irq_startup(struct irq_data *d)
 static void mips_mt_cpu_irq_ack(struct irq_data *d)
 {
 	unsigned int vpflags = dvpe();
-	clear_c0_cause(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
+	clear_c0_cause(C_SW0 << (d->irq - MIPS_CPU_IRQ_BASE));
 	evpe(vpflags);
 	mask_mips_irq(d);
 }
-- 
cgit v1.2.3


From 131735afc1838997da2c151b614b13f0352cf448 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 30 Mar 2017 12:06:10 -0700
Subject: irqchip: mips-cpu: Prepare for non-legacy IRQ domains

The various struct irq_chip callbacks in the MIPS CPU interrupt
controller driver have been calculating the hardware interrupt number by
subtracting MIPS_CPU_IRQ_BASE from the virq number. This presumes a
linear mapping beginning from MIPS_CPU_IRQ_BASE, and this will not hold
once an IPI IRQ domain is introduced. Switch to using the hwirq field of
struct irq_data which already contains the hardware interrupt number
instead of attempting to calculate it.

Similarly, plat_irq_dispatch calculated the virq number by adding
MIPS_CPU_IRQ_BASE to the hardware interrupt number. Ready this for the
introduction of an IPI IRQ domain by instead using irq_linear_revmap.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15835/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 drivers/irqchip/irq-mips-cpu.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/irqchip/irq-mips-cpu.c b/drivers/irqchip/irq-mips-cpu.c
index e6b413669e57..338de924b269 100644
--- a/drivers/irqchip/irq-mips-cpu.c
+++ b/drivers/irqchip/irq-mips-cpu.c
@@ -39,15 +39,17 @@
 #include <asm/mipsmtregs.h>
 #include <asm/setup.h>
 
+static struct irq_domain *irq_domain;
+
 static inline void unmask_mips_irq(struct irq_data *d)
 {
-	set_c0_status(IE_SW0 << (d->irq - MIPS_CPU_IRQ_BASE));
+	set_c0_status(IE_SW0 << d->hwirq);
 	irq_enable_hazard();
 }
 
 static inline void mask_mips_irq(struct irq_data *d)
 {
-	clear_c0_status(IE_SW0 << (d->irq - MIPS_CPU_IRQ_BASE));
+	clear_c0_status(IE_SW0 << d->hwirq);
 	irq_disable_hazard();
 }
 
@@ -70,7 +72,7 @@ static unsigned int mips_mt_cpu_irq_startup(struct irq_data *d)
 {
 	unsigned int vpflags = dvpe();
 
-	clear_c0_cause(C_SW0 << (d->irq - MIPS_CPU_IRQ_BASE));
+	clear_c0_cause(C_SW0 << d->hwirq);
 	evpe(vpflags);
 	unmask_mips_irq(d);
 	return 0;
@@ -83,7 +85,7 @@ static unsigned int mips_mt_cpu_irq_startup(struct irq_data *d)
 static void mips_mt_cpu_irq_ack(struct irq_data *d)
 {
 	unsigned int vpflags = dvpe();
-	clear_c0_cause(C_SW0 << (d->irq - MIPS_CPU_IRQ_BASE));
+	clear_c0_cause(C_SW0 << d->hwirq);
 	evpe(vpflags);
 	mask_mips_irq(d);
 }
@@ -103,6 +105,7 @@ static struct irq_chip mips_mt_cpu_irq_controller = {
 asmlinkage void __weak plat_irq_dispatch(void)
 {
 	unsigned long pending = read_c0_cause() & read_c0_status() & ST0_IM;
+	unsigned int virq;
 	int irq;
 
 	if (!pending) {
@@ -113,7 +116,8 @@ asmlinkage void __weak plat_irq_dispatch(void)
 	pending >>= CAUSEB_IP;
 	while (pending) {
 		irq = fls(pending) - 1;
-		do_IRQ(MIPS_CPU_IRQ_BASE + irq);
+		virq = irq_linear_revmap(irq_domain, irq);
+		do_IRQ(virq);
 		pending &= ~BIT(irq);
 	}
 }
@@ -145,15 +149,14 @@ static const struct irq_domain_ops mips_cpu_intc_irq_domain_ops = {
 
 static void __init __mips_cpu_irq_init(struct device_node *of_node)
 {
-	struct irq_domain *domain;
-
 	/* Mask interrupts. */
 	clear_c0_status(ST0_IM);
 	clear_c0_cause(CAUSEF_IP);
 
-	domain = irq_domain_add_legacy(of_node, 8, MIPS_CPU_IRQ_BASE, 0,
-				       &mips_cpu_intc_irq_domain_ops, NULL);
-	if (!domain)
+	irq_domain = irq_domain_add_legacy(of_node, 8, MIPS_CPU_IRQ_BASE, 0,
+					   &mips_cpu_intc_irq_domain_ops,
+					   NULL);
+	if (!irq_domain)
 		panic("Failed to add irqdomain for MIPS CPU");
 }
 
-- 
cgit v1.2.3


From 3838a547fda22a37faab5770d01acd72aaeabbf6 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 30 Mar 2017 12:06:11 -0700
Subject: irqchip: mips-cpu: Introduce IPI IRQ domain support

Introduce support for registering an IPI IRQ domain suitable for use by
systems using the MIPS MT (multithreading) ASE within a single core.
This will allow for such systems to be supported generically, without
the current kludge of IPI code split between the MIPS arch & the malta
board support code.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15836/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 drivers/irqchip/Kconfig        |   2 +
 drivers/irqchip/irq-mips-cpu.c | 125 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 119 insertions(+), 8 deletions(-)

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 8162121bb1bc..f03ef434503b 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -128,7 +128,9 @@ config IMGPDC_IRQ
 config IRQ_MIPS_CPU
 	bool
 	select GENERIC_IRQ_CHIP
+	select GENERIC_IRQ_IPI if SYS_SUPPORTS_MULTITHREADING
 	select IRQ_DOMAIN
+	select IRQ_DOMAIN_HIERARCHY if GENERIC_IRQ_IPI
 
 config CLPS711X_IRQCHIP
 	bool
diff --git a/drivers/irqchip/irq-mips-cpu.c b/drivers/irqchip/irq-mips-cpu.c
index 338de924b269..b247f3c743ac 100644
--- a/drivers/irqchip/irq-mips-cpu.c
+++ b/drivers/irqchip/irq-mips-cpu.c
@@ -17,15 +17,14 @@
 /*
  * Almost all MIPS CPUs define 8 interrupt sources.  They are typically
  * level triggered (i.e., cannot be cleared from CPU; must be cleared from
- * device).  The first two are software interrupts which we don't really
- * use or support.  The last one is usually the CPU timer interrupt if
- * counter register is present or, for CPUs with an external FPU, by
- * convention it's the FPU exception interrupt.
+ * device).
  *
- * Don't even think about using this on SMP.  You have been warned.
+ * The first two are software interrupts (i.e. not exposed as pins) which
+ * may be used for IPIs in multi-threaded single-core systems.
  *
- * This file exports one global function:
- *	void mips_cpu_irq_init(void);
+ * The last one is usually the CPU timer interrupt if the counter register
+ * is present, or for old CPUs with an external FPU by convention it's the
+ * FPU exception interrupt.
  */
 #include <linux/init.h>
 #include <linux/interrupt.h>
@@ -40,6 +39,7 @@
 #include <asm/setup.h>
 
 static struct irq_domain *irq_domain;
+static struct irq_domain *ipi_domain;
 
 static inline void unmask_mips_irq(struct irq_data *d)
 {
@@ -90,6 +90,29 @@ static void mips_mt_cpu_irq_ack(struct irq_data *d)
 	mask_mips_irq(d);
 }
 
+#ifdef CONFIG_GENERIC_IRQ_IPI
+
+static void mips_mt_send_ipi(struct irq_data *d, unsigned int cpu)
+{
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
+	int vpflags;
+
+	local_irq_save(flags);
+
+	/* We can only send IPIs to VPEs within the local core */
+	WARN_ON(cpu_data[cpu].core != current_cpu_data.core);
+
+	vpflags = dvpe();
+	settc(cpu_vpe_id(&cpu_data[cpu]));
+	write_vpe_c0_cause(read_vpe_c0_cause() | (C_SW0 << hwirq));
+	evpe(vpflags);
+
+	local_irq_restore(flags);
+}
+
+#endif /* CONFIG_GENERIC_IRQ_IPI */
+
 static struct irq_chip mips_mt_cpu_irq_controller = {
 	.name		= "MIPS",
 	.irq_startup	= mips_mt_cpu_irq_startup,
@@ -100,6 +123,9 @@ static struct irq_chip mips_mt_cpu_irq_controller = {
 	.irq_eoi	= unmask_mips_irq,
 	.irq_disable	= mask_mips_irq,
 	.irq_enable	= unmask_mips_irq,
+#ifdef CONFIG_GENERIC_IRQ_IPI
+	.ipi_send_single = mips_mt_send_ipi,
+#endif
 };
 
 asmlinkage void __weak plat_irq_dispatch(void)
@@ -116,7 +142,10 @@ asmlinkage void __weak plat_irq_dispatch(void)
 	pending >>= CAUSEB_IP;
 	while (pending) {
 		irq = fls(pending) - 1;
-		virq = irq_linear_revmap(irq_domain, irq);
+		if (IS_ENABLED(CONFIG_GENERIC_IRQ_IPI) && irq < 2)
+			virq = irq_linear_revmap(ipi_domain, irq);
+		else
+			virq = irq_linear_revmap(irq_domain, irq);
 		do_IRQ(virq);
 		pending &= ~BIT(irq);
 	}
@@ -147,6 +176,79 @@ static const struct irq_domain_ops mips_cpu_intc_irq_domain_ops = {
 	.xlate = irq_domain_xlate_onecell,
 };
 
+#ifdef CONFIG_GENERIC_IRQ_IPI
+
+struct cpu_ipi_domain_state {
+	DECLARE_BITMAP(allocated, 2);
+};
+
+static int mips_cpu_ipi_alloc(struct irq_domain *domain, unsigned int virq,
+			      unsigned int nr_irqs, void *arg)
+{
+	struct cpu_ipi_domain_state *state = domain->host_data;
+	unsigned int i, hwirq;
+	int ret;
+
+	for (i = 0; i < nr_irqs; i++) {
+		hwirq = find_first_zero_bit(state->allocated, 2);
+		if (hwirq == 2)
+			return -EBUSY;
+		bitmap_set(state->allocated, hwirq, 1);
+
+		ret = irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq,
+						    &mips_mt_cpu_irq_controller,
+						    NULL);
+		if (ret)
+			return ret;
+
+		ret = irq_set_irq_type(virq + i, IRQ_TYPE_LEVEL_HIGH);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int mips_cpu_ipi_match(struct irq_domain *d, struct device_node *node,
+			      enum irq_domain_bus_token bus_token)
+{
+	bool is_ipi;
+
+	switch (bus_token) {
+	case DOMAIN_BUS_IPI:
+		is_ipi = d->bus_token == bus_token;
+		return (!node || (to_of_node(d->fwnode) == node)) && is_ipi;
+	default:
+		return 0;
+	}
+}
+
+static const struct irq_domain_ops mips_cpu_ipi_chip_ops = {
+	.alloc	= mips_cpu_ipi_alloc,
+	.match	= mips_cpu_ipi_match,
+};
+
+static void mips_cpu_register_ipi_domain(struct device_node *of_node)
+{
+	struct cpu_ipi_domain_state *ipi_domain_state;
+
+	ipi_domain_state = kzalloc(sizeof(*ipi_domain_state), GFP_KERNEL);
+	ipi_domain = irq_domain_add_hierarchy(irq_domain,
+					      IRQ_DOMAIN_FLAG_IPI_SINGLE,
+					      2, of_node,
+					      &mips_cpu_ipi_chip_ops,
+					      ipi_domain_state);
+	if (!ipi_domain)
+		panic("Failed to add MIPS CPU IPI domain");
+	ipi_domain->bus_token = DOMAIN_BUS_IPI;
+}
+
+#else /* !CONFIG_GENERIC_IRQ_IPI */
+
+static inline void mips_cpu_register_ipi_domain(struct device_node *of_node) {}
+
+#endif /* !CONFIG_GENERIC_IRQ_IPI */
+
 static void __init __mips_cpu_irq_init(struct device_node *of_node)
 {
 	/* Mask interrupts. */
@@ -158,6 +260,13 @@ static void __init __mips_cpu_irq_init(struct device_node *of_node)
 					   NULL);
 	if (!irq_domain)
 		panic("Failed to add irqdomain for MIPS CPU");
+
+	/*
+	 * Only proceed to register the software interrupt IPI implementation
+	 * for CPUs which implement the MIPS MT (multi-threading) ASE.
+	 */
+	if (cpu_has_mipsmt)
+		mips_cpu_register_ipi_domain(of_node);
 }
 
 void __init mips_cpu_irq_init(void)
-- 
cgit v1.2.3


From 1eed40043579608e16509c43eeeb3a53a8a42378 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 30 Mar 2017 12:06:12 -0700
Subject: MIPS: smp-mt: Use CPU interrupt controller IPI IRQ domain support

Remove the smp-mt IPI code that supported single-core multithreaded
systems and instead make use of the IPI IRQ domain support provided by
the MIPS CPU interrupt controller driver. This removes some less than
nice code, the horrible split between arch & board code and the
duplication that led to within board code.

The lantiq portion of this patch has only been compile tested. Malta has
been tested & is functional.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15837/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp-mt.c       | 49 ++----------------------
 arch/mips/lantiq/irq.c          | 52 --------------------------
 arch/mips/mti-malta/malta-int.c | 83 ++---------------------------------------
 3 files changed, 8 insertions(+), 176 deletions(-)

diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index e398cbc3d776..ed6b4df583ea 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -83,6 +83,8 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0,
 	if (tc != 0)
 		smvp_copy_vpe_config();
 
+	cpu_data[ncpu].vpe_id = tc;
+
 	return ncpu;
 }
 
@@ -114,49 +116,6 @@ static void __init smvp_tc_init(unsigned int tc, unsigned int mvpconf0)
 	write_tc_c0_tchalt(TCHALT_H);
 }
 
-static void vsmp_send_ipi_single(int cpu, unsigned int action)
-{
-	int i;
-	unsigned long flags;
-	int vpflags;
-
-#ifdef CONFIG_MIPS_GIC
-	if (gic_present) {
-		mips_smp_send_ipi_single(cpu, action);
-		return;
-	}
-#endif
-	local_irq_save(flags);
-
-	vpflags = dvpe();	/* can't access the other CPU's registers whilst MVPE enabled */
-
-	switch (action) {
-	case SMP_CALL_FUNCTION:
-		i = C_SW1;
-		break;
-
-	case SMP_RESCHEDULE_YOURSELF:
-	default:
-		i = C_SW0;
-		break;
-	}
-
-	/* 1:1 mapping of vpe and tc... */
-	settc(cpu);
-	write_vpe_c0_cause(read_vpe_c0_cause() | i);
-	evpe(vpflags);
-
-	local_irq_restore(flags);
-}
-
-static void vsmp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
-{
-	unsigned int i;
-
-	for_each_cpu(i, mask)
-		vsmp_send_ipi_single(i, action);
-}
-
 static void vsmp_init_secondary(void)
 {
 #ifdef CONFIG_MIPS_GIC
@@ -281,8 +240,8 @@ static void __init vsmp_prepare_cpus(unsigned int max_cpus)
 }
 
 struct plat_smp_ops vsmp_smp_ops = {
-	.send_ipi_single	= vsmp_send_ipi_single,
-	.send_ipi_mask		= vsmp_send_ipi_mask,
+	.send_ipi_single	= mips_smp_send_ipi_single,
+	.send_ipi_mask		= mips_smp_send_ipi_mask,
 	.init_secondary		= vsmp_init_secondary,
 	.smp_finish		= vsmp_smp_finish,
 	.boot_secondary		= vsmp_boot_secondary,
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index 0ddf3698b85d..33728b7af426 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -274,47 +274,6 @@ static void ltq_hw_irq_handler(struct irq_desc *desc)
 	ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2);
 }
 
-#ifdef CONFIG_MIPS_MT_SMP
-void __init arch_init_ipiirq(int irq, struct irqaction *action)
-{
-	setup_irq(irq, action);
-	irq_set_handler(irq, handle_percpu_irq);
-}
-
-static void ltq_sw0_irqdispatch(void)
-{
-	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ);
-}
-
-static void ltq_sw1_irqdispatch(void)
-{
-	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ);
-}
-static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
-{
-	scheduler_ipi();
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
-{
-	generic_smp_call_function_interrupt();
-	return IRQ_HANDLED;
-}
-
-static struct irqaction irq_resched = {
-	.handler	= ipi_resched_interrupt,
-	.flags		= IRQF_PERCPU,
-	.name		= "IPI_resched"
-};
-
-static struct irqaction irq_call = {
-	.handler	= ipi_call_interrupt,
-	.flags		= IRQF_PERCPU,
-	.name		= "IPI_call"
-};
-#endif
-
 asmlinkage void plat_irq_dispatch(void)
 {
 	unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
@@ -402,17 +361,6 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
 		(MAX_IM * INT_NUM_IM_OFFSET) + MIPS_CPU_IRQ_CASCADE,
 		&irq_domain_ops, 0);
 
-#if defined(CONFIG_MIPS_MT_SMP)
-	if (cpu_has_vint) {
-		pr_info("Setting up IPI vectored interrupts\n");
-		set_vi_handler(MIPS_CPU_IPI_RESCHED_IRQ, ltq_sw0_irqdispatch);
-		set_vi_handler(MIPS_CPU_IPI_CALL_IRQ, ltq_sw1_irqdispatch);
-	}
-	arch_init_ipiirq(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ,
-		&irq_resched);
-	arch_init_ipiirq(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ, &irq_call);
-#endif
-
 #ifndef CONFIG_MIPS_MT_SMP
 	set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 |
 		IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index cb675ec6f283..fe9bb479f2a0 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -145,56 +145,6 @@ static irqreturn_t corehi_handler(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_MIPS_MT_SMP
-
-#define MIPS_CPU_IPI_RESCHED_IRQ 0	/* SW int 0 for resched */
-#define C_RESCHED C_SW0
-#define MIPS_CPU_IPI_CALL_IRQ 1		/* SW int 1 for resched */
-#define C_CALL C_SW1
-static int cpu_ipi_resched_irq, cpu_ipi_call_irq;
-
-static void ipi_resched_dispatch(void)
-{
-	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ);
-}
-
-static void ipi_call_dispatch(void)
-{
-	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ);
-}
-
-static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
-{
-#ifdef CONFIG_MIPS_VPE_APSP_API_CMP
-	if (aprp_hook)
-		aprp_hook();
-#endif
-
-	scheduler_ipi();
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
-{
-	generic_smp_call_function_interrupt();
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction irq_resched = {
-	.handler	= ipi_resched_interrupt,
-	.flags		= IRQF_PERCPU,
-	.name		= "IPI_resched"
-};
-
-static struct irqaction irq_call = {
-	.handler	= ipi_call_interrupt,
-	.flags		= IRQF_PERCPU,
-	.name		= "IPI_call"
-};
-#endif /* CONFIG_MIPS_MT_SMP */
-
 static struct irqaction corehi_irqaction = {
 	.handler = corehi_handler,
 	.name = "CoreHi",
@@ -222,12 +172,6 @@ static msc_irqmap_t msc_eicirqmap[] __initdata = {
 
 static int msc_nr_eicirqs __initdata = ARRAY_SIZE(msc_eicirqmap);
 
-void __init arch_init_ipiirq(int irq, struct irqaction *action)
-{
-	setup_irq(irq, action);
-	irq_set_handler(irq, handle_percpu_irq);
-}
-
 void __init arch_init_irq(void)
 {
 	int corehi_irq;
@@ -262,30 +206,11 @@ void __init arch_init_irq(void)
 
 	if (gic_present) {
 		corehi_irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_COREHI;
+	} else if (cpu_has_veic) {
+		set_vi_handler(MSC01E_INT_COREHI, corehi_irqdispatch);
+		corehi_irq = MSC01E_INT_BASE + MSC01E_INT_COREHI;
 	} else {
-#if defined(CONFIG_MIPS_MT_SMP)
-		/* set up ipi interrupts */
-		if (cpu_has_veic) {
-			set_vi_handler (MSC01E_INT_SW0, ipi_resched_dispatch);
-			set_vi_handler (MSC01E_INT_SW1, ipi_call_dispatch);
-			cpu_ipi_resched_irq = MSC01E_INT_SW0;
-			cpu_ipi_call_irq = MSC01E_INT_SW1;
-		} else {
-			cpu_ipi_resched_irq = MIPS_CPU_IRQ_BASE +
-				MIPS_CPU_IPI_RESCHED_IRQ;
-			cpu_ipi_call_irq = MIPS_CPU_IRQ_BASE +
-				MIPS_CPU_IPI_CALL_IRQ;
-		}
-		arch_init_ipiirq(cpu_ipi_resched_irq, &irq_resched);
-		arch_init_ipiirq(cpu_ipi_call_irq, &irq_call);
-#endif
-		if (cpu_has_veic) {
-			set_vi_handler(MSC01E_INT_COREHI,
-				       corehi_irqdispatch);
-			corehi_irq = MSC01E_INT_BASE + MSC01E_INT_COREHI;
-		} else {
-			corehi_irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_COREHI;
-		}
+		corehi_irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_COREHI;
 	}
 
 	setup_irq(corehi_irq, &corehi_irqaction);
-- 
cgit v1.2.3


From e64889823d58305a5ddb2828ae0a988c59e87d7e Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 30 Mar 2017 12:06:13 -0700
Subject: MIPS: Stengthen IPI IRQ domain sanity check

Commit fbde2d7d8290 ("MIPS: Add generic SMP IPI support") introduced a
sanity check that an IPI IRQ domain can be found during boot, in order
to ensure that IPIs are able to be set up in systems using such domains.
However it was added at a point where systems may have used an IPI IRQ
domain in some situations but not others, and we could not know which
were the case until runtime, so commit 578bffc82ec5 ("MIPS: Don't BUG_ON
when no IPI domain is found") made that check simply skip IPI init if no
domain were found in order to fix the boot for systems such as QEMU
Malta.

We now use IPI IRQ domains for the MIPS CPU interrupt controller, which
means systems which make use of IPI IRQ domains will always do so when
running on multiple CPUs. As a result we now strengthen the sanity check
to ensure that an IPI IRQ domain is found when multiple CPUs are present
in the system.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15838/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 6e71130549ea..aba1afb64b62 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -261,16 +261,20 @@ int mips_smp_ipi_allocate(const struct cpumask *mask)
 		ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI);
 
 	/*
-	 * There are systems which only use IPI domains some of the time,
-	 * depending upon configuration we don't know until runtime. An
-	 * example is Malta where we may compile in support for GIC & the
-	 * MT ASE, but run on a system which has multiple VPEs in a single
-	 * core and doesn't include a GIC. Until all IPI implementations
-	 * have been converted to use IPI domains the best we can do here
-	 * is to return & hope some other code sets up the IPIs.
+	 * There are systems which use IPI IRQ domains, but only have one
+	 * registered when some runtime condition is met. For example a Malta
+	 * kernel may include support for GIC & CPU interrupt controller IPI
+	 * IRQ domains, but if run on a system with no GIC & no MT ASE then
+	 * neither will be supported or registered.
+	 *
+	 * We only have a problem if we're actually using multiple CPUs so fail
+	 * loudly if that is the case. Otherwise simply return, skipping IPI
+	 * setup, if we're running with only a single CPU.
 	 */
-	if (!ipidomain)
+	if (!ipidomain) {
+		BUG_ON(num_present_cpus() > 1);
 		return 0;
+	}
 
 	virq = irq_reserve_ipi(ipidomain, mask);
 	BUG_ON(!virq);
-- 
cgit v1.2.3


From 72f941af88a543e6a038c75b8467903b7812239c Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 30 Mar 2017 14:27:02 -0700
Subject: MIPS: Remove confusing else statement in __do_page_fault()

Commit 41c594ab65fc ("[MIPS] MT: Improved multithreading support.")
added an else case to an if statement in do_page_fault() (which has
since gained 2 leading underscores) for some unclear reason. If the
condition in the if statement evaluates true then we execute a goto &
branch elsewhere anyway, so the else has no effect. Combined with an #if
0 block with misleading indentation introduced in the same commit it
makes the code less clear than it could be.

Remove the unnecessary else statement & de-indent the printk within
the #if 0 block in order to make the code easier for humans to parse.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: trivial@kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15842/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/fault.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 3bef306cdfdb..4f8f5bf46977 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -267,19 +267,19 @@ do_sigbus:
 	/* Kernel mode? Handle exceptions or die */
 	if (!user_mode(regs))
 		goto no_context;
-	else
+
 	/*
 	 * Send a sigbus, regardless of whether we were in kernel
 	 * or user mode.
 	 */
 #if 0
-		printk("do_page_fault() #3: sending SIGBUS to %s for "
-		       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
-		       tsk->comm,
-		       write ? "write access to" : "read access from",
-		       field, address,
-		       field, (unsigned long) regs->cp0_epc,
-		       field, (unsigned long) regs->regs[31]);
+	printk("do_page_fault() #3: sending SIGBUS to %s for "
+	       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
+	       tsk->comm,
+	       write ? "write access to" : "read access from",
+	       field, address,
+	       field, (unsigned long) regs->cp0_epc,
+	       field, (unsigned long) regs->regs[31]);
 #endif
 	current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
 	tsk->thread.cp0_badvaddr = address;
-- 
cgit v1.2.3


From 4162c5ce52e593acd6b53efa09eb945650e2e729 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Fri, 24 Mar 2017 16:33:05 +0800
Subject: clk: sunxi-ng: use 1 as fallback for minimum multiplier

A zero multiplier does not make sense for clocks.

Use 1 as the minimum when a multiplier minimum isn't specified.

Fixes: 2beaa601c849 ("clk: sunxi-ng: Implement minimum for multipliers")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu_nk.c   | 8 ++++----
 drivers/clk/sunxi-ng/ccu_nkm.c  | 8 ++++----
 drivers/clk/sunxi-ng/ccu_nkmp.c | 4 ++--
 drivers/clk/sunxi-ng/ccu_nm.c   | 2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/clk/sunxi-ng/ccu_nk.c b/drivers/clk/sunxi-ng/ccu_nk.c
index b9e9b8a9d1b4..2485bda87a9a 100644
--- a/drivers/clk/sunxi-ng/ccu_nk.c
+++ b/drivers/clk/sunxi-ng/ccu_nk.c
@@ -102,9 +102,9 @@ static long ccu_nk_round_rate(struct clk_hw *hw, unsigned long rate,
 	if (nk->common.features & CCU_FEATURE_FIXED_POSTDIV)
 		rate *= nk->fixed_post_div;
 
-	_nk.min_n = nk->n.min;
+	_nk.min_n = nk->n.min ?: 1;
 	_nk.max_n = nk->n.max ?: 1 << nk->n.width;
-	_nk.min_k = nk->k.min;
+	_nk.min_k = nk->k.min ?: 1;
 	_nk.max_k = nk->k.max ?: 1 << nk->k.width;
 
 	ccu_nk_find_best(*parent_rate, rate, &_nk);
@@ -127,9 +127,9 @@ static int ccu_nk_set_rate(struct clk_hw *hw, unsigned long rate,
 	if (nk->common.features & CCU_FEATURE_FIXED_POSTDIV)
 		rate = rate * nk->fixed_post_div;
 
-	_nk.min_n = nk->n.min;
+	_nk.min_n = nk->n.min ?: 1;
 	_nk.max_n = nk->n.max ?: 1 << nk->n.width;
-	_nk.min_k = nk->k.min;
+	_nk.min_k = nk->k.min ?: 1;
 	_nk.max_k = nk->k.max ?: 1 << nk->k.width;
 
 	ccu_nk_find_best(parent_rate, rate, &_nk);
diff --git a/drivers/clk/sunxi-ng/ccu_nkm.c b/drivers/clk/sunxi-ng/ccu_nkm.c
index 71f81e95a061..cba84afe1cf1 100644
--- a/drivers/clk/sunxi-ng/ccu_nkm.c
+++ b/drivers/clk/sunxi-ng/ccu_nkm.c
@@ -109,9 +109,9 @@ static unsigned long ccu_nkm_round_rate(struct ccu_mux_internal *mux,
 	struct ccu_nkm *nkm = data;
 	struct _ccu_nkm _nkm;
 
-	_nkm.min_n = nkm->n.min;
+	_nkm.min_n = nkm->n.min ?: 1;
 	_nkm.max_n = nkm->n.max ?: 1 << nkm->n.width;
-	_nkm.min_k = nkm->k.min;
+	_nkm.min_k = nkm->k.min ?: 1;
 	_nkm.max_k = nkm->k.max ?: 1 << nkm->k.width;
 	_nkm.min_m = 1;
 	_nkm.max_m = nkm->m.max ?: 1 << nkm->m.width;
@@ -138,9 +138,9 @@ static int ccu_nkm_set_rate(struct clk_hw *hw, unsigned long rate,
 	unsigned long flags;
 	u32 reg;
 
-	_nkm.min_n = nkm->n.min;
+	_nkm.min_n = nkm->n.min ?: 1;
 	_nkm.max_n = nkm->n.max ?: 1 << nkm->n.width;
-	_nkm.min_k = nkm->k.min;
+	_nkm.min_k = nkm->k.min ?: 1;
 	_nkm.max_k = nkm->k.max ?: 1 << nkm->k.width;
 	_nkm.min_m = 1;
 	_nkm.max_m = nkm->m.max ?: 1 << nkm->m.width;
diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c
index a2b40a000157..c9d47dbff912 100644
--- a/drivers/clk/sunxi-ng/ccu_nkmp.c
+++ b/drivers/clk/sunxi-ng/ccu_nkmp.c
@@ -116,9 +116,9 @@ static long ccu_nkmp_round_rate(struct clk_hw *hw, unsigned long rate,
 	struct ccu_nkmp *nkmp = hw_to_ccu_nkmp(hw);
 	struct _ccu_nkmp _nkmp;
 
-	_nkmp.min_n = nkmp->n.min;
+	_nkmp.min_n = nkmp->n.min ?: 1;
 	_nkmp.max_n = nkmp->n.max ?: 1 << nkmp->n.width;
-	_nkmp.min_k = nkmp->k.min;
+	_nkmp.min_k = nkmp->k.min ?: 1;
 	_nkmp.max_k = nkmp->k.max ?: 1 << nkmp->k.width;
 	_nkmp.min_m = 1;
 	_nkmp.max_m = nkmp->m.max ?: 1 << nkmp->m.width;
diff --git a/drivers/clk/sunxi-ng/ccu_nm.c b/drivers/clk/sunxi-ng/ccu_nm.c
index af71b1909cd9..f312c92f2a21 100644
--- a/drivers/clk/sunxi-ng/ccu_nm.c
+++ b/drivers/clk/sunxi-ng/ccu_nm.c
@@ -99,7 +99,7 @@ static long ccu_nm_round_rate(struct clk_hw *hw, unsigned long rate,
 	struct ccu_nm *nm = hw_to_ccu_nm(hw);
 	struct _ccu_nm _nm;
 
-	_nm.min_n = nm->n.min;
+	_nm.min_n = nm->n.min ?: 1;
 	_nm.max_n = nm->n.max ?: 1 << nm->n.width;
 	_nm.min_m = 1;
 	_nm.max_m = nm->m.max ?: 1 << nm->m.width;
-- 
cgit v1.2.3


From 95ad8ed9c87fae043c347ba6cba05aabe5b04d76 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Fri, 24 Mar 2017 16:33:06 +0800
Subject: clk: sunxi-ng: Fix round_rate/set_rate multiplier minimum mismatch

In commit 2beaa601c849 ("clk: sunxi-ng: Implement minimum for
multipliers"), the multiplier minimums in the set_rate callback
for NM and NKMP style clocks were not updated.

This patch fixes them to match their round_rate callbacks.

Fixes: 2beaa601c849 ("clk: sunxi-ng: Implement minimum for multipliers")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu_nkmp.c | 4 ++--
 drivers/clk/sunxi-ng/ccu_nm.c   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c
index c9d47dbff912..0f80e5e498bb 100644
--- a/drivers/clk/sunxi-ng/ccu_nkmp.c
+++ b/drivers/clk/sunxi-ng/ccu_nkmp.c
@@ -138,9 +138,9 @@ static int ccu_nkmp_set_rate(struct clk_hw *hw, unsigned long rate,
 	unsigned long flags;
 	u32 reg;
 
-	_nkmp.min_n = 1;
+	_nkmp.min_n = nkmp->n.min ?: 1;
 	_nkmp.max_n = nkmp->n.max ?: 1 << nkmp->n.width;
-	_nkmp.min_k = 1;
+	_nkmp.min_k = nkmp->k.min ?: 1;
 	_nkmp.max_k = nkmp->k.max ?: 1 << nkmp->k.width;
 	_nkmp.min_m = 1;
 	_nkmp.max_m = nkmp->m.max ?: 1 << nkmp->m.width;
diff --git a/drivers/clk/sunxi-ng/ccu_nm.c b/drivers/clk/sunxi-ng/ccu_nm.c
index f312c92f2a21..5e5e90a4a50c 100644
--- a/drivers/clk/sunxi-ng/ccu_nm.c
+++ b/drivers/clk/sunxi-ng/ccu_nm.c
@@ -122,7 +122,7 @@ static int ccu_nm_set_rate(struct clk_hw *hw, unsigned long rate,
 	else
 		ccu_frac_helper_disable(&nm->common, &nm->frac);
 
-	_nm.min_n = 1;
+	_nm.min_n = nm->n.min ?: 1;
 	_nm.max_n = nm->n.max ?: 1 << nm->n.width;
 	_nm.min_m = 1;
 	_nm.max_m = nm->m.max ?: 1 << nm->m.width;
-- 
cgit v1.2.3


From 7149c1becddf49022fe0aad2fa395377cd8f753e Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Fri, 24 Mar 2017 16:33:07 +0800
Subject: clk: sunxi-ng: a80: Fix audio PLL comment not matching actual code

We ignore the d1 and d2 dividers in the audio PLL, and force them to
1 (register value 0) at probe time. However the comment preceding the
audio PLL definition says we enforce the default value, which is not
the same.

Fix the preceding comment to match what we do in code.

Fixes: b8eb71dcdd08 ("clk: sunxi-ng: Add A80 CCU")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/clk/sunxi-ng/ccu-sun9i-a80.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
index 51f6d495de5b..8936ef87652c 100644
--- a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
+++ b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
@@ -70,8 +70,7 @@ static struct ccu_mult pll_c1cpux_clk = {
 /*
  * The Audio PLL has d1, d2 dividers in addition to the usual N, M
  * factors. Since we only need 2 frequencies from this PLL: 22.5792 MHz
- * and 24.576 MHz, ignore them for now. Enforce the default for them,
- * which is d1 = 0, d2 = 1.
+ * and 24.576 MHz, ignore them for now. Enforce d1 = 0 and d2 = 0.
  */
 #define SUN9I_A80_PLL_AUDIO_REG	0x008
 
-- 
cgit v1.2.3


From 6db78b201b3fa3611b9557f0d677cd4560358f9d Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@gmail.com>
Date: Wed, 12 Apr 2017 18:29:23 +0200
Subject: pwm: tegra: Avoid potential overflow for short periods

For very short periods, the result of the division might overflow the
unsigned long hz variable (on 32-bit architectures). Avoid that by
making it an unsigned long long. While at it, also remove an unneeded
local variable whose only purpose is to store a temporary computation.

Acked-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-tegra.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
index 9c7f180b9f3a..c040f87ee144 100644
--- a/drivers/pwm/pwm-tegra.c
+++ b/drivers/pwm/pwm-tegra.c
@@ -75,9 +75,8 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 			    int duty_ns, int period_ns)
 {
 	struct tegra_pwm_chip *pc = to_tegra_pwm_chip(chip);
-	unsigned long long c = duty_ns;
-	unsigned long rate, hz;
-	unsigned long long ns100 = NSEC_PER_SEC;
+	unsigned long long c = duty_ns, hz;
+	unsigned long rate;
 	u32 val = 0;
 	int err;
 
@@ -98,9 +97,8 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 	rate = clk_get_rate(pc->clk) >> PWM_DUTY_WIDTH;
 
 	/* Consider precision in PWM_SCALE_WIDTH rate calculation */
-	ns100 *= 100;
-	hz = DIV_ROUND_CLOSEST_ULL(ns100, period_ns);
-	rate = DIV_ROUND_CLOSEST(rate * 100, hz);
+	hz = DIV_ROUND_CLOSEST_ULL(100ULL * NSEC_PER_SEC, period_ns);
+	rate = DIV_ROUND_CLOSEST_ULL(100ULL * rate, hz);
 
 	/*
 	 * Since the actual PWM divider is the register's frequency divider
-- 
cgit v1.2.3


From 6db249db5ed9c09e396ed77811853cbd63415f0a Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Thu, 13 Apr 2017 04:54:42 +0800
Subject: pwm: mediatek: Don't explicitly set .owner

drivers/pwm/pwm-mediatek.c:210:3-8: No need to set .owner here. The core will do it.

 Remove .owner field if calls are used which set it automatically

Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci

CC: John Crispin <john@phrozen.org>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-mediatek.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c
index 7263952d1aac..5c11bc708a3c 100644
--- a/drivers/pwm/pwm-mediatek.c
+++ b/drivers/pwm/pwm-mediatek.c
@@ -207,7 +207,6 @@ MODULE_DEVICE_TABLE(of, mtk_pwm_of_match);
 static struct platform_driver mtk_pwm_driver = {
 	.driver = {
 		.name = "mtk-pwm",
-		.owner = THIS_MODULE,
 		.of_match_table = mtk_pwm_of_match,
 	},
 	.probe = mtk_pwm_probe,
-- 
cgit v1.2.3


From c40c461e1944b9cfb520e04184ec1e5c80fb210b Mon Sep 17 00:00:00 2001
From: Sven Van Asbroeck <thesven73@gmail.com>
Date: Thu, 13 Apr 2017 08:58:11 -0400
Subject: pwm: pca9685: Fix GPIO-only operation

GPIO-only driver operation never clears the SLEEP bit, which can cause
the GPIOs to become unusable.

Example:
1. user requests first PWM  ->      driver clears SLEEP bit
2. user frees last PWM      ->      driver sets SLEEP bit
3. user requests GPIO
4. user switches GPIO on    ->      output does not turn on
                                    because SLEEP bit is set

Prevent this behaviour by letting the runtime PM framework control the
SLEEP bit. This will put the chip to SLEEP if no PWMs/GPIOs are exported
or in use.

Fixes: bccec89f0a35 ("Allow any of the 16 PWMs to be used as a GPIO")
Reported-by: Sven Van Asbroeck <TheSven73@googlemail.com>
Signed-off-by: Sven Van Asbroeck <TheSven73@googlemail.com>
Suggested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-pca9685.c | 112 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 79 insertions(+), 33 deletions(-)

diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c
index 0cfb3571a732..5f55cfab9b1c 100644
--- a/drivers/pwm/pwm-pca9685.c
+++ b/drivers/pwm/pwm-pca9685.c
@@ -30,6 +30,7 @@
 #include <linux/regmap.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
+#include <linux/pm_runtime.h>
 
 /*
  * Because the PCA9685 has only one prescaler per chip, changing the period of
@@ -79,7 +80,6 @@
 struct pca9685 {
 	struct pwm_chip chip;
 	struct regmap *regmap;
-	int active_cnt;
 	int duty_ns;
 	int period_ns;
 #if IS_ENABLED(CONFIG_GPIOLIB)
@@ -111,20 +111,10 @@ static int pca9685_pwm_gpio_request(struct gpio_chip *gpio, unsigned int offset)
 	pwm_set_chip_data(pwm, (void *)1);
 
 	mutex_unlock(&pca->lock);
+	pm_runtime_get_sync(pca->chip.dev);
 	return 0;
 }
 
-static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset)
-{
-	struct pca9685 *pca = gpiochip_get_data(gpio);
-	struct pwm_device *pwm;
-
-	mutex_lock(&pca->lock);
-	pwm = &pca->chip.pwms[offset];
-	pwm_set_chip_data(pwm, NULL);
-	mutex_unlock(&pca->lock);
-}
-
 static bool pca9685_pwm_is_gpio(struct pca9685 *pca, struct pwm_device *pwm)
 {
 	bool is_gpio = false;
@@ -177,6 +167,19 @@ static void pca9685_pwm_gpio_set(struct gpio_chip *gpio, unsigned int offset,
 	regmap_write(pca->regmap, LED_N_ON_H(pwm->hwpwm), on);
 }
 
+static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset)
+{
+	struct pca9685 *pca = gpiochip_get_data(gpio);
+	struct pwm_device *pwm;
+
+	pca9685_pwm_gpio_set(gpio, offset, 0);
+	pm_runtime_put(pca->chip.dev);
+	mutex_lock(&pca->lock);
+	pwm = &pca->chip.pwms[offset];
+	pwm_set_chip_data(pwm, NULL);
+	mutex_unlock(&pca->lock);
+}
+
 static int pca9685_pwm_gpio_get_direction(struct gpio_chip *chip,
 					  unsigned int offset)
 {
@@ -238,6 +241,16 @@ static inline int pca9685_pwm_gpio_probe(struct pca9685 *pca)
 }
 #endif
 
+static void pca9685_set_sleep_mode(struct pca9685 *pca, int sleep)
+{
+	regmap_update_bits(pca->regmap, PCA9685_MODE1,
+			   MODE1_SLEEP, sleep ? MODE1_SLEEP : 0);
+	if (!sleep) {
+		/* Wait 500us for the oscillator to be back up */
+		udelay(500);
+	}
+}
+
 static int pca9685_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 			      int duty_ns, int period_ns)
 {
@@ -252,19 +265,20 @@ static int pca9685_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 
 		if (prescale >= PCA9685_PRESCALE_MIN &&
 			prescale <= PCA9685_PRESCALE_MAX) {
+			/*
+			 * putting the chip briefly into SLEEP mode
+			 * at this point won't interfere with the
+			 * pm_runtime framework, because the pm_runtime
+			 * state is guaranteed active here.
+			 */
 			/* Put chip into sleep mode */
-			regmap_update_bits(pca->regmap, PCA9685_MODE1,
-					   MODE1_SLEEP, MODE1_SLEEP);
+			pca9685_set_sleep_mode(pca, 1);
 
 			/* Change the chip-wide output frequency */
 			regmap_write(pca->regmap, PCA9685_PRESCALE, prescale);
 
 			/* Wake the chip up */
-			regmap_update_bits(pca->regmap, PCA9685_MODE1,
-					   MODE1_SLEEP, 0x0);
-
-			/* Wait 500us for the oscillator to be back up */
-			udelay(500);
+			pca9685_set_sleep_mode(pca, 0);
 
 			pca->period_ns = period_ns;
 		} else {
@@ -406,21 +420,15 @@ static int pca9685_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
 
 	if (pca9685_pwm_is_gpio(pca, pwm))
 		return -EBUSY;
-
-	if (pca->active_cnt++ == 0)
-		return regmap_update_bits(pca->regmap, PCA9685_MODE1,
-					  MODE1_SLEEP, 0x0);
+	pm_runtime_get_sync(chip->dev);
 
 	return 0;
 }
 
 static void pca9685_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
 {
-	struct pca9685 *pca = to_pca(chip);
-
-	if (--pca->active_cnt == 0)
-		regmap_update_bits(pca->regmap, PCA9685_MODE1, MODE1_SLEEP,
-				   MODE1_SLEEP);
+	pca9685_pwm_disable(chip, pwm);
+	pm_runtime_put(chip->dev);
 }
 
 static const struct pwm_ops pca9685_pwm_ops = {
@@ -492,22 +500,54 @@ static int pca9685_pwm_probe(struct i2c_client *client,
 		return ret;
 
 	ret = pca9685_pwm_gpio_probe(pca);
-	if (ret < 0)
+	if (ret < 0) {
 		pwmchip_remove(&pca->chip);
+		return ret;
+	}
+
+	/* the chip comes out of power-up in the active state */
+	pm_runtime_set_active(&client->dev);
+	/*
+	 * enable will put the chip into suspend, which is what we
+	 * want as all outputs are disabled at this point
+	 */
+	pm_runtime_enable(&client->dev);
 
-	return ret;
+	return 0;
 }
 
 static int pca9685_pwm_remove(struct i2c_client *client)
 {
 	struct pca9685 *pca = i2c_get_clientdata(client);
+	int ret;
 
-	regmap_update_bits(pca->regmap, PCA9685_MODE1, MODE1_SLEEP,
-			   MODE1_SLEEP);
+	ret = pwmchip_remove(&pca->chip);
+	if (ret)
+		return ret;
+	pm_runtime_disable(&client->dev);
+	return 0;
+}
 
-	return pwmchip_remove(&pca->chip);
+#ifdef CONFIG_PM
+static int pca9685_pwm_runtime_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct pca9685 *pca = i2c_get_clientdata(client);
+
+	pca9685_set_sleep_mode(pca, 1);
+	return 0;
 }
 
+static int pca9685_pwm_runtime_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct pca9685 *pca = i2c_get_clientdata(client);
+
+	pca9685_set_sleep_mode(pca, 0);
+	return 0;
+}
+#endif
+
 static const struct i2c_device_id pca9685_id[] = {
 	{ "pca9685", 0 },
 	{ /* sentinel */ },
@@ -530,11 +570,17 @@ static const struct of_device_id pca9685_dt_ids[] = {
 MODULE_DEVICE_TABLE(of, pca9685_dt_ids);
 #endif
 
+static const struct dev_pm_ops pca9685_pwm_pm = {
+	SET_RUNTIME_PM_OPS(pca9685_pwm_runtime_suspend,
+			   pca9685_pwm_runtime_resume, NULL)
+};
+
 static struct i2c_driver pca9685_i2c_driver = {
 	.driver = {
 		.name = "pca9685-pwm",
 		.acpi_match_table = ACPI_PTR(pca9685_acpi_ids),
 		.of_match_table = of_match_ptr(pca9685_dt_ids),
+		.pm = &pca9685_pwm_pm,
 	},
 	.probe = pca9685_pwm_probe,
 	.remove = pca9685_pwm_remove,
-- 
cgit v1.2.3


From 46fa8bc08744c373d405a87430e0b3e13ae94b4c Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Thu, 13 Apr 2017 19:40:27 +0530
Subject: pwm: tegra: Read PWM clock source rate in driver init

It is required to know the PWM clock source frequency to calculate the
PWM period.

In driver, the clock source frequency of the PWM does not get change
and, hence, get the clock source frequency in driver init. Get this
values later for period calculation from pwm_config().

This will help in avoiding the clock call for getting clock rate in the
pwm_config() each time.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-tegra.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
index c040f87ee144..8c6ed556db28 100644
--- a/drivers/pwm/pwm-tegra.c
+++ b/drivers/pwm/pwm-tegra.c
@@ -50,6 +50,8 @@ struct tegra_pwm_chip {
 	struct clk *clk;
 	struct reset_control*rst;
 
+	unsigned long clk_rate;
+
 	void __iomem *regs;
 
 	const struct tegra_pwm_soc *soc;
@@ -94,7 +96,7 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 	 * Compute the prescaler value for which (1 << PWM_DUTY_WIDTH)
 	 * cycles at the PWM clock rate will take period_ns nanoseconds.
 	 */
-	rate = clk_get_rate(pc->clk) >> PWM_DUTY_WIDTH;
+	rate = pc->clk_rate >> PWM_DUTY_WIDTH;
 
 	/* Consider precision in PWM_SCALE_WIDTH rate calculation */
 	hz = DIV_ROUND_CLOSEST_ULL(100ULL * NSEC_PER_SEC, period_ns);
@@ -199,6 +201,9 @@ static int tegra_pwm_probe(struct platform_device *pdev)
 	if (IS_ERR(pwm->clk))
 		return PTR_ERR(pwm->clk);
 
+	/* Read PWM clock rate from source */
+	pwm->clk_rate = clk_get_rate(pwm->clk);
+
 	pwm->rst = devm_reset_control_get(&pdev->dev, "pwm");
 	if (IS_ERR(pwm->rst)) {
 		ret = PTR_ERR(pwm->rst);
-- 
cgit v1.2.3


From 7ee06cb2f840a96be46233181ed4557901a74385 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 8 Apr 2017 19:54:20 +0200
Subject: x86: i8259: export legacy_pic symbol

The classic PC rtc-coms driver has a workaround for broken ACPI device
nodes for it which lack an irq resource. This workaround used to
unconditionally hardcode the irq to 8 in these cases.

This was causing irq conflict problems on systems without a legacy-pic
so a recent patch added an if (nr_legacy_irqs()) guard to the
workaround to avoid this irq conflict.

nr_legacy_irqs() uses the legacy_pic symbol under the hood causing
an undefined symbol error if the rtc-cmos code is build as a module.

This commit exports the legacy_pic symbol to fix this.

Cc: rtc-linux@googlegroups.com
Cc: alexandre.belloni@free-electrons.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 arch/x86/kernel/i8259.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index be22f5a2192e..4e3b8a587c88 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -418,6 +418,7 @@ struct legacy_pic default_legacy_pic = {
 };
 
 struct legacy_pic *legacy_pic = &default_legacy_pic;
+EXPORT_SYMBOL(legacy_pic);
 
 static int __init i8259A_init_ops(void)
 {
-- 
cgit v1.2.3


From a1e23a42f1bdc00e32fc4869caef12e4e6272f26 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 18 Mar 2017 14:45:49 +0100
Subject: rtc: cmos: Do not assume irq 8 for rtc when there are no legacy irqs

On some systems (e.g. Intel Bay Trail systems) the legacy PIC is not
used, in this case virq 8 will be a random irq, rather then hw_irq 8
from the PIC.

Requesting virq 8 in this case will not help us to get alarm irqs and
may cause problems for other drivers which actually do need virq 8,
for example on an Asus Transformer T100TA this leads to:

[ 28.745155] genirq: Flags mismatch irq 8. 00000088 (mmc0) vs. 00000080 (rtc0)
<snip oops>
[ 28.753700] mmc0: Failed to request IRQ 8: -16
[ 28.975934] sdhci-acpi: probe of 80860F14:01 failed with error -16

This commit fixes this by making the rtc-cmos driver continue
without using an irq rather then claiming irq 8 when no irq is
specified in the pnp-info and there are no legacy-irqs.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-cmos.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index f4a96dbdabf2..b3de973a6260 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -41,6 +41,9 @@
 #include <linux/pm.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
+#ifdef CONFIG_X86
+#include <asm/i8259.h>
+#endif
 
 /* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */
 #include <linux/mc146818rtc.h>
@@ -1193,17 +1196,23 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
 {
 	cmos_wake_setup(&pnp->dev);
 
-	if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0))
+	if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0)) {
+		unsigned int irq = 0;
+#ifdef CONFIG_X86
 		/* Some machines contain a PNP entry for the RTC, but
 		 * don't define the IRQ. It should always be safe to
-		 * hardcode it in these cases
+		 * hardcode it on systems with a legacy PIC.
 		 */
+		if (nr_legacy_irqs())
+			irq = 8;
+#endif
 		return cmos_do_probe(&pnp->dev,
-				pnp_get_resource(pnp, IORESOURCE_IO, 0), 8);
-	else
+				pnp_get_resource(pnp, IORESOURCE_IO, 0), irq);
+	} else {
 		return cmos_do_probe(&pnp->dev,
 				pnp_get_resource(pnp, IORESOURCE_IO, 0),
 				pnp_irq(pnp, 0));
+	}
 }
 
 static void cmos_pnp_remove(struct pnp_dev *pnp)
-- 
cgit v1.2.3


From 03a32da5ca63d418ab95063b06687e00a6df9855 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Sun, 26 Mar 2017 20:34:23 -0700
Subject: rtc: cpcap: fix improper use of IRQ_NONE for request_threaded_irq

There's a funny typo where IRQ_NONE is used instead of IRQF_TRIGGER_NONE
for request_threaded_irq(). Let's fix it before it gets copied elsewhere.

Fixes: dd3bf50b35e3 ("rtc: cpcap: new rtc driver")
Signed-off-by: Tony Lindgren <tony@atomide.com>
Reviewed-By: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-cpcap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c
index 5d163be98e9f..3a0333e1f21a 100644
--- a/drivers/rtc/rtc-cpcap.c
+++ b/drivers/rtc/rtc-cpcap.c
@@ -275,7 +275,7 @@ static int cpcap_rtc_probe(struct platform_device *pdev)
 
 	rtc->alarm_irq = platform_get_irq(pdev, 0);
 	err = devm_request_threaded_irq(dev, rtc->alarm_irq, NULL,
-					cpcap_rtc_alarm_irq, IRQ_NONE,
+					cpcap_rtc_alarm_irq, IRQF_TRIGGER_NONE,
 					"rtc_alarm", rtc);
 	if (err) {
 		dev_err(dev, "Could not request alarm irq: %d\n", err);
@@ -291,7 +291,7 @@ static int cpcap_rtc_probe(struct platform_device *pdev)
 	 */
 	rtc->update_irq = platform_get_irq(pdev, 1);
 	err = devm_request_threaded_irq(dev, rtc->update_irq, NULL,
-					cpcap_rtc_update_irq, IRQ_NONE,
+					cpcap_rtc_update_irq, IRQF_TRIGGER_NONE,
 					"rtc_1hz", rtc);
 	if (err) {
 		dev_err(dev, "Could not request update irq: %d\n", err);
-- 
cgit v1.2.3


From 8566f70c8a90f3914b06e934852596ba94aaa381 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Thu, 23 Mar 2017 16:54:57 -0700
Subject: rtc: ds1307: support m41t0 variant

The m41t0 variant is very similar to the already supported m41t00
variant, with the notable exception of the oscillator fail bit.
The data sheet notes:

  If the oscillator fail (OF) bit is internally set to a '1,' this
  indicates that the oscillator has either stopped, or was stopped
  for some period of time and can be used to judge the validity of
  the clock and date data.

The bit will get cleared with a regular write of the system time,
so no changes are needed to clear it.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 Documentation/devicetree/bindings/i2c/trivial-devices.txt |  1 +
 drivers/rtc/rtc-ds1307.c                                  | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
index ad10fbe61562..3e0a34c88e07 100644
--- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt
+++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
@@ -160,6 +160,7 @@ sii,s35390a		2-wire CMOS real-time clock
 silabs,si7020		Relative Humidity and Temperature Sensors
 skyworks,sky81452	Skyworks SKY81452: Six-Channel White LED Driver with Touch Panel Bias Supply
 st,24c256		i2c serial eeprom  (24cxx)
+st,m41t0		Serial real-time clock (RTC)
 st,m41t00		Serial real-time clock (RTC)
 st,m41t62		Serial real-time clock (RTC) with alarm
 st,m41t80		M41T80 - SERIAL ACCESS RTC WITH ALARMS
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index f085b487bbe7..e29481391496 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -39,6 +39,7 @@ enum ds_type {
 	ds_1340,
 	ds_1388,
 	ds_3231,
+	m41t0,
 	m41t00,
 	mcp794xx,
 	rx_8025,
@@ -53,6 +54,7 @@ enum ds_type {
 #	define DS1340_BIT_nEOSC		0x80
 #	define MCP794XX_BIT_ST		0x80
 #define DS1307_REG_MIN		0x01	/* 00-59 */
+#	define M41T0_BIT_OF		0x80
 #define DS1307_REG_HOUR		0x02	/* 00-23, or 1-12{am,pm} */
 #	define DS1307_BIT_12HR		0x40	/* in REG_HOUR */
 #	define DS1307_BIT_PM		0x20	/* in REG_HOUR */
@@ -183,6 +185,7 @@ static const struct i2c_device_id ds1307_id[] = {
 	{ "ds1388", ds_1388 },
 	{ "ds1340", ds_1340 },
 	{ "ds3231", ds_3231 },
+	{ "m41t0", m41t0 },
 	{ "m41t00", m41t00 },
 	{ "mcp7940x", mcp794xx },
 	{ "mcp7941x", mcp794xx },
@@ -261,6 +264,7 @@ static const struct acpi_device_id ds1307_acpi_ids[] = {
 	{ .id = "DS1388", .driver_data = ds_1388 },
 	{ .id = "DS1340", .driver_data = ds_1340 },
 	{ .id = "DS3231", .driver_data = ds_3231 },
+	{ .id = "M41T0", .driver_data = m41t0 },
 	{ .id = "M41T00", .driver_data = m41t00 },
 	{ .id = "MCP7940X", .driver_data = mcp794xx },
 	{ .id = "MCP7941X", .driver_data = mcp794xx },
@@ -456,6 +460,13 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 
 	dev_dbg(dev, "%s: %7ph\n", "read", ds1307->regs);
 
+	/* if oscillator fail bit is set, no data can be trusted */
+	if (ds1307->type == m41t0 &&
+	    ds1307->regs[DS1307_REG_MIN] & M41T0_BIT_OF) {
+		dev_warn_once(dev, "oscillator failed, set time!\n");
+		return -EINVAL;
+	}
+
 	t->tm_sec = bcd2bin(ds1307->regs[DS1307_REG_SECS] & 0x7f);
 	t->tm_min = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
 	tmp = ds1307->regs[DS1307_REG_HOUR] & 0x3f;
@@ -1578,6 +1589,7 @@ read_rtc:
 	tmp = ds1307->regs[DS1307_REG_SECS];
 	switch (ds1307->type) {
 	case ds_1307:
+	case m41t0:
 	case m41t00:
 		/* clock halted?  turn it on, so clock can tick. */
 		if (tmp & DS1307_BIT_CH) {
@@ -1642,6 +1654,7 @@ read_rtc:
 	tmp = ds1307->regs[DS1307_REG_HOUR];
 	switch (ds1307->type) {
 	case ds_1340:
+	case m41t0:
 	case m41t00:
 		/*
 		 * NOTE: ignores century bits; fix before deploying
-- 
cgit v1.2.3


From db2f814194eb5898b6e0a2de9700ef204f4d040f Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Sat, 8 Apr 2017 17:22:02 +0200
Subject: rtc: ds1307: Add m41t0 to OF device ID table

m41t0 was added to the I2C device ID table but not the OF table. Fix that.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1307.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index e29481391496..77339b3d50a1 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -226,6 +226,10 @@ static const struct of_device_id ds1307_of_match[] = {
 		.compatible = "maxim,ds3231",
 		.data = (void *)ds_3231
 	},
+	{
+		.compatible = "st,m41t0",
+		.data = (void *)m41t00
+	},
 	{
 		.compatible = "st,m41t00",
 		.data = (void *)m41t00
-- 
cgit v1.2.3


From b0fcb4b413028376894feaaaf62bcb09ab1b52f2 Mon Sep 17 00:00:00 2001
From: Mathias Kresin <dev@kresin.me>
Date: Thu, 13 Apr 2017 09:23:54 +0200
Subject: mtd: spi-nor: enable stateless 4b op codes for mx25u25635f

All required stateless 4-byte op codes are supported by this flash
chip. The stateless 4-byte support can't be autodetected due to a
missing 4-byte Address Instruction Table in SFDP.

Fixes hangs on reboot for SoCs expecting the flash chip in 3byte mode.

Signed-off-by: Mathias Kresin <dev@kresin.me>
Acked-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 19b0b01be2d3..36684ca7aa24 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1023,7 +1023,7 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
 	{ "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
 	{ "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) },
-	{ "mx25u25635f", INFO(0xc22539, 0, 64 * 1024, 512, SECT_4K) },
+	{ "mx25u25635f", INFO(0xc22539, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_4B_OPCODES) },
 	{ "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) },
 	{ "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_QUAD_READ) },
 	{ "mx66l1g55g",  INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) },
-- 
cgit v1.2.3


From aeea4c10a88c9ec405072cecb18939b0a9c876f6 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Thu, 2 Mar 2017 22:42:38 +0300
Subject: MAINTAINERS: add btrfs file entries for include directories

Add file entries for btrfs header files.

Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Acked-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 676c139bc883..f3dae93a3c90 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2904,6 +2904,8 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git
 S:	Maintained
 F:	Documentation/filesystems/btrfs.txt
 F:	fs/btrfs/
+F:	include/linux/btrfs*
+F:	include/uapi/linux/btrfs*
 
 BTTV VIDEO4LINUX DRIVER
 M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
-- 
cgit v1.2.3


From 14506127979a5a3d0c5d9b4cc76ce9d4ec23b717 Mon Sep 17 00:00:00 2001
From: Adam Borowski <kilobyte@angband.pl>
Date: Tue, 7 Mar 2017 23:34:44 +0100
Subject: btrfs: fix a bogus warning when converting only data or metadata

If your filesystem has, eg, data:raid0 metadata:raid1, and you run "btrfs
balance -dconvert=raid1", the meta.target field will be uninitialized.
That's otherwise ok, as it's unused except for this warning.

Thus, let's use the existing set of raid levels for the comparison.

As a side effect, non-convert balances will now nag about data>metadata.

Signed-off-by: Adam Borowski <kilobyte@angband.pl>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ab8a66d852f9..02bbc4c8c755 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3755,6 +3755,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 		  struct btrfs_ioctl_balance_args *bargs)
 {
 	struct btrfs_fs_info *fs_info = bctl->fs_info;
+	u64 meta_target, data_target;
 	u64 allowed;
 	int mixed = 0;
 	int ret;
@@ -3851,11 +3852,16 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 		}
 	} while (read_seqretry(&fs_info->profiles_lock, seq));
 
-	if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) <
-		btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) {
+	/* if we're not converting, the target field is uninitialized */
+	meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
+		bctl->meta.target : fs_info->avail_metadata_alloc_bits;
+	data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
+		bctl->data.target : fs_info->avail_data_alloc_bits;
+	if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
+		btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
 		btrfs_warn(fs_info,
 			   "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
-			   bctl->meta.target, bctl->data.target);
+			   meta_target, data_target);
 	}
 
 	if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
-- 
cgit v1.2.3


From f95fda8751b7b243c810805d1deaae502716d8fb Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Thu, 2 Mar 2017 18:41:19 -0800
Subject: Btrfs: remove ASSERT in btrfs_truncate_inode_items

After 76b42abbf748 ("Btrfs: fix data loss after truncate when using the
no-holes feature"),

For either NO_HOLES or inline extents, we've set last_size to newsize to
avoid data loss after remount or inode got evicted and read again, thus,
we don't need this check anymore.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5e71f1ea3391..568b28114f09 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4603,13 +4603,6 @@ error:
 
 	btrfs_free_path(path);
 
-	if (err == 0) {
-		/* only inline file may have last_size != new_size */
-		if (new_size >= fs_info->sectorsize ||
-		    new_size > fs_info->max_inline)
-			ASSERT(last_size == new_size);
-	}
-
 	if (be_nice && bytes_deleted > SZ_32M) {
 		unsigned long updates = trans->delayed_ref_updates;
 		if (updates) {
-- 
cgit v1.2.3


From 140475ae4ad10d140bb69572499b1ff87367e807 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:10 +0200
Subject: btrfs: convert btrfs_bio.refs from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 8 ++++----
 fs/btrfs/volumes.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 02bbc4c8c755..0f6706047167 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5301,22 +5301,22 @@ static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
 		GFP_NOFS|__GFP_NOFAIL);
 
 	atomic_set(&bbio->error, 0);
-	atomic_set(&bbio->refs, 1);
+	refcount_set(&bbio->refs, 1);
 
 	return bbio;
 }
 
 void btrfs_get_bbio(struct btrfs_bio *bbio)
 {
-	WARN_ON(!atomic_read(&bbio->refs));
-	atomic_inc(&bbio->refs);
+	WARN_ON(!refcount_read(&bbio->refs));
+	refcount_inc(&bbio->refs);
 }
 
 void btrfs_put_bbio(struct btrfs_bio *bbio)
 {
 	if (!bbio)
 		return;
-	if (atomic_dec_and_test(&bbio->refs))
+	if (refcount_dec_and_test(&bbio->refs))
 		kfree(bbio);
 }
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 59be81206dd7..ac0bf7d0df60 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -298,7 +298,7 @@ struct btrfs_bio;
 typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
 
 struct btrfs_bio {
-	atomic_t refs;
+	refcount_t refs;
 	atomic_t stripes_pending;
 	struct btrfs_fs_info *fs_info;
 	u64 map_type; /* get from map_lookup->type */
-- 
cgit v1.2.3


From 9b64f57ddf8673d29fafb3405d4aa1e93f5a4cd7 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:11 +0200
Subject: btrfs: convert btrfs_transaction.use_count from atomic_t to
 refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c      |  2 +-
 fs/btrfs/extent-tree.c  |  2 +-
 fs/btrfs/ordered-data.c |  2 +-
 fs/btrfs/transaction.c  | 20 ++++++++++----------
 fs/btrfs/transaction.h  |  4 +++-
 5 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index eb1ee7b6f532..e4aa64c263f9 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4615,7 +4615,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
 		t = list_first_entry(&fs_info->trans_list,
 				     struct btrfs_transaction, list);
 		if (t->state >= TRANS_STATE_COMMIT_START) {
-			atomic_inc(&t->use_count);
+			refcount_inc(&t->use_count);
 			spin_unlock(&fs_info->trans_lock);
 			btrfs_wait_for_commit(fs_info, t->transid);
 			btrfs_put_transaction(t);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index be5477676cc8..85e51f311b57 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10850,7 +10850,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 		spin_lock(&fs_info->trans_lock);
 		trans = fs_info->running_transaction;
 		if (trans)
-			atomic_inc(&trans->use_count);
+			refcount_inc(&trans->use_count);
 		spin_unlock(&fs_info->trans_lock);
 
 		ret = find_free_dev_extent_start(trans, device, minlen, start,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 9a46878ba60f..da5399f9fb54 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -623,7 +623,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 		spin_lock(&fs_info->trans_lock);
 		trans = fs_info->running_transaction;
 		if (trans)
-			atomic_inc(&trans->use_count);
+			refcount_inc(&trans->use_count);
 		spin_unlock(&fs_info->trans_lock);
 
 		ASSERT(trans);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 61b807de3e16..a7d7a7d1d78a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -60,8 +60,8 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
 
 void btrfs_put_transaction(struct btrfs_transaction *transaction)
 {
-	WARN_ON(atomic_read(&transaction->use_count) == 0);
-	if (atomic_dec_and_test(&transaction->use_count)) {
+	WARN_ON(refcount_read(&transaction->use_count) == 0);
+	if (refcount_dec_and_test(&transaction->use_count)) {
 		BUG_ON(!list_empty(&transaction->list));
 		WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
 		if (transaction->delayed_refs.pending_csums)
@@ -207,7 +207,7 @@ loop:
 			spin_unlock(&fs_info->trans_lock);
 			return -EBUSY;
 		}
-		atomic_inc(&cur_trans->use_count);
+		refcount_inc(&cur_trans->use_count);
 		atomic_inc(&cur_trans->num_writers);
 		extwriter_counter_inc(cur_trans, type);
 		spin_unlock(&fs_info->trans_lock);
@@ -257,7 +257,7 @@ loop:
 	 * One for this trans handle, one so it will live on until we
 	 * commit the transaction.
 	 */
-	atomic_set(&cur_trans->use_count, 2);
+	refcount_set(&cur_trans->use_count, 2);
 	atomic_set(&cur_trans->pending_ordered, 0);
 	cur_trans->flags = 0;
 	cur_trans->start_time = get_seconds();
@@ -432,7 +432,7 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info)
 	spin_lock(&fs_info->trans_lock);
 	cur_trans = fs_info->running_transaction;
 	if (cur_trans && is_transaction_blocked(cur_trans)) {
-		atomic_inc(&cur_trans->use_count);
+		refcount_inc(&cur_trans->use_count);
 		spin_unlock(&fs_info->trans_lock);
 
 		wait_event(fs_info->transaction_wait,
@@ -744,7 +744,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
 		list_for_each_entry(t, &fs_info->trans_list, list) {
 			if (t->transid == transid) {
 				cur_trans = t;
-				atomic_inc(&cur_trans->use_count);
+				refcount_inc(&cur_trans->use_count);
 				ret = 0;
 				break;
 			}
@@ -773,7 +773,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
 				if (t->state == TRANS_STATE_COMPLETED)
 					break;
 				cur_trans = t;
-				atomic_inc(&cur_trans->use_count);
+				refcount_inc(&cur_trans->use_count);
 				break;
 			}
 		}
@@ -1839,7 +1839,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 
 	/* take transaction reference */
 	cur_trans = trans->transaction;
-	atomic_inc(&cur_trans->use_count);
+	refcount_inc(&cur_trans->use_count);
 
 	btrfs_end_transaction(trans);
 
@@ -2015,7 +2015,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	spin_lock(&fs_info->trans_lock);
 	if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
 		spin_unlock(&fs_info->trans_lock);
-		atomic_inc(&cur_trans->use_count);
+		refcount_inc(&cur_trans->use_count);
 		ret = btrfs_end_transaction(trans);
 
 		wait_for_commit(cur_trans);
@@ -2035,7 +2035,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 		prev_trans = list_entry(cur_trans->list.prev,
 					struct btrfs_transaction, list);
 		if (prev_trans->state != TRANS_STATE_COMPLETED) {
-			atomic_inc(&prev_trans->use_count);
+			refcount_inc(&prev_trans->use_count);
 			spin_unlock(&fs_info->trans_lock);
 
 			wait_for_commit(prev_trans);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 5dfb5590fff6..902619f83db6 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -18,6 +18,8 @@
 
 #ifndef __BTRFS_TRANSACTION__
 #define __BTRFS_TRANSACTION__
+
+#include <linux/refcount.h>
 #include "btrfs_inode.h"
 #include "delayed-ref.h"
 #include "ctree.h"
@@ -49,7 +51,7 @@ struct btrfs_transaction {
 	 * transaction can end
 	 */
 	atomic_t num_writers;
-	atomic_t use_count;
+	refcount_t use_count;
 	atomic_t pending_ordered;
 
 	unsigned long flags;
-- 
cgit v1.2.3


From 490b54d6fb75f6ffd0471ec58bb38a992e2b40cd Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:12 +0200
Subject: btrfs: convert extent_map.refs from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c         |  4 ++--
 fs/btrfs/extent_map.c        | 10 +++++-----
 fs/btrfs/extent_map.h        |  3 ++-
 fs/btrfs/tree-log.c          |  2 +-
 fs/btrfs/volumes.c           |  2 +-
 include/trace/events/btrfs.h |  2 +-
 6 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 27fdb250b446..3649932e48d5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2859,7 +2859,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
 		em = *em_cached;
 		if (extent_map_in_tree(em) && start >= em->start &&
 		    start < extent_map_end(em)) {
-			atomic_inc(&em->refs);
+			refcount_inc(&em->refs);
 			return em;
 		}
 
@@ -2870,7 +2870,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
 	em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
 	if (em_cached && !IS_ERR_OR_NULL(em)) {
 		BUG_ON(*em_cached);
-		atomic_inc(&em->refs);
+		refcount_inc(&em->refs);
 		*em_cached = em;
 	}
 	return em;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 26f9ac719d20..69850155870c 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -55,7 +55,7 @@ struct extent_map *alloc_extent_map(void)
 	em->flags = 0;
 	em->compress_type = BTRFS_COMPRESS_NONE;
 	em->generation = 0;
-	atomic_set(&em->refs, 1);
+	refcount_set(&em->refs, 1);
 	INIT_LIST_HEAD(&em->list);
 	return em;
 }
@@ -71,8 +71,8 @@ void free_extent_map(struct extent_map *em)
 {
 	if (!em)
 		return;
-	WARN_ON(atomic_read(&em->refs) == 0);
-	if (atomic_dec_and_test(&em->refs)) {
+	WARN_ON(refcount_read(&em->refs) == 0);
+	if (refcount_dec_and_test(&em->refs)) {
 		WARN_ON(extent_map_in_tree(em));
 		WARN_ON(!list_empty(&em->list));
 		if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
@@ -322,7 +322,7 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree,
 					struct extent_map *em,
 					int modified)
 {
-	atomic_inc(&em->refs);
+	refcount_inc(&em->refs);
 	em->mod_start = em->start;
 	em->mod_len = em->len;
 
@@ -381,7 +381,7 @@ __lookup_extent_mapping(struct extent_map_tree *tree,
 	if (strict && !(end > em->start && start < extent_map_end(em)))
 		return NULL;
 
-	atomic_inc(&em->refs);
+	refcount_inc(&em->refs);
 	return em;
 }
 
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index eb8b8fae036b..a67b2def5413 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -2,6 +2,7 @@
 #define __EXTENTMAP__
 
 #include <linux/rbtree.h>
+#include <linux/refcount.h>
 
 #define EXTENT_MAP_LAST_BYTE ((u64)-4)
 #define EXTENT_MAP_HOLE ((u64)-3)
@@ -41,7 +42,7 @@ struct extent_map {
 		 */
 		struct map_lookup *map_lookup;
 	};
-	atomic_t refs;
+	refcount_t refs;
 	unsigned int compress_type;
 	struct list_head list;
 };
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a59674c3e69e..ccfe9fe7754a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4196,7 +4196,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 		if (em->generation <= test_gen)
 			continue;
 		/* Need a ref to keep it from getting evicted from cache */
-		atomic_inc(&em->refs);
+		refcount_inc(&em->refs);
 		set_bit(EXTENT_FLAG_LOGGING, &em->flags);
 		list_add_tail(&em->list, &extents);
 		num++;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0f6706047167..dce59fb59b0c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4839,7 +4839,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	ret = add_extent_mapping(em_tree, em, 0);
 	if (!ret) {
 		list_add_tail(&em->list, &trans->transaction->pending_chunks);
-		atomic_inc(&em->refs);
+		refcount_inc(&em->refs);
 	}
 	write_unlock(&em_tree->lock);
 	if (ret) {
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index a3c3cab643a9..9dd29e806fed 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -213,7 +213,7 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__entry->block_start	= map->block_start;
 		__entry->block_len	= map->block_len;
 		__entry->flags		= map->flags;
-		__entry->refs		= atomic_read(&map->refs);
+		__entry->refs		= refcount_read(&map->refs);
 		__entry->compress_type	= map->compress_type;
 	),
 
-- 
cgit v1.2.3


From e76edab7f059bc1047c1865141e2709d70e74852 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:13 +0200
Subject: btrfs: convert btrfs_ordered_extent.refs from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ordered-data.c      | 18 +++++++++---------
 fs/btrfs/ordered-data.h      |  2 +-
 include/trace/events/btrfs.h |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index da5399f9fb54..7b40e2e7292a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -212,7 +212,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 		set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
 
 	/* one ref for the tree */
-	atomic_set(&entry->refs, 1);
+	refcount_set(&entry->refs, 1);
 	init_waitqueue_head(&entry->wait);
 	INIT_LIST_HEAD(&entry->list);
 	INIT_LIST_HEAD(&entry->root_extent_list);
@@ -358,7 +358,7 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
 out:
 	if (!ret && cached && entry) {
 		*cached = entry;
-		atomic_inc(&entry->refs);
+		refcount_inc(&entry->refs);
 	}
 	spin_unlock_irqrestore(&tree->lock, flags);
 	return ret == 0;
@@ -425,7 +425,7 @@ have_entry:
 out:
 	if (!ret && cached && entry) {
 		*cached = entry;
-		atomic_inc(&entry->refs);
+		refcount_inc(&entry->refs);
 	}
 	spin_unlock_irqrestore(&tree->lock, flags);
 	return ret == 0;
@@ -456,7 +456,7 @@ void btrfs_get_logged_extents(struct btrfs_inode *inode,
 		if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
 			continue;
 		list_add(&ordered->log_list, logged_list);
-		atomic_inc(&ordered->refs);
+		refcount_inc(&ordered->refs);
 	}
 	spin_unlock_irq(&tree->lock);
 }
@@ -565,7 +565,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
 
 	trace_btrfs_ordered_extent_put(entry->inode, entry);
 
-	if (atomic_dec_and_test(&entry->refs)) {
+	if (refcount_dec_and_test(&entry->refs)) {
 		ASSERT(list_empty(&entry->log_list));
 		ASSERT(list_empty(&entry->trans_list));
 		ASSERT(list_empty(&entry->root_extent_list));
@@ -690,7 +690,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
 
 		list_move_tail(&ordered->root_extent_list,
 			       &root->ordered_extents);
-		atomic_inc(&ordered->refs);
+		refcount_inc(&ordered->refs);
 		spin_unlock(&root->ordered_extent_lock);
 
 		btrfs_init_work(&ordered->flush_work,
@@ -870,7 +870,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
 	if (!offset_in_entry(entry, file_offset))
 		entry = NULL;
 	if (entry)
-		atomic_inc(&entry->refs);
+		refcount_inc(&entry->refs);
 out:
 	spin_unlock_irq(&tree->lock);
 	return entry;
@@ -911,7 +911,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
 	}
 out:
 	if (entry)
-		atomic_inc(&entry->refs);
+		refcount_inc(&entry->refs);
 	spin_unlock_irq(&tree->lock);
 	return entry;
 }
@@ -948,7 +948,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
 		goto out;
 
 	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
-	atomic_inc(&entry->refs);
+	refcount_inc(&entry->refs);
 out:
 	spin_unlock_irq(&tree->lock);
 	return entry;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 195c93b67fe0..e0c1d5b8d859 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -113,7 +113,7 @@ struct btrfs_ordered_extent {
 	int compress_type;
 
 	/* reference count */
-	atomic_t refs;
+	refcount_t refs;
 
 	/* the inode we belong to */
 	struct inode *inode;
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 9dd29e806fed..8f206263fee7 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -275,7 +275,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		__entry->bytes_left	= ordered->bytes_left;
 		__entry->flags		= ordered->flags;
 		__entry->compress_type	= ordered->compress_type;
-		__entry->refs		= atomic_read(&ordered->refs);
+		__entry->refs		= refcount_read(&ordered->refs);
 		__entry->root_objectid	=
 				BTRFS_I(inode)->root->root_key.objectid;
 		__entry->truncated_len	= ordered->truncated_len;
-- 
cgit v1.2.3


From 1e4f4714d59e3fdcde29ee12d40d2e96875a026f Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:14 +0200
Subject: btrfs: convert btrfs_caching_control.count from atomic_t to
 refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       |  3 ++-
 fs/btrfs/extent-tree.c | 12 ++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c4115901d906..cdfc2a46448b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -39,6 +39,7 @@
 #include <linux/security.h>
 #include <linux/sizes.h>
 #include <linux/dynamic_debug.h>
+#include <linux/refcount.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "async-thread.h"
@@ -518,7 +519,7 @@ struct btrfs_caching_control {
 	struct btrfs_work work;
 	struct btrfs_block_group_cache *block_group;
 	u64 progress;
-	atomic_t count;
+	refcount_t count;
 };
 
 /* Once caching_thread() finds this much free space, it will wake up waiters. */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 85e51f311b57..5c84eea60703 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -316,14 +316,14 @@ get_caching_control(struct btrfs_block_group_cache *cache)
 	}
 
 	ctl = cache->caching_ctl;
-	atomic_inc(&ctl->count);
+	refcount_inc(&ctl->count);
 	spin_unlock(&cache->lock);
 	return ctl;
 }
 
 static void put_caching_control(struct btrfs_caching_control *ctl)
 {
-	if (atomic_dec_and_test(&ctl->count))
+	if (refcount_dec_and_test(&ctl->count))
 		kfree(ctl);
 }
 
@@ -599,7 +599,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	init_waitqueue_head(&caching_ctl->wait);
 	caching_ctl->block_group = cache;
 	caching_ctl->progress = cache->key.objectid;
-	atomic_set(&caching_ctl->count, 1);
+	refcount_set(&caching_ctl->count, 1);
 	btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
 			caching_thread, NULL, NULL);
 
@@ -620,7 +620,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 		struct btrfs_caching_control *ctl;
 
 		ctl = cache->caching_ctl;
-		atomic_inc(&ctl->count);
+		refcount_inc(&ctl->count);
 		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
 		spin_unlock(&cache->lock);
 
@@ -707,7 +707,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	}
 
 	down_write(&fs_info->commit_root_sem);
-	atomic_inc(&caching_ctl->count);
+	refcount_inc(&caching_ctl->count);
 	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
 	up_write(&fs_info->commit_root_sem);
 
@@ -10416,7 +10416,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 				    &fs_info->caching_block_groups, list)
 				if (ctl->block_group == block_group) {
 					caching_ctl = ctl;
-					atomic_inc(&caching_ctl->count);
+					refcount_inc(&caching_ctl->count);
 					break;
 				}
 		}
-- 
cgit v1.2.3


From 6df8cdf5bda221f268ac23940bce589ad176993d Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:15 +0200
Subject: btrfs: convert btrfs_delayed_ref_node.refs from atomic_t to
 refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/backref.c     | 2 +-
 fs/btrfs/delayed-ref.c | 8 ++++----
 fs/btrfs/delayed-ref.h | 8 +++++---
 fs/btrfs/disk-io.c     | 2 +-
 fs/btrfs/extent-tree.c | 6 +++---
 5 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 7699e16784d3..116338344224 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1286,7 +1286,7 @@ again:
 		head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
 		if (head) {
 			if (!mutex_trylock(&head->mutex)) {
-				atomic_inc(&head->node.refs);
+				refcount_inc(&head->node.refs);
 				spin_unlock(&delayed_refs->lock);
 
 				btrfs_release_path(path);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 6eb80952efb3..be70d90dfee5 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -164,7 +164,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 	if (mutex_trylock(&head->mutex))
 		return 0;
 
-	atomic_inc(&head->node.refs);
+	refcount_inc(&head->node.refs);
 	spin_unlock(&delayed_refs->lock);
 
 	mutex_lock(&head->mutex);
@@ -590,7 +590,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 	delayed_refs = &trans->transaction->delayed_refs;
 
 	/* first set the basic ref node struct up */
-	atomic_set(&ref->refs, 1);
+	refcount_set(&ref->refs, 1);
 	ref->bytenr = bytenr;
 	ref->num_bytes = num_bytes;
 	ref->ref_mod = count_mod;
@@ -682,7 +682,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	delayed_refs = &trans->transaction->delayed_refs;
 
 	/* first set the basic ref node struct up */
-	atomic_set(&ref->refs, 1);
+	refcount_set(&ref->refs, 1);
 	ref->bytenr = bytenr;
 	ref->num_bytes = num_bytes;
 	ref->ref_mod = 1;
@@ -739,7 +739,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 		seq = atomic64_read(&fs_info->tree_mod_seq);
 
 	/* first set the basic ref node struct up */
-	atomic_set(&ref->refs, 1);
+	refcount_set(&ref->refs, 1);
 	ref->bytenr = bytenr;
 	ref->num_bytes = num_bytes;
 	ref->ref_mod = 1;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 0e537f98f1a1..c0264ff01b53 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -18,6 +18,8 @@
 #ifndef __DELAYED_REF__
 #define __DELAYED_REF__
 
+#include <linux/refcount.h>
+
 /* these are the possible values of struct btrfs_delayed_ref_node->action */
 #define BTRFS_ADD_DELAYED_REF    1 /* add one backref to the tree */
 #define BTRFS_DROP_DELAYED_REF   2 /* delete one backref from the tree */
@@ -53,7 +55,7 @@ struct btrfs_delayed_ref_node {
 	u64 seq;
 
 	/* ref count on this data structure */
-	atomic_t refs;
+	refcount_t refs;
 
 	/*
 	 * how many refs is this entry adding or deleting.  For
@@ -220,8 +222,8 @@ btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
 
 static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
 {
-	WARN_ON(atomic_read(&ref->refs) == 0);
-	if (atomic_dec_and_test(&ref->refs)) {
+	WARN_ON(refcount_read(&ref->refs) == 0);
+	if (refcount_dec_and_test(&ref->refs)) {
 		WARN_ON(ref->in_tree);
 		switch (ref->type) {
 		case BTRFS_TREE_BLOCK_REF_KEY:
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e4aa64c263f9..3748bc54a6ab 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4343,7 +4343,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 		head = rb_entry(node, struct btrfs_delayed_ref_head,
 				href_node);
 		if (!mutex_trylock(&head->mutex)) {
-			atomic_inc(&head->node.refs);
+			refcount_inc(&head->node.refs);
 			spin_unlock(&delayed_refs->lock);
 
 			mutex_lock(&head->mutex);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5c84eea60703..5a7ddcff406b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -892,7 +892,7 @@ search_again:
 	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
 	if (head) {
 		if (!mutex_trylock(&head->mutex)) {
-			atomic_inc(&head->node.refs);
+			refcount_inc(&head->node.refs);
 			spin_unlock(&delayed_refs->lock);
 
 			btrfs_release_path(path);
@@ -2980,7 +2980,7 @@ again:
 				struct btrfs_delayed_ref_node *ref;
 
 				ref = &head->node;
-				atomic_inc(&ref->refs);
+				refcount_inc(&ref->refs);
 
 				spin_unlock(&delayed_refs->lock);
 				/*
@@ -3057,7 +3057,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
 	}
 
 	if (!mutex_trylock(&head->mutex)) {
-		atomic_inc(&head->node.refs);
+		refcount_inc(&head->node.refs);
 		spin_unlock(&delayed_refs->lock);
 
 		btrfs_release_path(path);
-- 
cgit v1.2.3


From 6de5f18e7b0da0cdd265eda047a0bc4f48260bcb Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:16 +0200
Subject: btrfs: convert btrfs_delayed_node.refs from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-inode.c | 28 ++++++++++++++--------------
 fs/btrfs/delayed-inode.h |  4 ++--
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 1aff676f0e5b..7396c36e0adb 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -52,7 +52,7 @@ static inline void btrfs_init_delayed_node(
 {
 	delayed_node->root = root;
 	delayed_node->inode_id = inode_id;
-	atomic_set(&delayed_node->refs, 0);
+	refcount_set(&delayed_node->refs, 0);
 	delayed_node->ins_root = RB_ROOT;
 	delayed_node->del_root = RB_ROOT;
 	mutex_init(&delayed_node->mutex);
@@ -81,7 +81,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 
 	node = READ_ONCE(btrfs_inode->delayed_node);
 	if (node) {
-		atomic_inc(&node->refs);
+		refcount_inc(&node->refs);
 		return node;
 	}
 
@@ -89,14 +89,14 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 	node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
 	if (node) {
 		if (btrfs_inode->delayed_node) {
-			atomic_inc(&node->refs);	/* can be accessed */
+			refcount_inc(&node->refs);	/* can be accessed */
 			BUG_ON(btrfs_inode->delayed_node != node);
 			spin_unlock(&root->inode_lock);
 			return node;
 		}
 		btrfs_inode->delayed_node = node;
 		/* can be accessed and cached in the inode */
-		atomic_add(2, &node->refs);
+		refcount_add(2, &node->refs);
 		spin_unlock(&root->inode_lock);
 		return node;
 	}
@@ -125,7 +125,7 @@ again:
 	btrfs_init_delayed_node(node, root, ino);
 
 	/* cached in the btrfs inode and can be accessed */
-	atomic_add(2, &node->refs);
+	refcount_set(&node->refs, 2);
 
 	ret = radix_tree_preload(GFP_NOFS);
 	if (ret) {
@@ -166,7 +166,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
 	} else {
 		list_add_tail(&node->n_list, &root->node_list);
 		list_add_tail(&node->p_list, &root->prepare_list);
-		atomic_inc(&node->refs);	/* inserted into list */
+		refcount_inc(&node->refs);	/* inserted into list */
 		root->nodes++;
 		set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
 	}
@@ -180,7 +180,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
 	spin_lock(&root->lock);
 	if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
 		root->nodes--;
-		atomic_dec(&node->refs);	/* not in the list */
+		refcount_dec(&node->refs);	/* not in the list */
 		list_del_init(&node->n_list);
 		if (!list_empty(&node->p_list))
 			list_del_init(&node->p_list);
@@ -201,7 +201,7 @@ static struct btrfs_delayed_node *btrfs_first_delayed_node(
 
 	p = delayed_root->node_list.next;
 	node = list_entry(p, struct btrfs_delayed_node, n_list);
-	atomic_inc(&node->refs);
+	refcount_inc(&node->refs);
 out:
 	spin_unlock(&delayed_root->lock);
 
@@ -228,7 +228,7 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node(
 		p = node->n_list.next;
 
 	next = list_entry(p, struct btrfs_delayed_node, n_list);
-	atomic_inc(&next->refs);
+	refcount_inc(&next->refs);
 out:
 	spin_unlock(&delayed_root->lock);
 
@@ -253,11 +253,11 @@ static void __btrfs_release_delayed_node(
 		btrfs_dequeue_delayed_node(delayed_root, delayed_node);
 	mutex_unlock(&delayed_node->mutex);
 
-	if (atomic_dec_and_test(&delayed_node->refs)) {
+	if (refcount_dec_and_test(&delayed_node->refs)) {
 		bool free = false;
 		struct btrfs_root *root = delayed_node->root;
 		spin_lock(&root->inode_lock);
-		if (atomic_read(&delayed_node->refs) == 0) {
+		if (refcount_read(&delayed_node->refs) == 0) {
 			radix_tree_delete(&root->delayed_nodes_tree,
 					  delayed_node->inode_id);
 			free = true;
@@ -286,7 +286,7 @@ static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
 	p = delayed_root->prepare_list.next;
 	list_del_init(p);
 	node = list_entry(p, struct btrfs_delayed_node, p_list);
-	atomic_inc(&node->refs);
+	refcount_inc(&node->refs);
 out:
 	spin_unlock(&delayed_root->lock);
 
@@ -1621,7 +1621,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
 	 * insert/delete delayed items in this period. So we also needn't
 	 * requeue or dequeue this delayed node.
 	 */
-	atomic_dec(&delayed_node->refs);
+	refcount_dec(&delayed_node->refs);
 
 	return true;
 }
@@ -1963,7 +1963,7 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
 		inode_id = delayed_nodes[n - 1]->inode_id + 1;
 
 		for (i = 0; i < n; i++)
-			atomic_inc(&delayed_nodes[i]->refs);
+			refcount_inc(&delayed_nodes[i]->refs);
 		spin_unlock(&root->inode_lock);
 
 		for (i = 0; i < n; i++) {
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 40327cc3b99a..2f494fdde0ac 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -26,7 +26,7 @@
 #include <linux/list.h>
 #include <linux/wait.h>
 #include <linux/atomic.h>
-
+#include <linux/refcount.h>
 #include "ctree.h"
 
 /* types of the delayed item */
@@ -67,7 +67,7 @@ struct btrfs_delayed_node {
 	struct rb_root del_root;
 	struct mutex mutex;
 	struct btrfs_inode_item inode_item;
-	atomic_t refs;
+	refcount_t refs;
 	u64 index_cnt;
 	unsigned long flags;
 	int count;
-- 
cgit v1.2.3


From 089e77e10d7e44a007101cf7323dc7bda66172f4 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:17 +0200
Subject: btrfs: convert btrfs_delayed_item.refs from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-inode.c | 18 +++++++++---------
 fs/btrfs/delayed-inode.h |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 7396c36e0adb..8ae409b5a61d 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -308,7 +308,7 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
 		item->ins_or_del = 0;
 		item->bytes_reserved = 0;
 		item->delayed_node = NULL;
-		atomic_set(&item->refs, 1);
+		refcount_set(&item->refs, 1);
 	}
 	return item;
 }
@@ -483,7 +483,7 @@ static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
 {
 	if (item) {
 		__btrfs_remove_delayed_item(item);
-		if (atomic_dec_and_test(&item->refs))
+		if (refcount_dec_and_test(&item->refs))
 			kfree(item);
 	}
 }
@@ -1600,14 +1600,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
 	mutex_lock(&delayed_node->mutex);
 	item = __btrfs_first_delayed_insertion_item(delayed_node);
 	while (item) {
-		atomic_inc(&item->refs);
+		refcount_inc(&item->refs);
 		list_add_tail(&item->readdir_list, ins_list);
 		item = __btrfs_next_delayed_item(item);
 	}
 
 	item = __btrfs_first_delayed_deletion_item(delayed_node);
 	while (item) {
-		atomic_inc(&item->refs);
+		refcount_inc(&item->refs);
 		list_add_tail(&item->readdir_list, del_list);
 		item = __btrfs_next_delayed_item(item);
 	}
@@ -1634,13 +1634,13 @@ void btrfs_readdir_put_delayed_items(struct inode *inode,
 
 	list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
 		list_del(&curr->readdir_list);
-		if (atomic_dec_and_test(&curr->refs))
+		if (refcount_dec_and_test(&curr->refs))
 			kfree(curr);
 	}
 
 	list_for_each_entry_safe(curr, next, del_list, readdir_list) {
 		list_del(&curr->readdir_list);
-		if (atomic_dec_and_test(&curr->refs))
+		if (refcount_dec_and_test(&curr->refs))
 			kfree(curr);
 	}
 
@@ -1667,7 +1667,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
 		list_del(&curr->readdir_list);
 		ret = (curr->key.offset == index);
 
-		if (atomic_dec_and_test(&curr->refs))
+		if (refcount_dec_and_test(&curr->refs))
 			kfree(curr);
 
 		if (ret)
@@ -1705,7 +1705,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
 		list_del(&curr->readdir_list);
 
 		if (curr->key.offset < ctx->pos) {
-			if (atomic_dec_and_test(&curr->refs))
+			if (refcount_dec_and_test(&curr->refs))
 				kfree(curr);
 			continue;
 		}
@@ -1722,7 +1722,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
 		over = !dir_emit(ctx, name, name_len,
 			       location.objectid, d_type);
 
-		if (atomic_dec_and_test(&curr->refs))
+		if (refcount_dec_and_test(&curr->refs))
 			kfree(curr);
 
 		if (over)
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 2f494fdde0ac..c4189d495934 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -80,7 +80,7 @@ struct btrfs_delayed_item {
 	struct list_head readdir_list;	/* used for readdir items */
 	u64 bytes_reserved;
 	struct btrfs_delayed_node *delayed_node;
-	atomic_t refs;
+	refcount_t refs;
 	int ins_or_del;
 	u32 data_len;
 	char data[0];
-- 
cgit v1.2.3


From 0700cea7c8b387c8c6bc4de79b197baa0b3fc4a3 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:18 +0200
Subject: btrfs: convert btrfs_root.refs from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h   | 2 +-
 fs/btrfs/disk-io.c | 2 +-
 fs/btrfs/disk-io.h | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index cdfc2a46448b..285566cc2f7d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1222,7 +1222,7 @@ struct btrfs_root {
 	dev_t anon_dev;
 
 	spinlock_t root_item_lock;
-	atomic_t refs;
+	refcount_t refs;
 
 	struct mutex delalloc_mutex;
 	spinlock_t delalloc_lock;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3748bc54a6ab..bd415e1dd114 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1340,7 +1340,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
 	atomic_set(&root->log_writers, 0);
 	atomic_set(&root->log_batch, 0);
 	atomic_set(&root->orphan_inodes, 0);
-	atomic_set(&root->refs, 1);
+	refcount_set(&root->refs, 1);
 	atomic_set(&root->will_be_snapshoted, 0);
 	atomic64_set(&root->qgroup_meta_rsv, 0);
 	root->log_transid = 0;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 2e0ec29bfd69..21f1ceb85b76 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -101,14 +101,14 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
  */
 static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root)
 {
-	if (atomic_inc_not_zero(&root->refs))
+	if (refcount_inc_not_zero(&root->refs))
 		return root;
 	return NULL;
 }
 
 static inline void btrfs_put_fs_root(struct btrfs_root *root)
 {
-	if (atomic_dec_and_test(&root->refs))
+	if (refcount_dec_and_test(&root->refs))
 		kfree(root);
 }
 
-- 
cgit v1.2.3


From b7ac31b7b2ebd735b7b67c85711ef6d16648051a Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:19 +0200
Subject: btrfs: convert extent_state.refs from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 14 +++++++-------
 fs/btrfs/extent_io.h |  3 ++-
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3649932e48d5..174ffffd6d97 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -68,7 +68,7 @@ void btrfs_leak_debug_check(void)
 		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
 		       state->start, state->end, state->state,
 		       extent_state_in_tree(state),
-		       atomic_read(&state->refs));
+		       refcount_read(&state->refs));
 		list_del(&state->leak_list);
 		kmem_cache_free(extent_state_cache, state);
 	}
@@ -238,7 +238,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
 	state->failrec = NULL;
 	RB_CLEAR_NODE(&state->rb_node);
 	btrfs_leak_debug_add(&state->leak_list, &states);
-	atomic_set(&state->refs, 1);
+	refcount_set(&state->refs, 1);
 	init_waitqueue_head(&state->wq);
 	trace_alloc_extent_state(state, mask, _RET_IP_);
 	return state;
@@ -248,7 +248,7 @@ void free_extent_state(struct extent_state *state)
 {
 	if (!state)
 		return;
-	if (atomic_dec_and_test(&state->refs)) {
+	if (refcount_dec_and_test(&state->refs)) {
 		WARN_ON(extent_state_in_tree(state));
 		btrfs_leak_debug_del(&state->leak_list);
 		trace_free_extent_state(state, _RET_IP_);
@@ -641,7 +641,7 @@ again:
 		if (cached && extent_state_in_tree(cached) &&
 		    cached->start <= start && cached->end > start) {
 			if (clear)
-				atomic_dec(&cached->refs);
+				refcount_dec(&cached->refs);
 			state = cached;
 			goto hit_next;
 		}
@@ -793,7 +793,7 @@ process_node:
 
 		if (state->state & bits) {
 			start = state->start;
-			atomic_inc(&state->refs);
+			refcount_inc(&state->refs);
 			wait_on_state(tree, state);
 			free_extent_state(state);
 			goto again;
@@ -834,7 +834,7 @@ static void cache_state_if_flags(struct extent_state *state,
 	if (cached_ptr && !(*cached_ptr)) {
 		if (!flags || (state->state & flags)) {
 			*cached_ptr = state;
-			atomic_inc(&state->refs);
+			refcount_inc(&state->refs);
 		}
 	}
 }
@@ -1538,7 +1538,7 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
 		if (!found) {
 			*start = state->start;
 			*cached_state = state;
-			atomic_inc(&state->refs);
+			refcount_inc(&state->refs);
 		}
 		found++;
 		*end = state->end;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 3e4fad4a909d..8d2d6e4272d5 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -2,6 +2,7 @@
 #define __EXTENTIO__
 
 #include <linux/rbtree.h>
+#include <linux/refcount.h>
 #include "ulist.h"
 
 /* bits for the extent state */
@@ -143,7 +144,7 @@ struct extent_state {
 
 	/* ADD NEW ELEMENTS AFTER THIS */
 	wait_queue_head_t wq;
-	atomic_t refs;
+	refcount_t refs;
 	unsigned state;
 
 	struct io_failure_record *failrec;
-- 
cgit v1.2.3


From a50299ae7cc4ba7268749e144e7c16321c25e3b3 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:20 +0200
Subject: btrfs: convert compressed_bio.pending_bios from atomic_t to
 refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c7721a6aa3bb..10e6b282d09d 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -44,7 +44,7 @@
 
 struct compressed_bio {
 	/* number of bios pending for this compressed extent */
-	atomic_t pending_bios;
+	refcount_t pending_bios;
 
 	/* the pages with the compressed data on them */
 	struct page **compressed_pages;
@@ -161,7 +161,7 @@ static void end_compressed_bio_read(struct bio *bio)
 	/* if there are more bios still pending for this compressed
 	 * extent, just exit
 	 */
-	if (!atomic_dec_and_test(&cb->pending_bios))
+	if (!refcount_dec_and_test(&cb->pending_bios))
 		goto out;
 
 	inode = cb->inode;
@@ -274,7 +274,7 @@ static void end_compressed_bio_write(struct bio *bio)
 	/* if there are more bios still pending for this compressed
 	 * extent, just exit
 	 */
-	if (!atomic_dec_and_test(&cb->pending_bios))
+	if (!refcount_dec_and_test(&cb->pending_bios))
 		goto out;
 
 	/* ok, we're the last bio for this extent, step one is to
@@ -342,7 +342,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
 	if (!cb)
 		return -ENOMEM;
-	atomic_set(&cb->pending_bios, 0);
+	refcount_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
 	cb->start = start;
@@ -363,7 +363,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 	bio->bi_private = cb;
 	bio->bi_end_io = end_compressed_bio_write;
-	atomic_inc(&cb->pending_bios);
+	refcount_set(&cb->pending_bios, 1);
 
 	/* create and submit bios for the compressed pages */
 	bytes_left = compressed_len;
@@ -388,7 +388,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 			 * we inc the count.  Otherwise, the cb might get
 			 * freed before we're done setting it up
 			 */
-			atomic_inc(&cb->pending_bios);
+			refcount_inc(&cb->pending_bios);
 			ret = btrfs_bio_wq_end_io(fs_info, bio,
 						  BTRFS_WQ_ENDIO_DATA);
 			BUG_ON(ret); /* -ENOMEM */
@@ -607,7 +607,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	if (!cb)
 		goto out;
 
-	atomic_set(&cb->pending_bios, 0);
+	refcount_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
 	cb->mirror_num = mirror_num;
@@ -656,7 +656,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	bio_set_op_attrs (comp_bio, REQ_OP_READ, 0);
 	comp_bio->bi_private = cb;
 	comp_bio->bi_end_io = end_compressed_bio_read;
-	atomic_inc(&cb->pending_bios);
+	refcount_set(&cb->pending_bios, 1);
 
 	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
 		page = cb->compressed_pages[pg_index];
@@ -685,7 +685,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			 * we inc the count.  Otherwise, the cb might get
 			 * freed before we're done setting it up
 			 */
-			atomic_inc(&cb->pending_bios);
+			refcount_inc(&cb->pending_bios);
 
 			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 				ret = btrfs_lookup_bio_sums(inode, comp_bio,
-- 
cgit v1.2.3


From 6f615018b35fb5ea1ce49673079fc637a0b6be70 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:21 +0200
Subject: btrfs: convert scrub_recover.refs from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b0251eb1239f..c9406bf302b8 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -64,7 +64,7 @@ struct scrub_ctx;
 #define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */
 
 struct scrub_recover {
-	atomic_t		refs;
+	refcount_t		refs;
 	struct btrfs_bio	*bbio;
 	u64			map_length;
 };
@@ -857,12 +857,12 @@ out:
 
 static inline void scrub_get_recover(struct scrub_recover *recover)
 {
-	atomic_inc(&recover->refs);
+	refcount_inc(&recover->refs);
 }
 
 static inline void scrub_put_recover(struct scrub_recover *recover)
 {
-	if (atomic_dec_and_test(&recover->refs)) {
+	if (refcount_dec_and_test(&recover->refs)) {
 		btrfs_put_bbio(recover->bbio);
 		kfree(recover);
 	}
@@ -1343,7 +1343,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 			return -ENOMEM;
 		}
 
-		atomic_set(&recover->refs, 1);
+		refcount_set(&recover->refs, 1);
 		recover->bbio = bbio;
 		recover->map_length = mapped_length;
 
-- 
cgit v1.2.3


From 186debd6ede29da9d51cffdb93cadee66e4e1e23 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:23 +0200
Subject: btrfs: convert scrub_block.refs from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c9406bf302b8..d2c35848e61d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -112,7 +112,7 @@ struct scrub_block {
 	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
 	int			page_count;
 	atomic_t		outstanding_pages;
-	atomic_t		refs; /* free mem on transition to zero */
+	refcount_t		refs; /* free mem on transition to zero */
 	struct scrub_ctx	*sctx;
 	struct scrub_parity	*sparity;
 	struct {
@@ -1998,12 +1998,12 @@ static int scrub_checksum_super(struct scrub_block *sblock)
 
 static void scrub_block_get(struct scrub_block *sblock)
 {
-	atomic_inc(&sblock->refs);
+	refcount_inc(&sblock->refs);
 }
 
 static void scrub_block_put(struct scrub_block *sblock)
 {
-	if (atomic_dec_and_test(&sblock->refs)) {
+	if (refcount_dec_and_test(&sblock->refs)) {
 		int i;
 
 		if (sblock->sparity)
@@ -2255,7 +2255,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 
 	/* one ref inside this function, plus one for each page added to
 	 * a bio later on */
-	atomic_set(&sblock->refs, 1);
+	refcount_set(&sblock->refs, 1);
 	sblock->sctx = sctx;
 	sblock->no_io_error_seen = 1;
 
@@ -2555,7 +2555,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
 
 	/* one ref inside this function, plus one for each page added to
 	 * a bio later on */
-	atomic_set(&sblock->refs, 1);
+	refcount_set(&sblock->refs, 1);
 	sblock->sctx = sctx;
 	sblock->no_io_error_seen = 1;
 	sblock->sparity = sparity;
-- 
cgit v1.2.3


From 78a764504d1e11411bec0e068c5d9e0a417aff08 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:24 +0200
Subject: btrfs: convert scrub_parity.refs from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index d2c35848e61d..8130ab11821f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -142,7 +142,7 @@ struct scrub_parity {
 
 	int			stripe_len;
 
-	atomic_t		refs;
+	refcount_t		refs;
 
 	struct list_head	spages;
 
@@ -2822,12 +2822,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors)
 
 static void scrub_parity_get(struct scrub_parity *sparity)
 {
-	atomic_inc(&sparity->refs);
+	refcount_inc(&sparity->refs);
 }
 
 static void scrub_parity_put(struct scrub_parity *sparity)
 {
-	if (!atomic_dec_and_test(&sparity->refs))
+	if (!refcount_dec_and_test(&sparity->refs))
 		return;
 
 	scrub_parity_check_and_repair(sparity);
@@ -2879,7 +2879,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 	sparity->scrub_dev = sdev;
 	sparity->logic_start = logic_start;
 	sparity->logic_end = logic_end;
-	atomic_set(&sparity->refs, 1);
+	refcount_set(&sparity->refs, 1);
 	INIT_LIST_HEAD(&sparity->spages);
 	sparity->dbitmap = sparity->bitmap;
 	sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
-- 
cgit v1.2.3


From 99f4cdb16f80e1b30392b479d852036f87394b20 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:25 +0200
Subject: btrfs: convert scrub_ctx.refs from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 8130ab11821f..6ba0c9ddf777 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -202,7 +202,7 @@ struct scrub_ctx {
 	 * doesn't free the scrub context before or while the workers are
 	 * doing the wakeup() call.
 	 */
-	atomic_t                refs;
+	refcount_t              refs;
 };
 
 struct scrub_fixup_nodatasum {
@@ -305,7 +305,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx);
 
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
 {
-	atomic_inc(&sctx->refs);
+	refcount_inc(&sctx->refs);
 	atomic_inc(&sctx->bios_in_flight);
 }
 
@@ -356,7 +356,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
 {
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
 
-	atomic_inc(&sctx->refs);
+	refcount_inc(&sctx->refs);
 	/*
 	 * increment scrubs_running to prevent cancel requests from
 	 * completing as long as a worker is running. we must also
@@ -447,7 +447,7 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
 
 static void scrub_put_ctx(struct scrub_ctx *sctx)
 {
-	if (atomic_dec_and_test(&sctx->refs))
+	if (refcount_dec_and_test(&sctx->refs))
 		scrub_free_ctx(sctx);
 }
 
@@ -462,7 +462,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
 	sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
 	if (!sctx)
 		goto nomem;
-	atomic_set(&sctx->refs, 1);
+	refcount_set(&sctx->refs, 1);
 	sctx->is_dev_replace = is_dev_replace;
 	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
 	sctx->curr = -1;
-- 
cgit v1.2.3


From dec95574f4e6545c701420b950278dc6f55d0368 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 10:55:26 +0200
Subject: btrfs: convert btrfs_raid_bio.refs from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/raid56.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 1571bf26dc07..a8954f5188b4 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -149,7 +149,7 @@ struct btrfs_raid_bio {
 
 	int generic_bio_cnt;
 
-	atomic_t refs;
+	refcount_t refs;
 
 	atomic_t stripes_pending;
 
@@ -389,7 +389,7 @@ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
 		if (bio_list_empty(&rbio->bio_list)) {
 			if (!list_empty(&rbio->hash_list)) {
 				list_del_init(&rbio->hash_list);
-				atomic_dec(&rbio->refs);
+				refcount_dec(&rbio->refs);
 				BUG_ON(!list_empty(&rbio->plug_list));
 			}
 		}
@@ -480,7 +480,7 @@ static void cache_rbio(struct btrfs_raid_bio *rbio)
 
 	/* bump our ref if we were not in the list before */
 	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
-		atomic_inc(&rbio->refs);
+		refcount_inc(&rbio->refs);
 
 	if (!list_empty(&rbio->stripe_cache)){
 		list_move(&rbio->stripe_cache, &table->stripe_cache);
@@ -689,7 +689,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
 			    test_bit(RBIO_CACHE_BIT, &cur->flags) &&
 			    !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
 				list_del_init(&cur->hash_list);
-				atomic_dec(&cur->refs);
+				refcount_dec(&cur->refs);
 
 				steal_rbio(cur, rbio);
 				cache_drop = cur;
@@ -738,7 +738,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
 		}
 	}
 lockit:
-	atomic_inc(&rbio->refs);
+	refcount_inc(&rbio->refs);
 	list_add(&rbio->hash_list, &h->hash_list);
 out:
 	spin_unlock_irqrestore(&h->lock, flags);
@@ -784,7 +784,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
 		}
 
 		list_del_init(&rbio->hash_list);
-		atomic_dec(&rbio->refs);
+		refcount_dec(&rbio->refs);
 
 		/*
 		 * we use the plug list to hold all the rbios
@@ -801,7 +801,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
 			list_del_init(&rbio->plug_list);
 
 			list_add(&next->hash_list, &h->hash_list);
-			atomic_inc(&next->refs);
+			refcount_inc(&next->refs);
 			spin_unlock(&rbio->bio_list_lock);
 			spin_unlock_irqrestore(&h->lock, flags);
 
@@ -843,8 +843,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
 {
 	int i;
 
-	WARN_ON(atomic_read(&rbio->refs) < 0);
-	if (!atomic_dec_and_test(&rbio->refs))
+	if (!refcount_dec_and_test(&rbio->refs))
 		return;
 
 	WARN_ON(!list_empty(&rbio->stripe_cache));
@@ -997,7 +996,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
 	rbio->stripe_npages = stripe_npages;
 	rbio->faila = -1;
 	rbio->failb = -1;
-	atomic_set(&rbio->refs, 1);
+	refcount_set(&rbio->refs, 1);
 	atomic_set(&rbio->error, 0);
 	atomic_set(&rbio->stripes_pending, 0);
 
-- 
cgit v1.2.3


From 09ed2f165cb3449237dec842b3564044e12d22cb Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Fri, 10 Mar 2017 11:09:48 -0800
Subject: Btrfs: add file item tracepoints

While debugging truncate problems, I found that these tracepoints could
help us quickly know what went wrong.

Two sets of tracepoints are created to track regular/prealloc file item
and inline file item respectively, I put inline as a separate one since
what inline file items cares about are way less than the regular one.

This adds four tracepoints:
- btrfs_get_extent_show_fi_regular
- btrfs_get_extent_show_fi_inline
- btrfs_truncate_show_fi_regular
- btrfs_truncate_show_fi_inline

Cc: David Sterba <dsterba@suse.cz>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ formatting adjustments ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c             |  15 +++++
 include/trace/events/btrfs.h | 139 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 154 insertions(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 568b28114f09..da71119b5b7c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4401,9 +4401,17 @@ search_again:
 			if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
 				item_end +=
 				    btrfs_file_extent_num_bytes(leaf, fi);
+
+				trace_btrfs_truncate_show_fi_regular(
+					BTRFS_I(inode), leaf, fi,
+					found_key.offset);
 			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 				item_end += btrfs_file_extent_inline_len(leaf,
 							 path->slots[0], fi);
+
+				trace_btrfs_truncate_show_fi_inline(
+					BTRFS_I(inode), leaf, fi, path->slots[0],
+					found_key.offset);
 			}
 			item_end--;
 		}
@@ -6828,11 +6836,18 @@ again:
 	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
 		extent_end = extent_start +
 		       btrfs_file_extent_num_bytes(leaf, item);
+
+		trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
+						       extent_start);
 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
 		size_t size;
 		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
 		extent_end = ALIGN(extent_start + size,
 				   fs_info->sectorsize);
+
+		trace_btrfs_get_extent_show_fi_inline(inode, leaf, item,
+						      path->slots[0],
+						      extent_start);
 	}
 next:
 	if (start >= extent_end) {
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 8f206263fee7..cef39e2baf21 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -12,6 +12,7 @@ struct btrfs_root;
 struct btrfs_fs_info;
 struct btrfs_inode;
 struct extent_map;
+struct btrfs_file_extent_item;
 struct btrfs_ordered_extent;
 struct btrfs_delayed_ref_node;
 struct btrfs_delayed_tree_ref;
@@ -54,6 +55,12 @@ struct btrfs_qgroup_extent_record;
 	      (obj >= BTRFS_ROOT_TREE_OBJECTID &&			\
 	       obj <= BTRFS_QUOTA_TREE_OBJECTID)) ? __show_root_type(obj) : "-"
 
+#define show_fi_type(type)						\
+	__print_symbolic(type,						\
+		 { BTRFS_FILE_EXTENT_INLINE,	"INLINE" },		\
+		 { BTRFS_FILE_EXTENT_REG,	"REG"	 },		\
+		 { BTRFS_FILE_EXTENT_PREALLOC,	"PREALLOC"})
+
 #define BTRFS_GROUP_FLAGS	\
 	{ BTRFS_BLOCK_GROUP_DATA,	"DATA"},	\
 	{ BTRFS_BLOCK_GROUP_SYSTEM,	"SYSTEM"},	\
@@ -232,6 +239,138 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		  __entry->refs, __entry->compress_type)
 );
 
+/* file extent item */
+DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular,
+
+	TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
+		 struct btrfs_file_extent_item *fi, u64 start),
+
+	TP_ARGS(bi, l, fi, start),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	u64,	root_obj	)
+		__field(	u64,	ino		)
+		__field(	loff_t,	isize		)
+		__field(	u64,	disk_isize	)
+		__field(	u64,	num_bytes	)
+		__field(	u64,	ram_bytes	)
+		__field(	u64,	disk_bytenr	)
+		__field(	u64,	disk_num_bytes	)
+		__field(	u64,	extent_offset	)
+		__field(	u8,	extent_type	)
+		__field(	u8,	compression	)
+		__field(	u64,	extent_start	)
+		__field(	u64,	extent_end	)
+	),
+
+	TP_fast_assign_btrfs(bi->root->fs_info,
+		__entry->root_obj	= bi->root->objectid;
+		__entry->ino		= btrfs_ino(bi);
+		__entry->isize		= bi->vfs_inode.i_size;
+		__entry->disk_isize	= bi->disk_i_size;
+		__entry->num_bytes	= btrfs_file_extent_num_bytes(l, fi);
+		__entry->ram_bytes	= btrfs_file_extent_ram_bytes(l, fi);
+		__entry->disk_bytenr	= btrfs_file_extent_disk_bytenr(l, fi);
+		__entry->disk_num_bytes	= btrfs_file_extent_disk_num_bytes(l, fi);
+		__entry->extent_offset	= btrfs_file_extent_offset(l, fi);
+		__entry->extent_type	= btrfs_file_extent_type(l, fi);
+		__entry->compression	= btrfs_file_extent_compression(l, fi);
+		__entry->extent_start	= start;
+		__entry->extent_end	= (start + __entry->num_bytes);
+	),
+
+	TP_printk_btrfs(
+		"root=%llu(%s) inode=%llu size=%llu disk_isize=%llu "
+		"file extent range=[%llu %llu] "
+		"(num_bytes=%llu ram_bytes=%llu disk_bytenr=%llu "
+		"disk_num_bytes=%llu extent_offset=%llu type=%s "
+		"compression=%u",
+		show_root_type(__entry->root_obj), __entry->ino,
+		__entry->isize,
+		__entry->disk_isize, __entry->extent_start,
+		__entry->extent_end, __entry->num_bytes, __entry->ram_bytes,
+		__entry->disk_bytenr, __entry->disk_num_bytes,
+		__entry->extent_offset, show_fi_type(__entry->extent_type),
+		__entry->compression)
+);
+
+DECLARE_EVENT_CLASS(
+	btrfs__file_extent_item_inline,
+
+	TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
+		 struct btrfs_file_extent_item *fi, int slot, u64 start),
+
+	TP_ARGS(bi, l, fi, slot,  start),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	u64,	root_obj	)
+		__field(	u64,	ino		)
+		__field(	loff_t,	isize		)
+		__field(	u64,	disk_isize	)
+		__field(	u8,	extent_type	)
+		__field(	u8,	compression	)
+		__field(	u64,	extent_start	)
+		__field(	u64,	extent_end	)
+	),
+
+	TP_fast_assign_btrfs(
+		bi->root->fs_info,
+		__entry->root_obj	= bi->root->objectid;
+		__entry->ino		= btrfs_ino(bi);
+		__entry->isize		= bi->vfs_inode.i_size;
+		__entry->disk_isize	= bi->disk_i_size;
+		__entry->extent_type	= btrfs_file_extent_type(l, fi);
+		__entry->compression	= btrfs_file_extent_compression(l, fi);
+		__entry->extent_start	= start;
+		__entry->extent_end	= (start + btrfs_file_extent_inline_len(l, slot, fi));
+	),
+
+	TP_printk_btrfs(
+		"root=%llu(%s) inode=%llu size=%llu disk_isize=%llu "
+		"file extent range=[%llu %llu] "
+		"extent_type=%s compression=%u",
+		show_root_type(__entry->root_obj), __entry->ino, __entry->isize,
+		__entry->disk_isize, __entry->extent_start,
+		__entry->extent_end, show_fi_type(__entry->extent_type),
+		__entry->compression)
+);
+
+DEFINE_EVENT(
+	btrfs__file_extent_item_regular, btrfs_get_extent_show_fi_regular,
+
+	TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
+		 struct btrfs_file_extent_item *fi, u64 start),
+
+	TP_ARGS(bi, l, fi, start)
+);
+
+DEFINE_EVENT(
+	btrfs__file_extent_item_regular, btrfs_truncate_show_fi_regular,
+
+	TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
+		 struct btrfs_file_extent_item *fi, u64 start),
+
+	TP_ARGS(bi, l, fi, start)
+);
+
+DEFINE_EVENT(
+	btrfs__file_extent_item_inline, btrfs_get_extent_show_fi_inline,
+
+	TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
+		 struct btrfs_file_extent_item *fi, int slot, u64 start),
+
+	TP_ARGS(bi, l, fi, slot, start)
+);
+
+DEFINE_EVENT(
+	btrfs__file_extent_item_inline, btrfs_truncate_show_fi_inline,
+
+	TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
+		 struct btrfs_file_extent_item *fi, int slot, u64 start),
+
+	TP_ARGS(bi, l, fi, slot, start)
+);
+
 #define show_ordered_flags(flags)					   \
 	__print_flags(flags, "|",					   \
 		{ (1 << BTRFS_ORDERED_IO_DONE), 	"IO_DONE" 	}, \
-- 
cgit v1.2.3


From 592d92eeab3770b2525d2fc5589b205c9f8c33e3 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Tue, 14 Mar 2017 13:33:55 -0700
Subject: Btrfs: create a helper for getting chunk map

We have similar code here and there, this merges them into a helper.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c |   3 +-
 fs/btrfs/volumes.c   | 163 +++++++++++++++++----------------------------------
 fs/btrfs/volumes.h   |   2 +-
 3 files changed, 57 insertions(+), 111 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 174ffffd6d97..77bd016ba18b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2004,14 +2004,13 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
 	u64 map_length = 0;
 	u64 sector;
 	struct btrfs_bio *bbio = NULL;
-	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	int ret;
 
 	ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
 	BUG_ON(!mirror_num);
 
 	/* we can't repair anything in raid56 yet */
-	if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
+	if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num))
 		return 0;
 
 	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index dce59fb59b0c..122201897bee 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2795,10 +2795,38 @@ static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info,
 	return ret;
 }
 
+static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
+					u64 logical, u64 length)
+{
+	struct extent_map_tree *em_tree;
+	struct extent_map *em;
+
+	em_tree = &fs_info->mapping_tree.map_tree;
+	read_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, logical, length);
+	read_unlock(&em_tree->lock);
+
+	if (!em) {
+		btrfs_crit(fs_info, "unable to find logical %llu length %llu",
+			   logical, length);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (em->start > logical || em->start + em->len < logical) {
+		btrfs_crit(fs_info,
+			   "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
+			   logical, length, em->start, em->start + em->len);
+		free_extent_map(em);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* callers are responsible for dropping em's ref. */
+	return em;
+}
+
 int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
 		       struct btrfs_fs_info *fs_info, u64 chunk_offset)
 {
-	struct extent_map_tree *em_tree;
 	struct extent_map *em;
 	struct map_lookup *map;
 	u64 dev_extent_len = 0;
@@ -2806,23 +2834,15 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
 	int i, ret = 0;
 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 
-	em_tree = &fs_info->mapping_tree.map_tree;
-
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
-	read_unlock(&em_tree->lock);
-
-	if (!em || em->start > chunk_offset ||
-	    em->start + em->len < chunk_offset) {
+	em = get_chunk_map(fs_info, chunk_offset, 1);
+	if (IS_ERR(em)) {
 		/*
 		 * This is a logic error, but we don't want to just rely on the
 		 * user having built with ASSERT enabled, so if ASSERT doesn't
 		 * do anything we still error out.
 		 */
 		ASSERT(0);
-		if (em)
-			free_extent_map(em);
-		return -EINVAL;
+		return PTR_ERR(em);
 	}
 	map = em->map_lookup;
 	mutex_lock(&fs_info->chunk_mutex);
@@ -4894,7 +4914,6 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
 	struct btrfs_device *device;
 	struct btrfs_chunk *chunk;
 	struct btrfs_stripe *stripe;
-	struct extent_map_tree *em_tree;
 	struct extent_map *em;
 	struct map_lookup *map;
 	size_t item_size;
@@ -4903,24 +4922,9 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
 	int i = 0;
 	int ret = 0;
 
-	em_tree = &fs_info->mapping_tree.map_tree;
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size);
-	read_unlock(&em_tree->lock);
-
-	if (!em) {
-		btrfs_crit(fs_info, "unable to find logical %Lu len %Lu",
-			   chunk_offset, chunk_size);
-		return -EINVAL;
-	}
-
-	if (em->start != chunk_offset || em->len != chunk_size) {
-		btrfs_crit(fs_info,
-			   "found a bad mapping, wanted %Lu-%Lu, found %Lu-%Lu",
-			    chunk_offset, chunk_size, em->start, em->len);
-		free_extent_map(em);
-		return -EINVAL;
-	}
+	em = get_chunk_map(fs_info, chunk_offset, chunk_size);
+	if (IS_ERR(em))
+		return PTR_ERR(em);
 
 	map = em->map_lookup;
 	item_size = btrfs_chunk_item_size(map->num_stripes);
@@ -5061,15 +5065,12 @@ int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
-	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	int readonly = 0;
 	int miss_ndevs = 0;
 	int i;
 
-	read_lock(&map_tree->map_tree.lock);
-	em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
-	read_unlock(&map_tree->map_tree.lock);
-	if (!em)
+	em = get_chunk_map(fs_info, chunk_offset, 1);
+	if (IS_ERR(em))
 		return 1;
 
 	map = em->map_lookup;
@@ -5123,34 +5124,19 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
 
 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 {
-	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	struct extent_map *em;
 	struct map_lookup *map;
-	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	int ret;
 
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, logical, len);
-	read_unlock(&em_tree->lock);
-
-	/*
-	 * We could return errors for these cases, but that could get ugly and
-	 * we'd probably do the same thing which is just not do anything else
-	 * and exit, so return 1 so the callers don't try to use other copies.
-	 */
-	if (!em) {
-		btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical,
-			    logical+len);
-		return 1;
-	}
-
-	if (em->start > logical || em->start + em->len < logical) {
-		btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got %Lu-%Lu",
-			   logical, logical+len, em->start,
-			   em->start + em->len);
-		free_extent_map(em);
+	em = get_chunk_map(fs_info, logical, len);
+	if (IS_ERR(em))
+		/*
+		 * We could return errors for these cases, but that could get
+		 * ugly and we'd probably do the same thing which is just not do
+		 * anything else and exit, so return 1 so the callers don't try
+		 * to use other copies.
+		 */
 		return 1;
-	}
 
 	map = em->map_lookup;
 	if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
@@ -5179,15 +5165,11 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
 {
 	struct extent_map *em;
 	struct map_lookup *map;
-	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	unsigned long len = fs_info->sectorsize;
 
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, logical, len);
-	read_unlock(&em_tree->lock);
-	BUG_ON(!em);
+	em = get_chunk_map(fs_info, logical, len);
+	BUG_ON(IS_ERR(em));
 
-	BUG_ON(em->start > logical || em->start + em->len < logical);
 	map = em->map_lookup;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
 		len = map->stripe_len * nr_data_stripes(map);
@@ -5195,20 +5177,16 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
 	return len;
 }
 
-int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
+int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
 			   u64 logical, u64 len, int mirror_num)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
-	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	int ret = 0;
 
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, logical, len);
-	read_unlock(&em_tree->lock);
-	BUG_ON(!em);
+	em = get_chunk_map(fs_info, logical, len);
+	BUG_ON(IS_ERR(em));
 
-	BUG_ON(em->start > logical || em->start + em->len < logical);
 	map = em->map_lookup;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
 		ret = 1;
@@ -5328,8 +5306,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 {
 	struct extent_map *em;
 	struct map_lookup *map;
-	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
-	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	u64 offset;
 	u64 stripe_offset;
 	u64 stripe_end_offset;
@@ -5351,23 +5327,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	u64 physical_to_patch_in_first_stripe = 0;
 	u64 raid56_full_stripe_start = (u64)-1;
 
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, logical, *length);
-	read_unlock(&em_tree->lock);
-
-	if (!em) {
-		btrfs_crit(fs_info, "unable to find logical %llu len %llu",
-			logical, *length);
-		return -EINVAL;
-	}
-
-	if (em->start > logical || em->start + em->len < logical) {
-		btrfs_crit(fs_info,
-			   "found a bad mapping, wanted %Lu, found %Lu-%Lu",
-			   logical, em->start, em->start + em->len);
-		free_extent_map(em);
-		return -EINVAL;
-	}
+	em = get_chunk_map(fs_info, logical, *length);
+	if (IS_ERR(em))
+		return PTR_ERR(em);
 
 	map = em->map_lookup;
 	offset = logical - em->start;
@@ -5903,8 +5865,6 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
 		     u64 chunk_start, u64 physical, u64 devid,
 		     u64 **logical, int *naddrs, int *stripe_len)
 {
-	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
-	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	struct extent_map *em;
 	struct map_lookup *map;
 	u64 *buf;
@@ -5914,24 +5874,11 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
 	u64 rmap_len;
 	int i, j, nr = 0;
 
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, chunk_start, 1);
-	read_unlock(&em_tree->lock);
-
-	if (!em) {
-		btrfs_err(fs_info, "couldn't find em for chunk %Lu",
-			chunk_start);
+	em = get_chunk_map(fs_info, chunk_start, 1);
+	if (IS_ERR(em))
 		return -EIO;
-	}
 
-	if (em->start != chunk_start) {
-		btrfs_err(fs_info, "bad chunk start, em=%Lu, wanted=%Lu",
-		       em->start, chunk_start);
-		free_extent_map(em);
-		return -EIO;
-	}
 	map = em->map_lookup;
-
 	length = em->len;
 	rmap_len = map->stripe_len;
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index ac0bf7d0df60..9ee6b706db31 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -475,7 +475,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
 					      struct btrfs_device *tgtdev);
 void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
-int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
+int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
 			   u64 logical, u64 len, int mirror_num);
 unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
 				    struct btrfs_mapping_tree *map_tree,
-- 
cgit v1.2.3


From 0b3d4cd371edb6c4ef47f3fb0cb55bdd9aa471c1 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Tue, 14 Mar 2017 13:33:56 -0700
Subject: Btrfs: separate DISCARD from __btrfs_map_block

Since DISCARD is not as important as an operation like write, we don't
copy it to target device during replace, and it makes __btrfs_map_block
less complex.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 289 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 175 insertions(+), 114 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 122201897bee..d777d626aa87 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5298,6 +5298,158 @@ void btrfs_put_bbio(struct btrfs_bio *bbio)
 		kfree(bbio);
 }
 
+/* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */
+/*
+ * Please note that, discard won't be sent to target device of device
+ * replace.
+ */
+static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
+					 u64 logical, u64 length,
+					 struct btrfs_bio **bbio_ret)
+{
+	struct extent_map *em;
+	struct map_lookup *map;
+	struct btrfs_bio *bbio;
+	u64 offset;
+	u64 stripe_nr;
+	u64 stripe_nr_end;
+	u64 stripe_end_offset;
+	u64 stripe_cnt;
+	u64 stripe_len;
+	u64 stripe_offset;
+	u64 num_stripes;
+	u32 stripe_index;
+	u32 factor = 0;
+	u32 sub_stripes = 0;
+	u64 stripes_per_dev = 0;
+	u32 remaining_stripes = 0;
+	u32 last_stripe = 0;
+	int ret = 0;
+	int i;
+
+	/* discard always return a bbio */
+	ASSERT(bbio_ret);
+
+	em = get_chunk_map(fs_info, logical, length);
+	if (IS_ERR(em))
+		return PTR_ERR(em);
+
+	map = em->map_lookup;
+	/* we don't discard raid56 yet */
+	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	offset = logical - em->start;
+	length = min_t(u64, em->len - offset, length);
+
+	stripe_len = map->stripe_len;
+	/*
+	 * stripe_nr counts the total number of stripes we have to stride
+	 * to get to this block
+	 */
+	stripe_nr = div64_u64(offset, stripe_len);
+
+	/* stripe_offset is the offset of this block in its stripe */
+	stripe_offset = offset - stripe_nr * stripe_len;
+
+	stripe_nr_end = round_up(offset + length, map->stripe_len);
+	stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
+	stripe_cnt = stripe_nr_end - stripe_nr;
+	stripe_end_offset = stripe_nr_end * map->stripe_len -
+			    (offset + length);
+	/*
+	 * after this, stripe_nr is the number of stripes on this
+	 * device we have to walk to find the data, and stripe_index is
+	 * the number of our device in the stripe array
+	 */
+	num_stripes = 1;
+	stripe_index = 0;
+	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+			 BTRFS_BLOCK_GROUP_RAID10)) {
+		if (map->type & BTRFS_BLOCK_GROUP_RAID0)
+			sub_stripes = 1;
+		else
+			sub_stripes = map->sub_stripes;
+
+		factor = map->num_stripes / sub_stripes;
+		num_stripes = min_t(u64, map->num_stripes,
+				    sub_stripes * stripe_cnt);
+		stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
+		stripe_index *= sub_stripes;
+		stripes_per_dev = div_u64_rem(stripe_cnt, factor,
+					      &remaining_stripes);
+		div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
+		last_stripe *= sub_stripes;
+	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+				BTRFS_BLOCK_GROUP_DUP)) {
+		num_stripes = map->num_stripes;
+	} else {
+		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
+					&stripe_index);
+	}
+
+	bbio = alloc_btrfs_bio(num_stripes, 0);
+	if (!bbio) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < num_stripes; i++) {
+		bbio->stripes[i].physical =
+			map->stripes[stripe_index].physical +
+			stripe_offset + stripe_nr * map->stripe_len;
+		bbio->stripes[i].dev = map->stripes[stripe_index].dev;
+
+		if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+				 BTRFS_BLOCK_GROUP_RAID10)) {
+			bbio->stripes[i].length = stripes_per_dev *
+				map->stripe_len;
+
+			if (i / sub_stripes < remaining_stripes)
+				bbio->stripes[i].length +=
+					map->stripe_len;
+
+			/*
+			 * Special for the first stripe and
+			 * the last stripe:
+			 *
+			 * |-------|...|-------|
+			 *     |----------|
+			 *    off     end_off
+			 */
+			if (i < sub_stripes)
+				bbio->stripes[i].length -=
+					stripe_offset;
+
+			if (stripe_index >= last_stripe &&
+			    stripe_index <= (last_stripe +
+					     sub_stripes - 1))
+				bbio->stripes[i].length -=
+					stripe_end_offset;
+
+			if (i == sub_stripes - 1)
+				stripe_offset = 0;
+		} else {
+			bbio->stripes[i].length = length;
+		}
+
+		stripe_index++;
+		if (stripe_index == map->num_stripes) {
+			stripe_index = 0;
+			stripe_nr++;
+		}
+	}
+
+	*bbio_ret = bbio;
+	bbio->map_type = map->type;
+	bbio->num_stripes = num_stripes;
+out:
+	free_extent_map(em);
+	return ret;
+}
+
 static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 			     enum btrfs_map_op op,
 			     u64 logical, u64 *length,
@@ -5308,10 +5460,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	struct map_lookup *map;
 	u64 offset;
 	u64 stripe_offset;
-	u64 stripe_end_offset;
 	u64 stripe_nr;
-	u64 stripe_nr_orig;
-	u64 stripe_nr_end;
 	u64 stripe_len;
 	u32 stripe_index;
 	int i;
@@ -5327,6 +5476,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	u64 physical_to_patch_in_first_stripe = 0;
 	u64 raid56_full_stripe_start = (u64)-1;
 
+	if (op == BTRFS_MAP_DISCARD)
+		return __btrfs_map_block_for_discard(fs_info, logical,
+						     *length, bbio_ret);
+
 	em = get_chunk_map(fs_info, logical, *length);
 	if (IS_ERR(em))
 		return PTR_ERR(em);
@@ -5368,14 +5521,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		raid56_full_stripe_start *= full_stripe_len;
 	}
 
-	if (op == BTRFS_MAP_DISCARD) {
-		/* we don't discard raid56 yet */
-		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-			ret = -EOPNOTSUPP;
-			goto out;
-		}
-		*length = min_t(u64, em->len - offset, *length);
-	} else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+	if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
 		u64 max_len;
 		/* For writes to RAID[56], allow a full stripeset across all disks.
 		   For other RAID types and for RAID[56] reads, just allow a single
@@ -5406,8 +5552,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		btrfs_dev_replace_set_lock_blocking(dev_replace);
 
 	if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
-	    op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
-	    op != BTRFS_MAP_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) {
+	    op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS &&
+	    dev_replace->tgtdev != NULL) {
 		/*
 		 * in dev-replace case, for repair case (that's the only
 		 * case where the mirror is selected explicitly when
@@ -5487,24 +5633,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 
 	num_stripes = 1;
 	stripe_index = 0;
-	stripe_nr_orig = stripe_nr;
-	stripe_nr_end = ALIGN(offset + *length, map->stripe_len);
-	stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
-	stripe_end_offset = stripe_nr_end * map->stripe_len -
-			    (offset + *length);
-
 	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
-		if (op == BTRFS_MAP_DISCARD)
-			num_stripes = min_t(u64, map->num_stripes,
-					    stripe_nr_end - stripe_nr_orig);
 		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
 				&stripe_index);
-		if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
-		    op != BTRFS_MAP_GET_READ_MIRRORS)
+		if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS)
 			mirror_num = 1;
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD ||
-		    op == BTRFS_MAP_GET_READ_MIRRORS)
+		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
@@ -5517,8 +5652,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		}
 
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD ||
-		    op == BTRFS_MAP_GET_READ_MIRRORS) {
+		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) {
 			num_stripes = map->num_stripes;
 		} else if (mirror_num) {
 			stripe_index = mirror_num - 1;
@@ -5534,10 +5668,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 
 		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
 			num_stripes = map->sub_stripes;
-		else if (op == BTRFS_MAP_DISCARD)
-			num_stripes = min_t(u64, map->sub_stripes *
-					    (stripe_nr_end - stripe_nr_orig),
-					    map->num_stripes);
 		else if (mirror_num)
 			stripe_index += mirror_num - 1;
 		else {
@@ -5580,8 +5710,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 			/* We distribute the parity blocks across stripes */
 			div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
 					&stripe_index);
-			if ((op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
-			    op != BTRFS_MAP_GET_READ_MIRRORS) && mirror_num <= 1)
+			if ((op != BTRFS_MAP_WRITE &&
+			     op != BTRFS_MAP_GET_READ_MIRRORS) &&
+			    mirror_num <= 1)
 				mirror_num = 1;
 		}
 	} else {
@@ -5604,7 +5735,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 
 	num_alloc_stripes = num_stripes;
 	if (dev_replace_is_ongoing) {
-		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD)
+		if (op == BTRFS_MAP_WRITE)
 			num_alloc_stripes <<= 1;
 		if (op == BTRFS_MAP_GET_READ_MIRRORS)
 			num_alloc_stripes++;
@@ -5647,84 +5778,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 				RAID6_Q_STRIPE;
 	}
 
-	if (op == BTRFS_MAP_DISCARD) {
-		u32 factor = 0;
-		u32 sub_stripes = 0;
-		u64 stripes_per_dev = 0;
-		u32 remaining_stripes = 0;
-		u32 last_stripe = 0;
-
-		if (map->type &
-		    (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
-			if (map->type & BTRFS_BLOCK_GROUP_RAID0)
-				sub_stripes = 1;
-			else
-				sub_stripes = map->sub_stripes;
-
-			factor = map->num_stripes / sub_stripes;
-			stripes_per_dev = div_u64_rem(stripe_nr_end -
-						      stripe_nr_orig,
-						      factor,
-						      &remaining_stripes);
-			div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
-			last_stripe *= sub_stripes;
-		}
 
-		for (i = 0; i < num_stripes; i++) {
-			bbio->stripes[i].physical =
-				map->stripes[stripe_index].physical +
-				stripe_offset + stripe_nr * map->stripe_len;
-			bbio->stripes[i].dev = map->stripes[stripe_index].dev;
-
-			if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
-					 BTRFS_BLOCK_GROUP_RAID10)) {
-				bbio->stripes[i].length = stripes_per_dev *
-							  map->stripe_len;
-
-				if (i / sub_stripes < remaining_stripes)
-					bbio->stripes[i].length +=
-						map->stripe_len;
-
-				/*
-				 * Special for the first stripe and
-				 * the last stripe:
-				 *
-				 * |-------|...|-------|
-				 *     |----------|
-				 *    off     end_off
-				 */
-				if (i < sub_stripes)
-					bbio->stripes[i].length -=
-						stripe_offset;
-
-				if (stripe_index >= last_stripe &&
-				    stripe_index <= (last_stripe +
-						     sub_stripes - 1))
-					bbio->stripes[i].length -=
-						stripe_end_offset;
-
-				if (i == sub_stripes - 1)
-					stripe_offset = 0;
-			} else
-				bbio->stripes[i].length = *length;
-
-			stripe_index++;
-			if (stripe_index == map->num_stripes) {
-				/* This could only happen for RAID0/10 */
-				stripe_index = 0;
-				stripe_nr++;
-			}
-		}
-	} else {
-		for (i = 0; i < num_stripes; i++) {
-			bbio->stripes[i].physical =
-				map->stripes[stripe_index].physical +
-				stripe_offset +
-				stripe_nr * map->stripe_len;
-			bbio->stripes[i].dev =
-				map->stripes[stripe_index].dev;
-			stripe_index++;
-		}
+	for (i = 0; i < num_stripes; i++) {
+		bbio->stripes[i].physical =
+			map->stripes[stripe_index].physical +
+			stripe_offset +
+			stripe_nr * map->stripe_len;
+		bbio->stripes[i].dev =
+			map->stripes[stripe_index].dev;
+		stripe_index++;
 	}
 
 	if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
@@ -5734,8 +5796,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		sort_parity_stripes(bbio, num_stripes);
 
 	tgtdev_indexes = 0;
-	if (dev_replace_is_ongoing &&
-	   (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD) &&
+	if (dev_replace_is_ongoing && op == BTRFS_MAP_WRITE &&
 	    dev_replace->tgtdev != NULL) {
 		int index_where_to_add;
 		u64 srcdev_devid = dev_replace->srcdev->devid;
-- 
cgit v1.2.3


From 5ab56090b8824c79748931f322f71da1d702fa08 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Tue, 14 Mar 2017 13:33:57 -0700
Subject: Btrfs: introduce a function to get extra mirror from replace

As the part of getting extra mirror in __btrfs_map_block is
independent, this puts it into a separate function.

Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 161 +++++++++++++++++++++++++++++------------------------
 1 file changed, 89 insertions(+), 72 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d777d626aa87..8865e440736a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -139,6 +139,11 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
 static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
+static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+			     enum btrfs_map_op op,
+			     u64 logical, u64 *length,
+			     struct btrfs_bio **bbio_ret,
+			     int mirror_num, int need_raid_map);
 
 DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
@@ -5450,6 +5455,83 @@ out:
 	return ret;
 }
 
+/*
+ * In dev-replace case, for repair case (that's the only case where the mirror
+ * is selected explicitly when calling btrfs_map_block), blocks left of the
+ * left cursor can also be read from the target drive.
+ *
+ * For REQ_GET_READ_MIRRORS, the target drive is added as the last one to the
+ * array of stripes.
+ * For READ, it also needs to be supported using the same mirror number.
+ *
+ * If the requested block is not left of the left cursor, EIO is returned. This
+ * can happen because btrfs_num_copies() returns one more in the dev-replace
+ * case.
+ */
+static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
+					 u64 logical, u64 length,
+					 u64 srcdev_devid, int *mirror_num,
+					 u64 *physical)
+{
+	struct btrfs_bio *bbio = NULL;
+	int num_stripes;
+	int index_srcdev = 0;
+	int found = 0;
+	u64 physical_of_found = 0;
+	int i;
+	int ret = 0;
+
+	ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
+				logical, &length, &bbio, 0, 0);
+	if (ret) {
+		ASSERT(bbio == NULL);
+		return ret;
+	}
+
+	num_stripes = bbio->num_stripes;
+	if (*mirror_num > num_stripes) {
+		/*
+		 * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
+		 * that means that the requested area is not left of the left
+		 * cursor
+		 */
+		btrfs_put_bbio(bbio);
+		return -EIO;
+	}
+
+	/*
+	 * process the rest of the function using the mirror_num of the source
+	 * drive. Therefore look it up first.  At the end, patch the device
+	 * pointer to the one of the target drive.
+	 */
+	for (i = 0; i < num_stripes; i++) {
+		if (bbio->stripes[i].dev->devid != srcdev_devid)
+			continue;
+
+		/*
+		 * In case of DUP, in order to keep it simple, only add the
+		 * mirror with the lowest physical address
+		 */
+		if (found &&
+		    physical_of_found <= bbio->stripes[i].physical)
+			continue;
+
+		index_srcdev = i;
+		found = 1;
+		physical_of_found = bbio->stripes[i].physical;
+	}
+
+	btrfs_put_bbio(bbio);
+
+	ASSERT(found);
+	if (!found)
+		return -EIO;
+
+	*mirror_num = index_srcdev + 1;
+	*physical = physical_of_found;
+	return ret;
+}
+
 static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 			     enum btrfs_map_op op,
 			     u64 logical, u64 *length,
@@ -5554,79 +5636,14 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
 	    op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS &&
 	    dev_replace->tgtdev != NULL) {
-		/*
-		 * in dev-replace case, for repair case (that's the only
-		 * case where the mirror is selected explicitly when
-		 * calling btrfs_map_block), blocks left of the left cursor
-		 * can also be read from the target drive.
-		 * For REQ_GET_READ_MIRRORS, the target drive is added as
-		 * the last one to the array of stripes. For READ, it also
-		 * needs to be supported using the same mirror number.
-		 * If the requested block is not left of the left cursor,
-		 * EIO is returned. This can happen because btrfs_num_copies()
-		 * returns one more in the dev-replace case.
-		 */
-		u64 tmp_length = *length;
-		struct btrfs_bio *tmp_bbio = NULL;
-		int tmp_num_stripes;
-		u64 srcdev_devid = dev_replace->srcdev->devid;
-		int index_srcdev = 0;
-		int found = 0;
-		u64 physical_of_found = 0;
-
-		ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
-			     logical, &tmp_length, &tmp_bbio, 0, 0);
-		if (ret) {
-			WARN_ON(tmp_bbio != NULL);
-			goto out;
-		}
-
-		tmp_num_stripes = tmp_bbio->num_stripes;
-		if (mirror_num > tmp_num_stripes) {
-			/*
-			 * BTRFS_MAP_GET_READ_MIRRORS does not contain this
-			 * mirror, that means that the requested area
-			 * is not left of the left cursor
-			 */
-			ret = -EIO;
-			btrfs_put_bbio(tmp_bbio);
-			goto out;
-		}
-
-		/*
-		 * process the rest of the function using the mirror_num
-		 * of the source drive. Therefore look it up first.
-		 * At the end, patch the device pointer to the one of the
-		 * target drive.
-		 */
-		for (i = 0; i < tmp_num_stripes; i++) {
-			if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
-				continue;
-
-			/*
-			 * In case of DUP, in order to keep it simple, only add
-			 * the mirror with the lowest physical address
-			 */
-			if (found &&
-			    physical_of_found <= tmp_bbio->stripes[i].physical)
-				continue;
-
-			index_srcdev = i;
-			found = 1;
-			physical_of_found = tmp_bbio->stripes[i].physical;
-		}
-
-		btrfs_put_bbio(tmp_bbio);
-
-		if (!found) {
-			WARN_ON(1);
-			ret = -EIO;
+		ret = get_extra_mirror_from_replace(fs_info, logical, *length,
+						    dev_replace->srcdev->devid,
+						    &mirror_num,
+					    &physical_to_patch_in_first_stripe);
+		if (ret)
 			goto out;
-		}
-
-		mirror_num = index_srcdev + 1;
-		patch_the_first_stripe_for_dev_replace = 1;
-		physical_to_patch_in_first_stripe = physical_of_found;
+		else
+			patch_the_first_stripe_for_dev_replace = 1;
 	} else if (mirror_num > map->num_stripes) {
 		mirror_num = 0;
 	}
-- 
cgit v1.2.3


From 73c0f228250ff7fc1d662b64123f2aea9d04ca7e Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Tue, 14 Mar 2017 13:33:58 -0700
Subject: Btrfs: handle operations for device replace separately

Since this part is mostly independent, this moves it to a separate
function.

Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 179 +++++++++++++++++++++++++++++------------------------
 1 file changed, 98 insertions(+), 81 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8865e440736a..f27f9cebce24 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5532,6 +5532,100 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
 	return ret;
 }
 
+static void handle_ops_on_dev_replace(enum btrfs_map_op op,
+				      struct btrfs_bio **bbio_ret,
+				      struct btrfs_dev_replace *dev_replace,
+				      int *num_stripes_ret, int *max_errors_ret)
+{
+	struct btrfs_bio *bbio = *bbio_ret;
+	u64 srcdev_devid = dev_replace->srcdev->devid;
+	int tgtdev_indexes = 0;
+	int num_stripes = *num_stripes_ret;
+	int max_errors = *max_errors_ret;
+	int i;
+
+	if (op == BTRFS_MAP_WRITE) {
+		int index_where_to_add;
+
+		/*
+		 * duplicate the write operations while the dev replace
+		 * procedure is running. Since the copying of the old disk to
+		 * the new disk takes place at run time while the filesystem is
+		 * mounted writable, the regular write operations to the old
+		 * disk have to be duplicated to go to the new disk as well.
+		 *
+		 * Note that device->missing is handled by the caller, and that
+		 * the write to the old disk is already set up in the stripes
+		 * array.
+		 */
+		index_where_to_add = num_stripes;
+		for (i = 0; i < num_stripes; i++) {
+			if (bbio->stripes[i].dev->devid == srcdev_devid) {
+				/* write to new disk, too */
+				struct btrfs_bio_stripe *new =
+					bbio->stripes + index_where_to_add;
+				struct btrfs_bio_stripe *old =
+					bbio->stripes + i;
+
+				new->physical = old->physical;
+				new->length = old->length;
+				new->dev = dev_replace->tgtdev;
+				bbio->tgtdev_map[i] = index_where_to_add;
+				index_where_to_add++;
+				max_errors++;
+				tgtdev_indexes++;
+			}
+		}
+		num_stripes = index_where_to_add;
+	} else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
+		int index_srcdev = 0;
+		int found = 0;
+		u64 physical_of_found = 0;
+
+		/*
+		 * During the dev-replace procedure, the target drive can also
+		 * be used to read data in case it is needed to repair a corrupt
+		 * block elsewhere. This is possible if the requested area is
+		 * left of the left cursor. In this area, the target drive is a
+		 * full copy of the source drive.
+		 */
+		for (i = 0; i < num_stripes; i++) {
+			if (bbio->stripes[i].dev->devid == srcdev_devid) {
+				/*
+				 * In case of DUP, in order to keep it simple,
+				 * only add the mirror with the lowest physical
+				 * address
+				 */
+				if (found &&
+				    physical_of_found <=
+				     bbio->stripes[i].physical)
+					continue;
+				index_srcdev = i;
+				found = 1;
+				physical_of_found = bbio->stripes[i].physical;
+			}
+		}
+		if (found) {
+			struct btrfs_bio_stripe *tgtdev_stripe =
+				bbio->stripes + num_stripes;
+
+			tgtdev_stripe->physical = physical_of_found;
+			tgtdev_stripe->length =
+				bbio->stripes[index_srcdev].length;
+			tgtdev_stripe->dev = dev_replace->tgtdev;
+			bbio->tgtdev_map[index_srcdev] = num_stripes;
+
+			tgtdev_indexes++;
+			num_stripes++;
+		}
+	}
+
+	*num_stripes_ret = num_stripes;
+	*max_errors_ret = max_errors;
+	bbio->num_tgtdevs = tgtdev_indexes;
+	*bbio_ret = bbio;
+}
+
 static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 			     enum btrfs_map_op op,
 			     u64 logical, u64 *length,
@@ -5812,86 +5906,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	if (bbio->raid_map)
 		sort_parity_stripes(bbio, num_stripes);
 
-	tgtdev_indexes = 0;
-	if (dev_replace_is_ongoing && op == BTRFS_MAP_WRITE &&
-	    dev_replace->tgtdev != NULL) {
-		int index_where_to_add;
-		u64 srcdev_devid = dev_replace->srcdev->devid;
-
-		/*
-		 * duplicate the write operations while the dev replace
-		 * procedure is running. Since the copying of the old disk
-		 * to the new disk takes place at run time while the
-		 * filesystem is mounted writable, the regular write
-		 * operations to the old disk have to be duplicated to go
-		 * to the new disk as well.
-		 * Note that device->missing is handled by the caller, and
-		 * that the write to the old disk is already set up in the
-		 * stripes array.
-		 */
-		index_where_to_add = num_stripes;
-		for (i = 0; i < num_stripes; i++) {
-			if (bbio->stripes[i].dev->devid == srcdev_devid) {
-				/* write to new disk, too */
-				struct btrfs_bio_stripe *new =
-					bbio->stripes + index_where_to_add;
-				struct btrfs_bio_stripe *old =
-					bbio->stripes + i;
-
-				new->physical = old->physical;
-				new->length = old->length;
-				new->dev = dev_replace->tgtdev;
-				bbio->tgtdev_map[i] = index_where_to_add;
-				index_where_to_add++;
-				max_errors++;
-				tgtdev_indexes++;
-			}
-		}
-		num_stripes = index_where_to_add;
-	} else if (dev_replace_is_ongoing &&
-		   op == BTRFS_MAP_GET_READ_MIRRORS &&
-		   dev_replace->tgtdev != NULL) {
-		u64 srcdev_devid = dev_replace->srcdev->devid;
-		int index_srcdev = 0;
-		int found = 0;
-		u64 physical_of_found = 0;
-
-		/*
-		 * During the dev-replace procedure, the target drive can
-		 * also be used to read data in case it is needed to repair
-		 * a corrupt block elsewhere. This is possible if the
-		 * requested area is left of the left cursor. In this area,
-		 * the target drive is a full copy of the source drive.
-		 */
-		for (i = 0; i < num_stripes; i++) {
-			if (bbio->stripes[i].dev->devid == srcdev_devid) {
-				/*
-				 * In case of DUP, in order to keep it
-				 * simple, only add the mirror with the
-				 * lowest physical address
-				 */
-				if (found &&
-				    physical_of_found <=
-				     bbio->stripes[i].physical)
-					continue;
-				index_srcdev = i;
-				found = 1;
-				physical_of_found = bbio->stripes[i].physical;
-			}
-		}
-		if (found) {
-			struct btrfs_bio_stripe *tgtdev_stripe =
-				bbio->stripes + num_stripes;
-
-			tgtdev_stripe->physical = physical_of_found;
-			tgtdev_stripe->length =
-				bbio->stripes[index_srcdev].length;
-			tgtdev_stripe->dev = dev_replace->tgtdev;
-			bbio->tgtdev_map[index_srcdev] = num_stripes;
-
-			tgtdev_indexes++;
-			num_stripes++;
-		}
+	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
+	    (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)) {
+		handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
+					  &max_errors);
 	}
 
 	*bbio_ret = bbio;
@@ -5899,7 +5917,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	bbio->num_stripes = num_stripes;
 	bbio->max_errors = max_errors;
 	bbio->mirror_num = mirror_num;
-	bbio->num_tgtdevs = tgtdev_indexes;
 
 	/*
 	 * this is the case that REQ_READ && dev_replace_is_ongoing &&
-- 
cgit v1.2.3


From 6fad823f4998cdff49c443891a0acc9f1bc289f9 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Tue, 14 Mar 2017 13:33:59 -0700
Subject: Btrfs: do not add extra mirror when dev_replace target dev is not
 available

With this, we can avoid allocating memory for dev replace copies if the
target dev is not available.

Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f27f9cebce24..8cc40268c50c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5157,7 +5157,8 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 	free_extent_map(em);
 
 	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
-	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
+	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
+	    fs_info->dev_replace.tgtdev)
 		ret++;
 	btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
 
@@ -5845,7 +5846,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	}
 
 	num_alloc_stripes = num_stripes;
-	if (dev_replace_is_ongoing) {
+	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
 		if (op == BTRFS_MAP_WRITE)
 			num_alloc_stripes <<= 1;
 		if (op == BTRFS_MAP_GET_READ_MIRRORS)
@@ -5858,7 +5859,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		ret = -ENOMEM;
 		goto out;
 	}
-	if (dev_replace_is_ongoing)
+	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
 		bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
 
 	/* build raid_map */
-- 
cgit v1.2.3


From 2b19a1fef7be743505f7af6ae47065a2253d6f4e Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Tue, 14 Mar 2017 13:34:00 -0700
Subject: Btrfs: helper for ops that requires full stripe

This adds a helper to show directly whether ops require full stripe.

Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8cc40268c50c..259bd2120da5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5627,6 +5627,11 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
 	*bbio_ret = bbio;
 }
 
+static bool need_full_stripe(enum btrfs_map_op op)
+{
+	return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
+}
+
 static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 			     enum btrfs_map_op op,
 			     u64 logical, u64 *length,
@@ -5729,8 +5734,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		btrfs_dev_replace_set_lock_blocking(dev_replace);
 
 	if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
-	    op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS &&
-	    dev_replace->tgtdev != NULL) {
+	    !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
 		ret = get_extra_mirror_from_replace(fs_info, logical, *length,
 						    dev_replace->srcdev->devid,
 						    &mirror_num,
@@ -5863,10 +5867,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
 
 	/* build raid_map */
-	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
-	    need_raid_map &&
-	    ((op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) ||
-	    mirror_num > 1)) {
+	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
+	    (need_full_stripe(op) || mirror_num > 1)) {
 		u64 tmp;
 		unsigned rot;
 
@@ -5901,14 +5903,14 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		stripe_index++;
 	}
 
-	if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
+	if (need_full_stripe(op))
 		max_errors = btrfs_chunk_max_errors(map);
 
 	if (bbio->raid_map)
 		sort_parity_stripes(bbio, num_stripes);
 
 	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
-	    (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)) {
+	    need_full_stripe(op)) {
 		handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
 					  &max_errors);
 	}
-- 
cgit v1.2.3


From 539b50d2f68944e7c1184c68da5c535499cbccfe Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Tue, 14 Mar 2017 13:34:01 -0700
Subject: Btrfs: convert BUG_ON to WARN_ON

These two BUG_ON()s would never be true, ensured by callers' logic.

Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 259bd2120da5..13574f37dc36 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5174,7 +5174,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
 	unsigned long len = fs_info->sectorsize;
 
 	em = get_chunk_map(fs_info, logical, len);
-	BUG_ON(IS_ERR(em));
+	WARN_ON(IS_ERR(em));
 
 	map = em->map_lookup;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
@@ -5191,7 +5191,7 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
 	int ret = 0;
 
 	em = get_chunk_map(fs_info, logical, len);
-	BUG_ON(IS_ERR(em));
+	WARN_ON(IS_ERR(em));
 
 	map = em->map_lookup;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
-- 
cgit v1.2.3


From 1a79c1f2466bb54b329155b2b6910f386dc5aa9d Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Mon, 6 Mar 2017 13:49:02 -0800
Subject: Btrfs: update comments in cache_save_setup

We also don't bother to flush free space cache while with free space
tree.

Cc: David Sterba <dsterba@suse.cz>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5a7ddcff406b..1b9ede123ffb 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3443,7 +3443,8 @@ again:
 		/*
 		 * don't bother trying to write stuff out _if_
 		 * a) we're not cached,
-		 * b) we're with nospace_cache mount option.
+		 * b) we're with nospace_cache mount option,
+		 * c) we're with v2 space_cache (FREE_SPACE_TREE).
 		 */
 		dcs = BTRFS_DC_WRITTEN;
 		spin_unlock(&block_group->lock);
-- 
cgit v1.2.3


From 4d339d01063a248b3deb3aff94aaf53be6e84e08 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Date: Sat, 4 Mar 2017 12:32:50 -0600
Subject: btrfs: No need to check !(flags & MS_RDONLY) twice

Code cleanup.
The code block is for !(*flags & MS_RDONLY). We don't need
to check it again.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9530a333d302..6346876c97ea 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1788,8 +1788,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		}
 
 		if (fs_info->fs_devices->missing_devices >
-		     fs_info->num_tolerated_disk_barrier_failures &&
-		    !(*flags & MS_RDONLY)) {
+		     fs_info->num_tolerated_disk_barrier_failures) {
 			btrfs_warn(fs_info,
 				"too many missing devices, writeable remount is not allowed");
 			ret = -EACCES;
-- 
cgit v1.2.3


From 261cc2cca0a8c1d817be65434052feb1db1fd961 Mon Sep 17 00:00:00 2001
From: Hans van Kranenburg <hans.van.kranenburg@mendix.com>
Date: Wed, 8 Mar 2017 18:58:43 +0100
Subject: Btrfs: consistent usage of types in balance_args

The btrfs_balance_args are only used for the balance ioctl, so use __u
instead of __le here for consistency. The __le usage was introduced in
bc3094673f22d and dee32d0ac3719 and was probably a result of
copy/pasting when the code was written.

The usage of __le did not break anything, but it's unnecessary. Also,
this change makes the code less confusing for the careful reader.

Signed-off-by: Hans van Kranenburg <hans.van.kranenburg@mendix.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index dcfc3a5a9cb1..a456e5309238 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -291,10 +291,10 @@ struct btrfs_ioctl_feature_flags {
 struct btrfs_balance_args {
 	__u64 profiles;
 	union {
-		__le64 usage;
+		__u64 usage;
 		struct {
-			__le32 usage_min;
-			__le32 usage_max;
+			__u32 usage_min;
+			__u32 usage_max;
 		};
 	};
 	__u64 devid;
@@ -324,8 +324,8 @@ struct btrfs_balance_args {
 	 * Process chunks that cross stripes_min..stripes_max devices,
 	 * BTRFS_BALANCE_ARGS_STRIPES_RANGE
 	 */
-	__le32 stripes_min;
-	__le32 stripes_max;
+	__u32 stripes_min;
+	__u32 stripes_max;
 
 	__u64 unused[6];
 } __attribute__ ((__packed__));
-- 
cgit v1.2.3


From cc8385b59e17d489814d54d6a846aba7051fdea5 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 2 Mar 2017 18:54:52 +0100
Subject: btrfs: preallocate radix tree node for readahead

We can preallocate the node so insertion does not have to do that under
the lock. The GFP flags for the per-device radix tree are initialized to
 GFP_NOFS & ~__GFP_DIRECT_RECLAIM
but we can use GFP_KERNEL, same as an allocation above anyway, but also
because readahead is optional and not on any critical writeout path.

Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/reada.c   | 7 +++++++
 fs/btrfs/volumes.c | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index e88bca87f5d2..fdae8ca79401 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -270,6 +270,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
 	if (!zone)
 		return NULL;
 
+	ret = radix_tree_preload(GFP_KERNEL);
+	if (ret) {
+		kfree(zone);
+		return NULL;
+	}
+
 	zone->start = start;
 	zone->end = end;
 	INIT_LIST_HEAD(&zone->list);
@@ -299,6 +305,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
 			zone = NULL;
 	}
 	spin_unlock(&fs_info->reada_lock);
+	radix_tree_preload_end();
 
 	return zone;
 }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 13574f37dc36..df50a63bbc3f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -252,7 +252,7 @@ static struct btrfs_device *__alloc_device(void)
 	atomic_set(&dev->reada_in_flight, 0);
 	atomic_set(&dev->dev_stats_ccnt, 0);
 	btrfs_device_data_ordered_init(dev);
-	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+	INIT_RADIX_TREE(&dev->reada_zones, GFP_KERNEL);
 	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 
 	return dev;
-- 
cgit v1.2.3


From 7ef70b4d9987a78d39c4e40a02093493333e5408 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 2 Mar 2017 18:54:52 +0100
Subject: btrfs: preallocate radix tree node for global readahead tree

We can preallocate the node so insertion does not have to do that under
the lock. The GFP flags for the global radix tree are initialized to
 GFP_NOFS & ~__GFP_DIRECT_RECLAIM
but we can use GFP_KERNEL, because readahead is optional and not on any
critical writeout path.

Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 2 +-
 fs/btrfs/reada.c   | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index bd415e1dd114..12eba54877c2 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2693,7 +2693,7 @@ int open_ctree(struct super_block *sb,
 	fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
 	fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
 	/* readahead state */
-	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_KERNEL);
 	spin_lock_init(&fs_info->reada_lock);
 
 	fs_info->thread_pool_size = min_t(unsigned long,
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index fdae8ca79401..a7fa4a5cb296 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -393,6 +393,10 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 		goto error;
 	}
 
+	ret = radix_tree_preload(GFP_KERNEL);
+	if (ret)
+		goto error;
+
 	/* insert extent in reada_tree + all per-device trees, all or nothing */
 	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
 	spin_lock(&fs_info->reada_lock);
@@ -402,13 +406,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 		re_exist->refcnt++;
 		spin_unlock(&fs_info->reada_lock);
 		btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+		radix_tree_preload_end();
 		goto error;
 	}
 	if (ret) {
 		spin_unlock(&fs_info->reada_lock);
 		btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+		radix_tree_preload_end();
 		goto error;
 	}
+	radix_tree_preload_end();
 	prev_dev = NULL;
 	dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
 			&fs_info->dev_replace);
-- 
cgit v1.2.3


From d48d71aa9977473b6515bb48933617a06cdc7be9 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 2 Mar 2017 19:43:30 +0100
Subject: btrfs: remove redundant parameter from btree_readahead_hook

We can read fs_info from eb.

Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h   | 3 +--
 fs/btrfs/disk-io.c | 4 ++--
 fs/btrfs/reada.c   | 4 ++--
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 285566cc2f7d..7343bf7f2b35 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3672,8 +3672,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
 			      struct btrfs_key *start, struct btrfs_key *end);
 int btrfs_reada_wait(void *handle);
 void btrfs_reada_detach(void *handle);
-int btree_readahead_hook(struct btrfs_fs_info *fs_info,
-			 struct extent_buffer *eb, int err);
+int btree_readahead_hook(struct extent_buffer *eb, int err);
 
 static inline int is_fstree(u64 rootid)
 {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 12eba54877c2..a9314fe494c9 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -762,7 +762,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 err:
 	if (reads_done &&
 	    test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
-		btree_readahead_hook(fs_info, eb, ret);
+		btree_readahead_hook(eb, ret);
 
 	if (ret) {
 		/*
@@ -787,7 +787,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
 	eb->read_mirror = failed_mirror;
 	atomic_dec(&eb->io_pages);
 	if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
-		btree_readahead_hook(eb->fs_info, eb, -EIO);
+		btree_readahead_hook(eb, -EIO);
 	return -EIO;	/* we fixed nothing */
 }
 
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index a7fa4a5cb296..306e5108aac7 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -209,9 +209,9 @@ cleanup:
 	return;
 }
 
-int btree_readahead_hook(struct btrfs_fs_info *fs_info,
-			 struct extent_buffer *eb, int err)
+int btree_readahead_hook(struct extent_buffer *eb, int err)
 {
+	struct btrfs_fs_info *fs_info = eb->fs_info;
 	int ret = 0;
 	struct reada_extent *re;
 
-- 
cgit v1.2.3


From 0ceaf282137f261abf634f96b15c535baa6a9c93 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 2 Mar 2017 19:43:30 +0100
Subject: btrfs: remove redundant parameter from reada_find_zone

We can read fs_info from dev.

Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/reada.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 306e5108aac7..2deba18886db 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -235,10 +235,10 @@ start_machine:
 	return ret;
 }
 
-static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
-					  struct btrfs_device *dev, u64 logical,
+static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
 					  struct btrfs_bio *bbio)
 {
+	struct btrfs_fs_info *fs_info = dev->fs_info;
 	int ret;
 	struct reada_zone *zone;
 	struct btrfs_block_group_cache *cache = NULL;
@@ -374,7 +374,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 		 if (!dev->bdev)
 			continue;
 
-		zone = reada_find_zone(fs_info, dev, logical, bbio);
+		zone = reada_find_zone(dev, logical, bbio);
 		if (!zone)
 			continue;
 
-- 
cgit v1.2.3


From 5721b8ad260f6ed15e3c82447981dd7a2f4bdf72 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 2 Mar 2017 19:43:30 +0100
Subject: btrfs: remove redundant parameter from reada_start_machine_dev

We can read fs_info from dev.

Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/reada.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 2deba18886db..5c1e6bbed132 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -653,9 +653,9 @@ static int reada_pick_zone(struct btrfs_device *dev)
 	return 1;
 }
 
-static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
-				   struct btrfs_device *dev)
+static int reada_start_machine_dev(struct btrfs_device *dev)
 {
+	struct btrfs_fs_info *fs_info = dev->fs_info;
 	struct reada_extent *re = NULL;
 	int mirror_num = 0;
 	struct extent_buffer *eb = NULL;
@@ -768,8 +768,7 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
 		list_for_each_entry(device, &fs_devices->devices, dev_list) {
 			if (atomic_read(&device->reada_in_flight) <
 			    MAX_IN_FLIGHT)
-				enqueued += reada_start_machine_dev(fs_info,
-								    device);
+				enqueued += reada_start_machine_dev(device);
 		}
 		mutex_unlock(&fs_devices->device_list_mutex);
 		total += enqueued;
-- 
cgit v1.2.3


From 994a5d2bc758792f1808d7b112990a14182ef847 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 15 Mar 2017 16:39:59 +0100
Subject: btrfs: remove local blocksize variable in reada_find_extent

The name is misleading and the local variable serves no purpose.

Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/reada.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 5c1e6bbed132..a17e775a4a89 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -320,7 +320,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 	struct btrfs_bio *bbio = NULL;
 	struct btrfs_device *dev;
 	struct btrfs_device *prev_dev;
-	u32 blocksize;
 	u64 length;
 	int real_stripes;
 	int nzones = 0;
@@ -341,7 +340,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 	if (!re)
 		return NULL;
 
-	blocksize = fs_info->nodesize;
 	re->logical = logical;
 	re->top = *top;
 	INIT_LIST_HEAD(&re->extctl);
@@ -351,10 +349,10 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 	/*
 	 * map block
 	 */
-	length = blocksize;
+	length = fs_info->nodesize;
 	ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
 			&length, &bbio, 0);
-	if (ret || !bbio || length < blocksize)
+	if (ret || !bbio || length < fs_info->nodesize)
 		goto error;
 
 	if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
-- 
cgit v1.2.3


From f486135ebab4fb91366a1e41fb15ed3036ad0cf9 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 15 Mar 2017 16:17:03 +0100
Subject: btrfs: remove unused qgroup members from btrfs_trans_handle

The members have been effectively unused since "Btrfs: rework qgroup
accounting" (fcebe4562dec83b3), there's no substitute for
assert_qgroups_uptodate so it's removed as well.

Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c       |  1 -
 fs/btrfs/qgroup.c            | 12 ------------
 fs/btrfs/qgroup.h            |  1 -
 fs/btrfs/tests/btrfs-tests.c |  1 -
 fs/btrfs/transaction.c       |  3 ---
 fs/btrfs/transaction.h       |  2 --
 6 files changed, 20 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1b9ede123ffb..e870e60e33bc 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3003,7 +3003,6 @@ again:
 		goto again;
 	}
 out:
-	assert_qgroups_uptodate(trans);
 	trans->can_flush_pending_bgs = can_flush_pending_bgs;
 	return 0;
 }
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a59801dc2a34..b2fdefc6d191 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2487,18 +2487,6 @@ out:
 	spin_unlock(&fs_info->qgroup_lock);
 }
 
-void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
-{
-	if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
-		return;
-	btrfs_err(trans->fs_info,
-		"qgroups not uptodate in trans handle %p:  list is%s empty, seq is %#x.%x",
-		trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
-		(u32)(trans->delayed_ref_elem.seq >> 32),
-		(u32)trans->delayed_ref_elem.seq);
-	BUG();
-}
-
 /*
  * returns < 0 on error, 0 when more leafs are to be scanned.
  * returns 1 when done.
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 26932a8a1993..96fc56ebf55a 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -196,7 +196,6 @@ static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
 	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
 	trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
 }
-void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index ea272432c930..b18ab8f327a5 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -237,7 +237,6 @@ void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans)
 {
 	memset(trans, 0, sizeof(*trans));
 	trans->transid = 1;
-	INIT_LIST_HEAD(&trans->qgroup_ref_list);
 	trans->type = __TRANS_DUMMY;
 }
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a7d7a7d1d78a..ee5844855c15 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -572,7 +572,6 @@ again:
 
 	h->type = type;
 	h->can_flush_pending_bgs = true;
-	INIT_LIST_HEAD(&h->qgroup_ref_list);
 	INIT_LIST_HEAD(&h->new_bgs);
 
 	smp_mb();
@@ -917,7 +916,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 		wake_up_process(info->transaction_kthread);
 		err = -EIO;
 	}
-	assert_qgroups_uptodate(trans);
 
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 	if (must_run_delayed_refs) {
@@ -2223,7 +2221,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 
 	switch_commit_roots(cur_trans, fs_info);
 
-	assert_qgroups_uptodate(trans);
 	ASSERT(list_empty(&cur_trans->dirty_bgs));
 	ASSERT(list_empty(&cur_trans->io_bgs));
 	update_super_roots(fs_info);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 902619f83db6..c55e44560103 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -127,8 +127,6 @@ struct btrfs_trans_handle {
 	unsigned int type;
 	struct btrfs_root *root;
 	struct btrfs_fs_info *fs_info;
-	struct seq_list delayed_ref_elem;
-	struct list_head qgroup_ref_list;
 	struct list_head new_bgs;
 };
 
-- 
cgit v1.2.3


From f58d88b336c1c83a35d6b2582d45cd87cb2ca406 Mon Sep 17 00:00:00 2001
From: Edmund Nadolski <enadolski@suse.com>
Date: Thu, 16 Mar 2017 10:04:33 -0600
Subject: btrfs: provide enumeration for __merge_refs mode argument

Replace hardcoded numeric values for __merge_refs 'mode' argument
with descriptive constants.

Signed-off-by: Edmund Nadolski <enadolski@suse.com>
Reviewed-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/backref.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 116338344224..6ce7281749a8 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -26,6 +26,11 @@
 #include "delayed-ref.h"
 #include "locking.h"
 
+enum merge_mode {
+	MERGE_IDENTICAL_KEYS = 1,
+	MERGE_IDENTICAL_PARENTS,
+};
+
 /* Just an arbitrary number so we can be sure this happened */
 #define BACKREF_FOUND_SHARED 6
 
@@ -809,14 +814,12 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
 /*
  * merge backrefs and adjust counts accordingly
  *
- * mode = 1: merge identical keys, if key is set
- *    FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
- *           additionally, we could even add a key range for the blocks we
- *           looked into to merge even more (-> replace unresolved refs by those
- *           having a parent).
- * mode = 2: merge identical parents
+ *    FIXME: For MERGE_IDENTICAL_KEYS, if we add more keys in __add_prelim_ref
+ *           then we can merge more here. Additionally, we could even add a key
+ *           range for the blocks we looked into to merge even more (-> replace
+ *           unresolved refs by those having a parent).
  */
-static void __merge_refs(struct list_head *head, int mode)
+static void __merge_refs(struct list_head *head, enum merge_mode mode)
 {
 	struct __prelim_ref *pos1;
 
@@ -829,7 +832,7 @@ static void __merge_refs(struct list_head *head, int mode)
 
 			if (!ref_for_same_block(ref1, ref2))
 				continue;
-			if (mode == 1) {
+			if (mode == MERGE_IDENTICAL_KEYS) {
 				if (!ref1->parent && ref2->parent)
 					swap(ref1, ref2);
 			} else {
@@ -1374,7 +1377,7 @@ again:
 	if (ret)
 		goto out;
 
-	__merge_refs(&prefs, 1);
+	__merge_refs(&prefs, MERGE_IDENTICAL_KEYS);
 
 	ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
 				      extent_item_pos, total_refs,
@@ -1382,7 +1385,7 @@ again:
 	if (ret)
 		goto out;
 
-	__merge_refs(&prefs, 2);
+	__merge_refs(&prefs, MERGE_IDENTICAL_PARENTS);
 
 	while (!list_empty(&prefs)) {
 		ref = list_first_entry(&prefs, struct __prelim_ref, list);
-- 
cgit v1.2.3


From de47c9d3ff875a3cb1251af35ee8d30afaca78bd Mon Sep 17 00:00:00 2001
From: Edmund Nadolski <enadolski@suse.com>
Date: Thu, 16 Mar 2017 10:04:34 -0600
Subject: btrfs: replace hardcoded value with SEQ_LAST macro

Define the SEQ_LAST macro to replace (u64)-1 in places where said
value triggers a special-case ref search behavior.

Signed-off-by: Edmund Nadolski <enadolski@suse.com>
Reviewed-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/backref.c | 16 ++++++++--------
 fs/btrfs/ctree.h   |  2 ++
 fs/btrfs/qgroup.c  |  4 ++--
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 6ce7281749a8..24865da63d8f 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -538,7 +538,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 	 * slot==nritems. In that case, go to the next leaf before we continue.
 	 */
 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
-		if (time_seq == (u64)-1)
+		if (time_seq == SEQ_LAST)
 			ret = btrfs_next_leaf(root, path);
 		else
 			ret = btrfs_next_old_leaf(root, path, time_seq);
@@ -582,7 +582,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 			eie = NULL;
 		}
 next:
-		if (time_seq == (u64)-1)
+		if (time_seq == SEQ_LAST)
 			ret = btrfs_next_item(root, path);
 		else
 			ret = btrfs_next_old_item(root, path, time_seq);
@@ -634,7 +634,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 
 	if (path->search_commit_root)
 		root_level = btrfs_header_level(root->commit_root);
-	else if (time_seq == (u64)-1)
+	else if (time_seq == SEQ_LAST)
 		root_level = btrfs_header_level(root->node);
 	else
 		root_level = btrfs_old_root_level(root, time_seq);
@@ -645,7 +645,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	}
 
 	path->lowest_level = level;
-	if (time_seq == (u64)-1)
+	if (time_seq == SEQ_LAST)
 		ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
 					0, 0);
 	else
@@ -1199,7 +1199,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
  *
  * NOTE: This can return values > 0
  *
- * If time_seq is set to (u64)-1, it will not search delayed_refs, and behave
+ * If time_seq is set to SEQ_LAST, it will not search delayed_refs, and behave
  * much like trans == NULL case, the difference only lies in it will not
  * commit root.
  * The special case is for qgroup to search roots in commit_transaction().
@@ -1246,7 +1246,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 		path->skip_locking = 1;
 	}
 
-	if (time_seq == (u64)-1)
+	if (time_seq == SEQ_LAST)
 		path->skip_locking = 1;
 
 	/*
@@ -1276,9 +1276,9 @@ again:
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 	if (trans && likely(trans->type != __TRANS_DUMMY) &&
-	    time_seq != (u64)-1) {
+	    time_seq != SEQ_LAST) {
 #else
-	if (trans && time_seq != (u64)-1) {
+	if (trans && time_seq != SEQ_LAST) {
 #endif
 		/*
 		 * look if there are updates for this ref queued and lock the
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7343bf7f2b35..e9d77115e695 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -659,6 +659,8 @@ struct seq_list {
 
 #define SEQ_LIST_INIT(name)	{ .list = LIST_HEAD_INIT((name).list), .seq = 0 }
 
+#define SEQ_LAST	((u64)-1)
+
 enum btrfs_orphan_cleanup_state {
 	ORPHAN_CLEANUP_STARTED	= 1,
 	ORPHAN_CLEANUP_DONE	= 2,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b2fdefc6d191..21647a414aa5 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2055,12 +2055,12 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
 
 		if (!ret) {
 			/*
-			 * Use (u64)-1 as time_seq to do special search, which
+			 * Use SEQ_LAST as time_seq to do special search, which
 			 * doesn't lock tree or delayed_refs and search current
 			 * root. It's safe inside commit_transaction().
 			 */
 			ret = btrfs_find_all_roots(trans, fs_info,
-					record->bytenr, (u64)-1, &new_roots);
+					record->bytenr, SEQ_LAST, &new_roots);
 			if (ret < 0)
 				goto cleanup;
 			if (qgroup_to_skip)
-- 
cgit v1.2.3


From 48a89bc4f2ceab87bc858a8eb189636b09c846a7 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Date: Mon, 27 Mar 2017 12:29:57 -0500
Subject: btrfs: qgroups: Retry after commit on getting EDQUOT

We are facing the same problem with EDQUOT which was experienced with
ENOSPC. Not sure if we require a full ticketing system such as ENOSPC, but
here is a quick fix, which may be too big a hammer.

Quotas are reserved during the start of an operation, incrementing
qg->reserved. However, it is written to disk in a commit_transaction
which could take as long as commit_interval. In the meantime there
could be deletions which are not accounted for because deletions are
accounted for only while committed (free_refroot). So, when we get
a EDQUOT flush the data to disk and try again.

This fixes fstests btrfs/139.

Here is a sample script which shows this issue.

DEVICE=/dev/vdb
MOUNTPOINT=/mnt
TESTVOL=$MOUNTPOINT/tmp
QUOTA=5
PROG=btrfs
DD_BS="4k"
DD_COUNT="256"
RUN_TIMES=5000

mkfs.btrfs -f $DEVICE
mount -o commit=240 $DEVICE $MOUNTPOINT
$PROG subvolume create $TESTVOL
$PROG quota enable $TESTVOL
$PROG qgroup limit ${QUOTA}G $TESTVOL

typeset -i DD_RUN_GOOD
typeset -i QUOTA

function _check_cmd() {
        if [[ ${?} > 0 ]]; then
                echo -n "$(date) E: Running previous command"
                echo ${*}
                echo "Without sync"
                $PROG qgroup show -pcreFf ${TESTVOL}
                echo "With sync"
                $PROG qgroup show -pcreFf --sync ${TESTVOL}
                exit 1
        fi
}

while true; do
  DD_RUN_GOOD=$RUN_TIMES

  while (( ${DD_RUN_GOOD} != 0 )); do
        dd if=/dev/zero of=${TESTVOL}/quotatest${DD_RUN_GOOD} bs=${DD_BS} count=${DD_COUNT}
        _check_cmd "dd if=/dev/zero of=${TESTVOL}/quotatest${DD_RUN_GOOD} bs=${DD_BS} count=${DD_COUNT}"
        DD_RUN_GOOD=(${DD_RUN_GOOD}-1)
  done

  $PROG qgroup show -pcref $TESTVOL
  echo "----------- Cleanup ---------- "
  rm $TESTVOL/quotatest*

done

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 21647a414aa5..1d7942c5a38a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2367,6 +2367,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	u64 ref_root = root->root_key.objectid;
 	int ret = 0;
+	int retried = 0;
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
 
@@ -2375,7 +2376,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
 
 	if (num_bytes == 0)
 		return 0;
-
+retry:
 	spin_lock(&fs_info->qgroup_lock);
 	quota_root = fs_info->quota_root;
 	if (!quota_root)
@@ -2402,6 +2403,27 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
 		qg = unode_aux_to_qgroup(unode);
 
 		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
+			/*
+			 * Commit the tree and retry, since we may have
+			 * deletions which would free up space.
+			 */
+			if (!retried && qg->reserved > 0) {
+				struct btrfs_trans_handle *trans;
+
+				spin_unlock(&fs_info->qgroup_lock);
+				ret = btrfs_start_delalloc_inodes(root, 0);
+				if (ret)
+					return ret;
+				btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
+				trans = btrfs_join_transaction(root);
+				if (IS_ERR(trans))
+					return PTR_ERR(trans);
+				ret = btrfs_commit_transaction(trans);
+				if (ret)
+					return ret;
+				retried++;
+				goto retry;
+			}
 			ret = -EDQUOT;
 			goto out;
 		}
-- 
cgit v1.2.3


From 171938e528079deced3226a17dcab12121312a64 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 28 Mar 2017 14:44:21 +0200
Subject: btrfs: track exclusive filesystem operation in flags

There are several operations, usually started from ioctls, that cannot
run concurrently. The status is tracked in
mutually_exclusive_operation_running as an atomic_t. We can easily track
the status as one of the per-filesystem flag bits with same
synchronization guarantees.

The conversion replaces:

* atomic_xchg(..., 1)    ->   test_and_set_bit(FLAG, ...)
* atomic_set(..., 0)     ->   clear_bit(FLAG, ...)

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       |  7 +++++--
 fs/btrfs/dev-replace.c |  5 ++---
 fs/btrfs/ioctl.c       | 33 +++++++++++++++------------------
 fs/btrfs/volumes.c     |  6 +++---
 4 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e9d77115e695..70631d773669 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -705,6 +705,11 @@ struct btrfs_delayed_root;
 #define BTRFS_FS_BTREE_ERR			11
 #define BTRFS_FS_LOG1_ERR			12
 #define BTRFS_FS_LOG2_ERR			13
+/*
+ * Indicate that a whole-filesystem exclusive operation is running
+ * (device replace, resize, device add/delete, balance)
+ */
+#define BTRFS_FS_EXCL_OP			14
 
 struct btrfs_fs_info {
 	u8 fsid[BTRFS_FSID_SIZE];
@@ -1070,8 +1075,6 @@ struct btrfs_fs_info {
 	/* device replace state */
 	struct btrfs_dev_replace dev_replace;
 
-	atomic_t mutually_exclusive_operation_running;
-
 	struct percpu_counter bio_counter;
 	wait_queue_head_t replace_wait;
 
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index e653921f05d9..de7b2c897fe0 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -784,8 +784,7 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
 	}
 	btrfs_dev_replace_unlock(dev_replace, 1);
 
-	WARN_ON(atomic_xchg(
-		&fs_info->mutually_exclusive_operation_running, 1));
+	WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
 	task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
 	return PTR_ERR_OR_ZERO(task);
 }
@@ -814,7 +813,7 @@ static int btrfs_dev_replace_kthread(void *data)
 			(unsigned int)progress);
 	}
 	btrfs_dev_replace_continue_on_mount(fs_info);
-	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 
 	return 0;
 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index dabfc7ac48a6..a29dc3fd7152 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1504,7 +1504,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 	if (ret)
 		return ret;
 
-	if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
+	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
 		mnt_drop_write_file(file);
 		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 	}
@@ -1619,7 +1619,7 @@ out_free:
 	kfree(vol_args);
 out:
 	mutex_unlock(&fs_info->volume_mutex);
-	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 	mnt_drop_write_file(file);
 	return ret;
 }
@@ -2661,7 +2661,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1))
+	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
 		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 
 	mutex_lock(&fs_info->volume_mutex);
@@ -2680,7 +2680,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
 	kfree(vol_args);
 out:
 	mutex_unlock(&fs_info->volume_mutex);
-	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 	return ret;
 }
 
@@ -2708,7 +2708,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
 	if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED)
 		return -EOPNOTSUPP;
 
-	if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
+	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
 		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 		goto out;
 	}
@@ -2721,7 +2721,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
 		ret = btrfs_rm_device(fs_info, vol_args->name, 0);
 	}
 	mutex_unlock(&fs_info->volume_mutex);
-	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 
 	if (!ret) {
 		if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
@@ -2752,7 +2752,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 	if (ret)
 		return ret;
 
-	if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
+	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
 		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 		goto out_drop_write;
 	}
@@ -2772,7 +2772,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 		btrfs_info(fs_info, "disk deleted %s", vol_args->name);
 	kfree(vol_args);
 out:
-	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 out_drop_write:
 	mnt_drop_write_file(file);
 
@@ -4442,13 +4442,11 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
 			ret = -EROFS;
 			goto out;
 		}
-		if (atomic_xchg(
-			&fs_info->mutually_exclusive_operation_running, 1)) {
+		if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
 			ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 		} else {
 			ret = btrfs_dev_replace_by_ioctl(fs_info, p);
-			atomic_set(
-			 &fs_info->mutually_exclusive_operation_running, 0);
+			clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 		}
 		break;
 	case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
@@ -4643,7 +4641,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
 		return ret;
 
 again:
-	if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
+	if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
 		mutex_lock(&fs_info->volume_mutex);
 		mutex_lock(&fs_info->balance_mutex);
 		need_unlock = true;
@@ -4689,7 +4687,7 @@ again:
 	}
 
 locked:
-	BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
+	BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
 
 	if (arg) {
 		bargs = memdup_user(arg, sizeof(*bargs));
@@ -4745,11 +4743,10 @@ locked:
 
 do_balance:
 	/*
-	 * Ownership of bctl and mutually_exclusive_operation_running
+	 * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP
 	 * goes to to btrfs_balance.  bctl is freed in __cancel_balance,
 	 * or, if restriper was paused all the way until unmount, in
-	 * free_fs_info.  mutually_exclusive_operation_running is
-	 * cleared in __cancel_balance.
+	 * free_fs_info.  The flag is cleared in __cancel_balance.
 	 */
 	need_unlock = false;
 
@@ -4769,7 +4766,7 @@ out_unlock:
 	mutex_unlock(&fs_info->balance_mutex);
 	mutex_unlock(&fs_info->volume_mutex);
 	if (need_unlock)
-		atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 out:
 	mnt_drop_write_file(file);
 	return ret;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index df50a63bbc3f..2e425a909125 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3761,7 +3761,7 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
 	if (ret)
 		btrfs_handle_fs_error(fs_info, ret, NULL);
 
-	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 }
 
 /* Non-zero return value signifies invalidity */
@@ -3941,7 +3941,7 @@ out:
 		__cancel_balance(fs_info);
 	else {
 		kfree(bctl);
-		atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 	}
 	return ret;
 }
@@ -4031,7 +4031,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
 	btrfs_balance_sys(leaf, item, &disk_bargs);
 	btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
 
-	WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
+	WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
 
 	mutex_lock(&fs_info->volume_mutex);
 	mutex_lock(&fs_info->balance_mutex);
-- 
cgit v1.2.3


From 1bcd7aa17fc1fa29009feec3e0ac4230d0d9f700 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 29 Mar 2017 10:55:16 -0700
Subject: Btrfs: set scrub page's io_error if failing to submit io

Scrub repairs data by the unit called scrub_block, which may contain
several pages.  Scrub always tries to look up a good copy of a whole
block, but if there's no such copy, it tries to do repair page by page.

If we don't set page's io_error when checking this bad copy, in the last
step, we may skip this page when repairing bad copy from good copy.

Cc: David Sterba <dsterba@suse.cz>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6ba0c9ddf777..f499ed782671 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1497,14 +1497,18 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
 
 		bio_add_page(bio, page->page, PAGE_SIZE, 0);
 		if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
-			if (scrub_submit_raid56_bio_wait(fs_info, bio, page))
+			if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) {
+				page->io_error = 1;
 				sblock->no_io_error_seen = 0;
+			}
 		} else {
 			bio->bi_iter.bi_sector = page->physical >> 9;
 			bio_set_op_attrs(bio, REQ_OP_READ, 0);
 
-			if (btrfsic_submit_bio_wait(bio))
+			if (btrfsic_submit_bio_wait(bio)) {
+				page->io_error = 1;
 				sblock->no_io_error_seen = 0;
+			}
 		}
 
 		bio_put(bio);
-- 
cgit v1.2.3


From abad60c6013d62f4ddd19667260e2e4a957a9c2a Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 29 Mar 2017 10:54:26 -0700
Subject: Btrfs: fix wrong failed mirror_num of read-repair on raid56

In raid56 scenario, after trying parity recovery, we didn't set
mirror_num for btrfs_bio with failed mirror_num, hence
end_bio_extent_readpage() will report a random mirror_num in dmesg
log.

Cc: David Sterba <dsterba@suse.cz>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/raid56.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index a8954f5188b4..7bf65d9a9bd3 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -2117,6 +2117,11 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
 	struct btrfs_raid_bio *rbio;
 	int ret;
 
+	if (generic_io) {
+		ASSERT(bbio->mirror_num == mirror_num);
+		btrfs_io_bio(bio)->mirror_num = mirror_num;
+	}
+
 	rbio = alloc_rbio(fs_info, bbio, stripe_len);
 	if (IS_ERR(rbio)) {
 		if (generic_io)
-- 
cgit v1.2.3


From 176ef8f5e662a79471f437839cd9a0c5b95b1e8d Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 28 Mar 2017 14:35:01 +0200
Subject: btrfs: sink GFP flags parameter to tree_mod_log_insert_move

All (1) callers pass the same value.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 7dc8844037e0..d034d47c5470 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -567,7 +567,7 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
 static noinline int
 tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *eb, int dst_slot, int src_slot,
-			 int nr_items, gfp_t flags)
+			 int nr_items)
 {
 	struct tree_mod_elem *tm = NULL;
 	struct tree_mod_elem **tm_list = NULL;
@@ -578,11 +578,11 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 	if (!tree_mod_need_log(fs_info, eb))
 		return 0;
 
-	tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags);
+	tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
 	if (!tm_list)
 		return -ENOMEM;
 
-	tm = kzalloc(sizeof(*tm), flags);
+	tm = kzalloc(sizeof(*tm), GFP_NOFS);
 	if (!tm) {
 		ret = -ENOMEM;
 		goto free_tms;
@@ -596,7 +596,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 
 	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
 		tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
-		    MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
+		    MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
 		if (!tm_list[i]) {
 			ret = -ENOMEM;
 			goto free_tms;
@@ -873,7 +873,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 {
 	int ret;
 	ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
-				       nr_items, GFP_NOFS);
+				       nr_items);
 	BUG_ON(ret < 0);
 }
 
-- 
cgit v1.2.3


From bcc8e07f9ef1cb3a302bed1d41d27ca72eaacc33 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 28 Mar 2017 14:35:42 +0200
Subject: btrfs: sink GFP flags parameter to tree_mod_log_insert_root

All (1) callers pass the same value.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d034d47c5470..165e7ec12af7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -663,7 +663,7 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
 static noinline int
 tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *old_root,
-			 struct extent_buffer *new_root, gfp_t flags,
+			 struct extent_buffer *new_root,
 			 int log_removal)
 {
 	struct tree_mod_elem *tm = NULL;
@@ -678,14 +678,14 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	if (log_removal && btrfs_header_level(old_root) > 0) {
 		nritems = btrfs_header_nritems(old_root);
 		tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
-				  flags);
+				  GFP_NOFS);
 		if (!tm_list) {
 			ret = -ENOMEM;
 			goto free_tms;
 		}
 		for (i = 0; i < nritems; i++) {
 			tm_list[i] = alloc_tree_mod_elem(old_root, i,
-			    MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
+			    MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
 			if (!tm_list[i]) {
 				ret = -ENOMEM;
 				goto free_tms;
@@ -693,7 +693,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 		}
 	}
 
-	tm = kzalloc(sizeof(*tm), flags);
+	tm = kzalloc(sizeof(*tm), GFP_NOFS);
 	if (!tm) {
 		ret = -ENOMEM;
 		goto free_tms;
@@ -943,7 +943,7 @@ tree_mod_log_set_root_pointer(struct btrfs_root *root,
 {
 	int ret;
 	ret = tree_mod_log_insert_root(root->fs_info, root->node,
-				       new_root_node, GFP_NOFS, log_removal);
+				       new_root_node, log_removal);
 	BUG_ON(ret < 0);
 }
 
-- 
cgit v1.2.3


From 825ad4c96432a3908625dd1a79d6528e969e1a09 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 28 Mar 2017 14:45:22 +0200
Subject: btrfs: drop redundant parameters from btrfs_map_sblock

All callers pass 0 for mirror_num and 1 for need_raid_map.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c   | 6 +++---
 fs/btrfs/volumes.c | 6 ++----
 fs/btrfs/volumes.h | 3 +--
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f499ed782671..fe1c793877dd 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1331,7 +1331,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 		 * represents one mirror
 		 */
 		ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
-				logical, &mapped_length, &bbio, 0, 1);
+				logical, &mapped_length, &bbio);
 		if (ret || !bbio || mapped_length < sublen) {
 			btrfs_put_bbio(bbio);
 			return -EIO;
@@ -2192,7 +2192,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
 	int i;
 
 	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
-			&length, &bbio, 0, 1);
+			&length, &bbio);
 	if (ret || !bbio || !bbio->raid_map)
 		goto bbio_out;
 
@@ -2780,7 +2780,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 
 	length = sparity->logic_end - sparity->logic_start;
 	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
-			       &length, &bbio, 0, 1);
+			       &length, &bbio);
 	if (ret || !bbio || !bbio->raid_map)
 		goto bbio_out;
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2e425a909125..3b3be8eba776 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5952,11 +5952,9 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 /* For Scrub/replace */
 int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		     u64 logical, u64 *length,
-		     struct btrfs_bio **bbio_ret, int mirror_num,
-		     int need_raid_map)
+		     struct btrfs_bio **bbio_ret)
 {
-	return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
-				 mirror_num, need_raid_map);
+	return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
 }
 
 int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 9ee6b706db31..def39b5a8d9e 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -400,8 +400,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		    struct btrfs_bio **bbio_ret, int mirror_num);
 int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		     u64 logical, u64 *length,
-		     struct btrfs_bio **bbio_ret, int mirror_num,
-		     int need_raid_map);
+		     struct btrfs_bio **bbio_ret);
 int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
 		     u64 chunk_start, u64 physical, u64 devid,
 		     u64 **logical, int *naddrs, int *stripe_len);
-- 
cgit v1.2.3


From 3159fe7baef3a50fc332455e252d8a01a18f1ff1 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Date: Mon, 13 Mar 2017 15:52:08 +0800
Subject: btrfs: qgroup: Add trace point for qgroup reserved space

Introduce the following trace points:
qgroup_update_reserve
qgroup_meta_reserve

These trace points are handy to trace qgroup reserve space related
problems.

Also export btrfs_qgroup structure, as now we directly pass btrfs_qgroup
structure to trace points, so that structure needs to be exported.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c            | 52 +++++++-------------------------------------
 fs/btrfs/qgroup.h            | 44 +++++++++++++++++++++++++++++++++++++
 include/trace/events/btrfs.h | 44 +++++++++++++++++++++++++++++++++++++
 3 files changed, 96 insertions(+), 44 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1d7942c5a38a..27a229619808 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -47,50 +47,6 @@
  *  - check all ioctl parameters
  */
 
-/*
- * one struct for each qgroup, organized in fs_info->qgroup_tree.
- */
-struct btrfs_qgroup {
-	u64 qgroupid;
-
-	/*
-	 * state
-	 */
-	u64 rfer;	/* referenced */
-	u64 rfer_cmpr;	/* referenced compressed */
-	u64 excl;	/* exclusive */
-	u64 excl_cmpr;	/* exclusive compressed */
-
-	/*
-	 * limits
-	 */
-	u64 lim_flags;	/* which limits are set */
-	u64 max_rfer;
-	u64 max_excl;
-	u64 rsv_rfer;
-	u64 rsv_excl;
-
-	/*
-	 * reservation tracking
-	 */
-	u64 reserved;
-
-	/*
-	 * lists
-	 */
-	struct list_head groups;  /* groups this group is member of */
-	struct list_head members; /* groups that are members of this group */
-	struct list_head dirty;   /* dirty groups */
-	struct rb_node node;	  /* tree of qgroups */
-
-	/*
-	 * temp variables for accounting operations
-	 * Refer to qgroup_shared_accounting() for details.
-	 */
-	u64 old_refcnt;
-	u64 new_refcnt;
-};
-
 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
 					   int mod)
 {
@@ -1075,6 +1031,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
 	qgroup->excl += sign * num_bytes;
 	qgroup->excl_cmpr += sign * num_bytes;
 	if (sign > 0) {
+		trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes);
 		if (WARN_ON(qgroup->reserved < num_bytes))
 			report_reserved_underflow(fs_info, qgroup, num_bytes);
 		else
@@ -1100,6 +1057,8 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
 		WARN_ON(sign < 0 && qgroup->excl < num_bytes);
 		qgroup->excl += sign * num_bytes;
 		if (sign > 0) {
+			trace_qgroup_update_reserve(fs_info, qgroup,
+						    -(s64)num_bytes);
 			if (WARN_ON(qgroup->reserved < num_bytes))
 				report_reserved_underflow(fs_info, qgroup,
 							  num_bytes);
@@ -2446,6 +2405,7 @@ retry:
 
 		qg = unode_aux_to_qgroup(unode);
 
+		trace_qgroup_update_reserve(fs_info, qg, num_bytes);
 		qg->reserved += num_bytes;
 	}
 
@@ -2491,6 +2451,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
 
 		qg = unode_aux_to_qgroup(unode);
 
+		trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
 		if (WARN_ON(qg->reserved < num_bytes))
 			report_reserved_underflow(fs_info, qg, num_bytes);
 		else
@@ -2955,6 +2916,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
 		return 0;
 
 	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
+	trace_qgroup_meta_reserve(root, (s64)num_bytes);
 	ret = qgroup_reserve(root, num_bytes, enforce);
 	if (ret < 0)
 		return ret;
@@ -2974,6 +2936,7 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
 	reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
 	if (reserved == 0)
 		return;
+	trace_qgroup_meta_reserve(root, -(s64)reserved);
 	btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
 }
 
@@ -2988,6 +2951,7 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
 	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
 	WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
 	atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
+	trace_qgroup_meta_reserve(root, -(s64)num_bytes);
 	btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
 }
 
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 96fc56ebf55a..31e468b16175 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -61,6 +61,50 @@ struct btrfs_qgroup_extent_record {
 	struct ulist *old_roots;
 };
 
+/*
+ * one struct for each qgroup, organized in fs_info->qgroup_tree.
+ */
+struct btrfs_qgroup {
+	u64 qgroupid;
+
+	/*
+	 * state
+	 */
+	u64 rfer;	/* referenced */
+	u64 rfer_cmpr;	/* referenced compressed */
+	u64 excl;	/* exclusive */
+	u64 excl_cmpr;	/* exclusive compressed */
+
+	/*
+	 * limits
+	 */
+	u64 lim_flags;	/* which limits are set */
+	u64 max_rfer;
+	u64 max_excl;
+	u64 rsv_rfer;
+	u64 rsv_excl;
+
+	/*
+	 * reservation tracking
+	 */
+	u64 reserved;
+
+	/*
+	 * lists
+	 */
+	struct list_head groups;  /* groups this group is member of */
+	struct list_head members; /* groups that are members of this group */
+	struct list_head dirty;   /* dirty groups */
+	struct rb_node node;	  /* tree of qgroups */
+
+	/*
+	 * temp variables for accounting operations
+	 * Refer to qgroup_shared_accounting() for details.
+	 */
+	u64 old_refcnt;
+	u64 new_refcnt;
+};
+
 /*
  * For qgroup event trace points only
  */
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index cef39e2baf21..e37973526153 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -25,6 +25,7 @@ struct extent_buffer;
 struct btrfs_work;
 struct __btrfs_workqueue;
 struct btrfs_qgroup_extent_record;
+struct btrfs_qgroup;
 
 #define show_ref_type(type)						\
 	__print_symbolic(type,						\
@@ -1614,6 +1615,49 @@ TRACE_EVENT(qgroup_update_counters,
 		  __entry->cur_new_count)
 );
 
+TRACE_EVENT(qgroup_update_reserve,
+
+	TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup,
+		 s64 diff),
+
+	TP_ARGS(fs_info, qgroup, diff),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	u64,	qgid			)
+		__field(	u64,	cur_reserved		)
+		__field(	s64,	diff			)
+	),
+
+	TP_fast_assign_btrfs(fs_info,
+		__entry->qgid		= qgroup->qgroupid;
+		__entry->cur_reserved	= qgroup->reserved;
+		__entry->diff		= diff;
+	),
+
+	TP_printk_btrfs("qgid=%llu cur_reserved=%llu diff=%lld",
+		__entry->qgid, __entry->cur_reserved, __entry->diff)
+);
+
+TRACE_EVENT(qgroup_meta_reserve,
+
+	TP_PROTO(struct btrfs_root *root, s64 diff),
+
+	TP_ARGS(root, diff),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	u64,	refroot			)
+		__field(	s64,	diff			)
+	),
+
+	TP_fast_assign_btrfs(root->fs_info,
+		__entry->refroot	= root->objectid;
+		__entry->diff		= diff;
+	),
+
+	TP_printk_btrfs("refroot=%llu(%s) diff=%lld",
+		show_root_type(__entry->refroot), __entry->diff)
+);
+
 #endif /* _TRACE_BTRFS_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From d51ea5dd222d13dc46c76b2446aa59c4183e3922 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Date: Mon, 13 Mar 2017 15:52:09 +0800
Subject: btrfs: qgroup: Re-arrange tracepoint timing to co-operate with
 reserved space tracepoint

Newly introduced qgroup reserved space trace points are normally nested
into several common qgroup operations.

While some other trace points are not well placed to co-operate with
them, causing confusing output.

This patch re-arrange trace_btrfs_qgroup_release_data() and
trace_btrfs_qgroup_free_delayed_ref() trace points so they are triggered
before reserved space ones.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 10 +++++-----
 fs/btrfs/qgroup.h |  6 +-----
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 27a229619808..3f75b5cbbfef 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2857,14 +2857,14 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
 	if (ret < 0)
 		goto out;
 
-	if (free) {
-		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
-				BTRFS_I(inode)->root->objectid,
-				changeset.bytes_changed);
+	if (free)
 		trace_op = QGROUP_FREE;
-	}
 	trace_btrfs_qgroup_release_data(inode, start, len,
 					changeset.bytes_changed, trace_op);
+	if (free)
+		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
+				BTRFS_I(inode)->root->objectid,
+				changeset.bytes_changed);
 out:
 	ulist_release(&changeset.range_changed);
 	return ret;
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 31e468b16175..fe04d3f295c6 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -230,15 +230,11 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 			 struct btrfs_qgroup_inherit *inherit);
 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
 			       u64 ref_root, u64 num_bytes);
-/*
- * TODO: Add proper trace point for it, as btrfs_qgroup_free() is
- * called by everywhere, can't provide good trace for delayed ref case.
- */
 static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
 						 u64 ref_root, u64 num_bytes)
 {
-	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
 	trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
+	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
 }
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-- 
cgit v1.2.3


From 9a33944bdf075ca93062cde206cb25e62044890e Mon Sep 17 00:00:00 2001
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Date: Wed, 29 Mar 2017 09:33:20 +0800
Subject: btrfs: scrub: Don't append on-disk pages for raid56 scrub

In the following situation, scrub will calculate wrong parity to
overwrite the correct one:

RAID5 full stripe:

Before
|     Dev 1      |     Dev  2     |     Dev 3     |
| Data stripe 1  | Data stripe 2  | Parity Stripe |
--------------------------------------------------- 0
| 0x0000 (Bad)   |     0xcdcd     |     0x0000    |
--------------------------------------------------- 4K
|     0xcdcd     |     0xcdcd     |     0x0000    |
...
|     0xcdcd     |     0xcdcd     |     0x0000    |
--------------------------------------------------- 64K

After scrubbing dev3 only:

|     Dev 1      |     Dev  2     |     Dev 3     |
| Data stripe 1  | Data stripe 2  | Parity Stripe |
--------------------------------------------------- 0
| 0xcdcd (Good)  |     0xcdcd     | 0xcdcd (Bad)  |
--------------------------------------------------- 4K
|     0xcdcd     |     0xcdcd     |     0x0000    |
...
|     0xcdcd     |     0xcdcd     |     0x0000    |
--------------------------------------------------- 64K

The reason is that after raid56 read rebuild rbio->stripe_pages are all
correctly recovered (0xcd for data stripes).

However when we check and repair parity in
scrub_parity_check_and_repair(), we will append pages in sparity->spages
list to rbio->bio_pages[], which contains old on-disk data.

And when we submit parity data to disk, we calculate parity using
rbio->bio_pages[] first, if rbio->bio_pages[] not found, then fallback
to rbio->stripe_pages[].

The patch fix it by not appending pages from sparity->spages.
So finish_parity_scrub() will use rbio->stripe_pages[] which is correct.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index fe1c793877dd..26dbe563fed0 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2769,7 +2769,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
 	struct bio *bio;
 	struct btrfs_raid_bio *rbio;
-	struct scrub_page *spage;
 	struct btrfs_bio *bbio = NULL;
 	u64 length;
 	int ret;
@@ -2799,9 +2798,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 	if (!rbio)
 		goto rbio_out;
 
-	list_for_each_entry(spage, &sparity->spages, list)
-		raid56_add_scrub_pages(rbio, spage->page, spage->logical);
-
 	scrub_pending_bio_inc(sctx);
 	raid56_parity_submit_scrub_rbio(rbio);
 	return;
-- 
cgit v1.2.3


From ae6529c35bcc1c65c12131cef2aea63d8e2ea950 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Date: Wed, 29 Mar 2017 09:33:21 +0800
Subject: btrfs: Wait for in-flight bios before freeing target device for
 raid56

When raid56 dev-replace is cancelled by running scrub, we will free
target device without waiting for in-flight bios, causing the following
NULL pointer deference or general protection failure.

 BUG: unable to handle kernel NULL pointer dereference at 00000000000005e0
 IP: generic_make_request_checks+0x4d/0x610
 CPU: 1 PID: 11676 Comm: kworker/u4:14 Tainted: G  O    4.11.0-rc2 #72
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.10.2-20170228_101828-anatol 04/01/2014
 Workqueue: btrfs-endio-raid56 btrfs_endio_raid56_helper [btrfs]
 task: ffff88002875b4c0 task.stack: ffffc90001334000
 RIP: 0010:generic_make_request_checks+0x4d/0x610
 Call Trace:
  ? generic_make_request+0xc7/0x360
  generic_make_request+0x24/0x360
  ? generic_make_request+0xc7/0x360
  submit_bio+0x64/0x120
  ? page_in_rbio+0x4d/0x80 [btrfs]
  ? rbio_orig_end_io+0x80/0x80 [btrfs]
  finish_rmw+0x3f4/0x540 [btrfs]
  validate_rbio_for_rmw+0x36/0x40 [btrfs]
  raid_rmw_end_io+0x7a/0x90 [btrfs]
  bio_endio+0x56/0x60
  end_workqueue_fn+0x3c/0x40 [btrfs]
  btrfs_scrubparity_helper+0xef/0x620 [btrfs]
  btrfs_endio_raid56_helper+0xe/0x10 [btrfs]
  process_one_work+0x2af/0x720
  ? process_one_work+0x22b/0x720
  worker_thread+0x4b/0x4f0
  kthread+0x10f/0x150
  ? process_one_work+0x720/0x720
  ? kthread_create_on_node+0x40/0x40
  ret_from_fork+0x2e/0x40
 RIP: generic_make_request_checks+0x4d/0x610 RSP: ffffc90001337bb8

In btrfs_dev_replace_finishing(), we will call
btrfs_rm_dev_replace_blocked() to wait bios before destroying the target
device when scrub is finished normally.

However when dev-replace is aborted, either due to error or cancelled by
scrub, we didn't wait for bios, this can lead to use-after-free if there
are bios holding the target device.

Furthermore, for raid56 scrub, at least 2 places are calling
btrfs_map_sblock() without protection of bio_counter, leading to the
problem.

This patch fixes the problem:
1) Wait for bio_counter before freeing target device when canceling
   replace
2) When calling btrfs_map_sblock() for raid56, use bio_counter to
   protect the call.

Cc: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/dev-replace.c |  2 ++
 fs/btrfs/raid56.c      | 14 ++++++++++++++
 fs/btrfs/scrub.c       |  5 +++++
 3 files changed, 21 insertions(+)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index de7b2c897fe0..32b2ffbb5717 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -546,8 +546,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 		mutex_unlock(&fs_info->chunk_mutex);
 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 		mutex_unlock(&uuid_mutex);
+		btrfs_rm_dev_replace_blocked(fs_info);
 		if (tgt_device)
 			btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+		btrfs_rm_dev_replace_unblocked(fs_info);
 		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 
 		return scrub_ret;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 7bf65d9a9bd3..d8ea0eb76325 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -2198,6 +2198,8 @@ static void read_rebuild_work(struct btrfs_work *work)
 /*
  * The following code is used to scrub/replace the parity stripe
  *
+ * Caller must have already increased bio_counter for getting @bbio.
+ *
  * Note: We need make sure all the pages that add into the scrub/replace
  * raid bio are correct and not be changed during the scrub/replace. That
  * is those pages just hold metadata or file data with checksum.
@@ -2235,6 +2237,12 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	ASSERT(rbio->stripe_npages == stripe_nsectors);
 	bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
 
+	/*
+	 * We have already increased bio_counter when getting bbio, record it
+	 * so we can free it at rbio_orig_end_io().
+	 */
+	rbio->generic_bio_cnt = 1;
+
 	return rbio;
 }
 
@@ -2677,6 +2685,12 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
 		return NULL;
 	}
 
+	/*
+	 * When we get bbio, we have already increased bio_counter, record it
+	 * so we can free it at rbio_orig_end_io()
+	 */
+	rbio->generic_bio_cnt = 1;
+
 	return rbio;
 }
 
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 26dbe563fed0..e9612016325f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2191,6 +2191,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
 	int ret;
 	int i;
 
+	btrfs_bio_counter_inc_blocked(fs_info);
 	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
 			&length, &bbio);
 	if (ret || !bbio || !bbio->raid_map)
@@ -2235,6 +2236,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
 rbio_out:
 	bio_put(bio);
 bbio_out:
+	btrfs_bio_counter_dec(fs_info);
 	btrfs_put_bbio(bbio);
 	spin_lock(&sctx->stat_lock);
 	sctx->stat.malloc_errors++;
@@ -2778,6 +2780,8 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 		goto out;
 
 	length = sparity->logic_end - sparity->logic_start;
+
+	btrfs_bio_counter_inc_blocked(fs_info);
 	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
 			       &length, &bbio);
 	if (ret || !bbio || !bbio->raid_map)
@@ -2805,6 +2809,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 rbio_out:
 	bio_put(bio);
 bbio_out:
+	btrfs_bio_counter_dec(fs_info);
 	btrfs_put_bbio(bbio);
 	bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
 		  sparity->nsectors);
-- 
cgit v1.2.3


From e501bfe323356ea3f7ef79d4b0d95389b70a7193 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Date: Wed, 29 Mar 2017 09:33:22 +0800
Subject: btrfs: Prevent scrub recheck from racing with dev replace

scrub_setup_recheck_block() calls btrfs_map_sblock() and then accesses
bbio without protection of bio_counter.

This can lead to use-after-free if racing with dev replace cancel.

Fix it by increasing bio_counter before calling btrfs_map_sblock() and
decreasing the bio_counter when corresponding recover is finished.

Cc: Liu Bo <bo.li.liu@oracle.com>
Reported-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index e9612016325f..c4d1e60e831e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -860,9 +860,11 @@ static inline void scrub_get_recover(struct scrub_recover *recover)
 	refcount_inc(&recover->refs);
 }
 
-static inline void scrub_put_recover(struct scrub_recover *recover)
+static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
+				     struct scrub_recover *recover)
 {
 	if (refcount_dec_and_test(&recover->refs)) {
+		btrfs_bio_counter_dec(fs_info);
 		btrfs_put_bbio(recover->bbio);
 		kfree(recover);
 	}
@@ -1241,7 +1243,7 @@ out:
 				sblock->pagev[page_index]->sblock = NULL;
 				recover = sblock->pagev[page_index]->recover;
 				if (recover) {
-					scrub_put_recover(recover);
+					scrub_put_recover(fs_info, recover);
 					sblock->pagev[page_index]->recover =
 									NULL;
 				}
@@ -1330,16 +1332,19 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 		 * with a length of PAGE_SIZE, each returned stripe
 		 * represents one mirror
 		 */
+		btrfs_bio_counter_inc_blocked(fs_info);
 		ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
 				logical, &mapped_length, &bbio);
 		if (ret || !bbio || mapped_length < sublen) {
 			btrfs_put_bbio(bbio);
+			btrfs_bio_counter_dec(fs_info);
 			return -EIO;
 		}
 
 		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
 		if (!recover) {
 			btrfs_put_bbio(bbio);
+			btrfs_bio_counter_dec(fs_info);
 			return -ENOMEM;
 		}
 
@@ -1365,7 +1370,7 @@ leave_nomem:
 				spin_lock(&sctx->stat_lock);
 				sctx->stat.malloc_errors++;
 				spin_unlock(&sctx->stat_lock);
-				scrub_put_recover(recover);
+				scrub_put_recover(fs_info, recover);
 				return -ENOMEM;
 			}
 			scrub_page_get(page);
@@ -1407,7 +1412,7 @@ leave_nomem:
 			scrub_get_recover(recover);
 			page->recover = recover;
 		}
-		scrub_put_recover(recover);
+		scrub_put_recover(fs_info, recover);
 		length -= sublen;
 		logical += sublen;
 		page_index++;
-- 
cgit v1.2.3


From 619a974292387343c817f5a36e9df6daeb3ccc60 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 29 Mar 2017 20:48:44 +0200
Subject: btrfs: use clear_page where appropriate

There's a helper to clear whole page, with a arch-specific optimized
code. The replaced cases do not seem to be in performace critical code,
but we still might get some percent gain.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/free-space-cache.c | 2 +-
 fs/btrfs/scrub.c            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index da6841efac26..c5e6180cdb8c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -355,7 +355,7 @@ static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
 	io_ctl->orig = io_ctl->cur;
 	io_ctl->size = PAGE_SIZE;
 	if (clear)
-		memset(io_ctl->cur, 0, PAGE_SIZE);
+		clear_page(io_ctl->cur);
 }
 
 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c4d1e60e831e..48dd6f170c36 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1643,7 +1643,7 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
 	if (spage->io_error) {
 		void *mapped_buffer = kmap_atomic(spage->page);
 
-		memset(mapped_buffer, 0, PAGE_SIZE);
+		clear_page(mapped_buffer);
 		flush_dcache_page(spage->page);
 		kunmap_atomic(mapped_buffer);
 	}
-- 
cgit v1.2.3


From c725328c55443f150577efcf445a7924687342e4 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 29 Mar 2017 10:53:58 -0700
Subject: Btrfs: enable repair during read for raid56 profile

Now that scrub can fix data errors with the help of parity for raid56
profile, repair during read is able to as well.

Although the mirror num in raid56 scenario has different meanings, i.e.
0 or 1: read data directly
> 1:    do recover with parity,
it could be fit into how we repair bad block during read.

The trick is to use BTRFS_MAP_READ instead of BTRFS_MAP_WRITE to get the
device and position on it.

Cc: David Sterba <dsterba@suse.cz>
Tested-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 77bd016ba18b..d8da3edf2ac3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2009,10 +2009,6 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
 	ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
 	BUG_ON(!mirror_num);
 
-	/* we can't repair anything in raid56 yet */
-	if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num))
-		return 0;
-
 	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
 	if (!bio)
 		return -EIO;
@@ -2025,17 +2021,35 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
 	 * read repair operation.
 	 */
 	btrfs_bio_counter_inc_blocked(fs_info);
-	ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
-			      &map_length, &bbio, mirror_num);
-	if (ret) {
-		btrfs_bio_counter_dec(fs_info);
-		bio_put(bio);
-		return -EIO;
+	if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num)) {
+		/*
+		 * Note that we don't use BTRFS_MAP_WRITE because it's supposed
+		 * to update all raid stripes, but here we just want to correct
+		 * bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
+		 * stripe's dev and sector.
+		 */
+		ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
+				      &map_length, &bbio, 0);
+		if (ret) {
+			btrfs_bio_counter_dec(fs_info);
+			bio_put(bio);
+			return -EIO;
+		}
+		ASSERT(bbio->mirror_num == 1);
+	} else {
+		ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
+				      &map_length, &bbio, mirror_num);
+		if (ret) {
+			btrfs_bio_counter_dec(fs_info);
+			bio_put(bio);
+			return -EIO;
+		}
+		BUG_ON(mirror_num != bbio->mirror_num);
 	}
-	BUG_ON(mirror_num != bbio->mirror_num);
-	sector = bbio->stripes[mirror_num-1].physical >> 9;
+
+	sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
 	bio->bi_iter.bi_sector = sector;
-	dev = bbio->stripes[mirror_num-1].dev;
+	dev = bbio->stripes[bbio->mirror_num - 1].dev;
 	btrfs_put_bbio(bbio);
 	if (!dev || !dev->bdev || !dev->writeable) {
 		btrfs_bio_counter_dec(fs_info);
-- 
cgit v1.2.3


From 972d7219398651aaf7ae086b4f17c3416b51c7db Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Mon, 3 Apr 2017 13:45:33 -0700
Subject: Btrfs: update scrub_parity to use u64 stripe_len

Commit 3d8da6781760 ("Btrfs: fix divide error upon chunk's stripe_len")
changed stripe_len in struct map_lookup to u64, but didn't update
stripe_len in struct scrub_parity.

This updates the type and switches to div64_u64_rem to match u64 divisor.

Cc: David Sterba <dsterba@suse.cz>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 48dd6f170c36..58c5864bf709 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -140,7 +140,7 @@ struct scrub_parity {
 
 	int			nsectors;
 
-	int			stripe_len;
+	u64			stripe_len;
 
 	refcount_t		refs;
 
@@ -2396,7 +2396,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
 				       unsigned long *bitmap,
 				       u64 start, u64 len)
 {
-	u32 offset;
+	u64 offset;
 	int nsectors;
 	int sectorsize = sparity->sctx->fs_info->sectorsize;
 
@@ -2406,8 +2406,8 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
 	}
 
 	start -= sparity->logic_start;
-	start = div_u64_rem(start, sparity->stripe_len, &offset);
-	offset /= sectorsize;
+	start = div64_u64_rem(start, sparity->stripe_len, &offset);
+	offset = div_u64(offset, sectorsize);
 	nsectors = (int)len / sectorsize;
 
 	if (offset + nsectors <= sparity->nsectors) {
-- 
cgit v1.2.3


From 42c61ab6760f5f6929ebf5a73b7e32b9aa51fbd5 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Mon, 3 Apr 2017 13:45:24 -0700
Subject: Btrfs: switch to div64_u64 if with a u64 divisor

This is fixing code pieces where we use div_u64 when passing a u64 divisor.

Cc: David Sterba <dsterba@suse.cz>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/dev-replace.c | 2 +-
 fs/btrfs/scrub.c       | 4 ++--
 fs/btrfs/volumes.c     | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 32b2ffbb5717..5fe1ca8abc70 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -667,7 +667,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
 		srcdev = dev_replace->srcdev;
-		args->status.progress_1000 = div_u64(dev_replace->cursor_left,
+		args->status.progress_1000 = div64_u64(dev_replace->cursor_left,
 			div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
 		break;
 	}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 58c5864bf709..4786eff53011 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2705,7 +2705,7 @@ static int get_raid56_logic_offset(u64 physical, int num,
 	for (i = 0; i < nr_data_stripes(map); i++) {
 		*offset = last_offset + i * map->stripe_len;
 
-		stripe_nr = div_u64(*offset, map->stripe_len);
+		stripe_nr = div64_u64(*offset, map->stripe_len);
 		stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
 
 		/* Work out the disk rotation on this stripe-set */
@@ -3108,7 +3108,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 
 	physical = map->stripes[num].physical;
 	offset = 0;
-	nstripes = div_u64(length, map->stripe_len);
+	nstripes = div64_u64(length, map->stripe_len);
 	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
 		offset = map->stripe_len * num;
 		increment = map->stripe_len * map->num_stripes;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3b3be8eba776..2a4eada25743 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4816,7 +4816,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	stripe_size = div_u64(stripe_size, dev_stripes);
 
 	/* align to BTRFS_STRIPE_LEN */
-	stripe_size = div_u64(stripe_size, raid_stripe_len);
+	stripe_size = div64_u64(stripe_size, raid_stripe_len);
 	stripe_size *= raid_stripe_len;
 
 	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -5361,7 +5361,7 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
 	stripe_offset = offset - stripe_nr * stripe_len;
 
 	stripe_nr_end = round_up(offset + length, map->stripe_len);
-	stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
+	stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
 	stripe_cnt = stripe_nr_end - stripe_nr;
 	stripe_end_offset = stripe_nr_end * map->stripe_len -
 			    (offset + length);
@@ -5801,7 +5801,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		    (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS ||
 		     mirror_num > 1)) {
 			/* push stripe_nr back to the start of the full stripe */
-			stripe_nr = div_u64(raid56_full_stripe_start,
+			stripe_nr = div64_u64(raid56_full_stripe_start,
 					stripe_len * nr_data_stripes(map));
 
 			/* RAID[56] write or recovery. Return all stripes */
@@ -5998,7 +5998,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
 			continue;
 
 		stripe_nr = physical - map->stripes[i].physical;
-		stripe_nr = div_u64(stripe_nr, map->stripe_len);
+		stripe_nr = div64_u64(stripe_nr, map->stripe_len);
 
 		if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
 			stripe_nr = stripe_nr * map->num_stripes + i;
-- 
cgit v1.2.3


From e884f4f06e89616ba32badcc7d87762e39a792e5 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Tue, 4 Apr 2017 18:40:19 +0800
Subject: btrfs: use q which is already obtained from bdev_get_queue

We have already assigned q from bdev_get_queue() so use it.
And rearrange the code for better view.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2a4eada25743..6cad3233181c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1013,14 +1013,13 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		q = bdev_get_queue(bdev);
 		if (blk_queue_discard(q))
 			device->can_discard = 1;
+		if (!blk_queue_nonrot(q))
+			fs_devices->rotating = 1;
 
 		device->bdev = bdev;
 		device->in_fs_metadata = 0;
 		device->mode = flags;
 
-		if (!blk_queue_nonrot(bdev_get_queue(bdev)))
-			fs_devices->rotating = 1;
-
 		fs_devices->open_devices++;
 		if (device->writeable &&
 		    device->devid != BTRFS_DEV_REPLACE_DEVID) {
@@ -2422,7 +2421,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	fs_info->free_chunk_space += device->total_bytes;
 	spin_unlock(&fs_info->free_chunk_lock);
 
-	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+	if (!blk_queue_nonrot(q))
 		fs_info->fs_devices->rotating = 1;
 
 	tmp = btrfs_super_total_bytes(fs_info->super_copy);
-- 
cgit v1.2.3


From 82bafb38c2d6bf3b9ab91bde448c08b8154660c1 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Date: Mon, 27 Feb 2017 15:10:33 +0800
Subject: btrfs: qgroup: Fix qgroup corruption caused by inode_cache mount
 option

[BUG]
The easist way to reproduce the bug is:
------
 # mkfs.btrfs -f $dev -n 16K
 # mount $dev $mnt -o inode_cache
 # btrfs quota enable $mnt
 # btrfs quota rescan -w $mnt
 # btrfs qgroup show $mnt
qgroupid         rfer         excl
--------         ----         ----
0/5          32.00KiB     32.00KiB
             ^^ Twice the correct value
------

And fstests/btrfs qgroup test group can easily detect them with
inode_cache mount option.
Although some of them are false alerts since old test cases are using
fixed golden output.
While new test cases will use "btrfs check" to detect qgroup mismatch.

[CAUSE]
Inode_cache mount option will make commit_fs_roots() to call
btrfs_save_ino_cache() to update fs/subvol trees, and generate new
delayed refs.

However we call btrfs_qgroup_prepare_account_extents() too early, before
commit_fs_roots().
This makes the "old_roots" for newly generated extents are always NULL.
For freeing extent case, this makes both new_roots and old_roots to be
empty, while correct old_roots should not be empty.
This causing qgroup numbers not decreased correctly.

[FIX]
Modify the timing of calling btrfs_qgroup_prepare_account_extents() to
just before btrfs_qgroup_account_extents(), and add needed delayed_refs
handler.
So qgroup can handle inode_map mount options correctly.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/transaction.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ee5844855c15..2168654c90a1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2128,13 +2128,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 		goto scrub_continue;
 	}
 
-	/* Reocrd old roots for later qgroup accounting */
-	ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
-	if (ret) {
-		mutex_unlock(&fs_info->reloc_mutex);
-		goto scrub_continue;
-	}
-
 	/*
 	 * make sure none of the code above managed to slip in a
 	 * delayed item
@@ -2176,6 +2169,24 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	 */
 	btrfs_free_log_root_tree(trans, fs_info);
 
+	/*
+	 * commit_fs_roots() can call btrfs_save_ino_cache(), which generates
+	 * new delayed refs. Must handle them or qgroup can be wrong.
+	 */
+	ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
+	if (ret) {
+		mutex_unlock(&fs_info->tree_log_mutex);
+		mutex_unlock(&fs_info->reloc_mutex);
+		goto scrub_continue;
+	}
+
+	ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
+	if (ret) {
+		mutex_unlock(&fs_info->tree_log_mutex);
+		mutex_unlock(&fs_info->reloc_mutex);
+		goto scrub_continue;
+	}
+
 	/*
 	 * Since fs roots are all committed, we can get a quite accurate
 	 * new_roots. So let's do quota accounting.
-- 
cgit v1.2.3


From 9986277e0e4ce10fd37bf853fe22ba429a967a45 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 11 Apr 2017 11:57:15 +0300
Subject: Btrfs: handle only applicable errors returned by btrfs_get_extent

btrfs_get_extent() never returns NULL pointers, so this code introduces
a static checker warning.

The btrfs_get_extent() is a bit complex, but trust me that it doesn't
return NULLs and also if it did we would trigger the BUG_ON(!em) before
the last return statement.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
[ updated subject ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c  | 16 ++++------------
 fs/btrfs/inode.c | 25 +++++++++++--------------
 2 files changed, 15 insertions(+), 26 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 520cb7230b2d..3cedbfd08a3a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2342,13 +2342,8 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
 	int ret = 0;
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, *start, *len, 0);
-	if (IS_ERR_OR_NULL(em)) {
-		if (!em)
-			ret = -ENOMEM;
-		else
-			ret = PTR_ERR(em);
-		return ret;
-	}
+	if (IS_ERR(em))
+		return PTR_ERR(em);
 
 	/* Hole or vacuum extent(only exists in no-hole mode) */
 	if (em->block_start == EXTENT_MAP_HOLE) {
@@ -2835,11 +2830,8 @@ static long btrfs_fallocate(struct file *file, int mode,
 	while (1) {
 		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
 				      alloc_end - cur_offset, 0);
-		if (IS_ERR_OR_NULL(em)) {
-			if (!em)
-				ret = -ENOMEM;
-			else
-				ret = PTR_ERR(em);
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
 			break;
 		}
 		last_byte = min(extent_map_end(em), alloc_end);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index da71119b5b7c..31759d48e880 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6736,7 +6736,6 @@ static noinline int uncompress_inline(struct btrfs_path *path,
  *
  * This also copies inline extents directly into the page.
  */
-
 struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
 		struct page *page,
 	    size_t pg_offset, u64 start, u64 len,
@@ -7045,19 +7044,17 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
 	em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
 	if (IS_ERR(em))
 		return em;
-	if (em) {
-		/*
-		 * if our em maps to
-		 * -  a hole or
-		 * -  a pre-alloc extent,
-		 * there might actually be delalloc bytes behind it.
-		 */
-		if (em->block_start != EXTENT_MAP_HOLE &&
-		    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
-			return em;
-		else
-			hole_em = em;
-	}
+	/*
+	 * If our em maps to:
+	 * - a hole or
+	 * - a pre-alloc extent,
+	 * there might actually be delalloc bytes behind it.
+	 */
+	if (em->block_start != EXTENT_MAP_HOLE &&
+	    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+		return em;
+	else
+		hole_em = em;
 
 	/* check to see if we've wrapped (len == -1 or similar) */
 	end = start + len;
-- 
cgit v1.2.3


From fa7aede2ab5f54352c7aec056930dd17b28f3a78 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Fri, 7 Apr 2017 17:57:05 -0700
Subject: btrfs: Use ktime_get_real_ts for root ctime

btrfs_root_item maintains the ctime for root updates.  This is not part
of vfs_inode.

Since current_time() uses struct inode* as an argument as Linus
suggested, this cannot be used to update root times unless, we modify
the signature to use inode.

Since btrfs uses nanosecond time granularity, it can also use
ktime_get_real_ts directly to obtain timestamp for the root. It is
necessary to use the timespec time api here because the same
btrfs_set_stack_timespec_*() apis are used for vfs inode times as well.
These can be transitioned to using timespec64 when btrfs internally
changes to use timespec64 as well.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Acked-by: David Sterba <dsterba@suse.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/root-tree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index a08224eab8b4..7d6bc308bf43 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -501,8 +501,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root)
 {
 	struct btrfs_root_item *item = &root->root_item;
-	struct timespec ct = current_fs_time(root->fs_info->sb);
+	struct timespec ct;
 
+	ktime_get_real_ts(&ct);
 	spin_lock(&root->root_item_lock);
 	btrfs_set_root_ctransid(item, trans->transid);
 	btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec);
-- 
cgit v1.2.3


From 0966a7b1300f953b04b436aa82486d3d1b17c96d Mon Sep 17 00:00:00 2001
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Date: Fri, 14 Apr 2017 08:35:54 +0800
Subject: btrfs: scrub: Introduce full stripe lock for RAID56

Unlike mirror based profiles, RAID5/6 recovery needs to read out the
whole full stripe.

And if we don't do proper protection, it can easily cause race condition.

Introduce 2 new functions: lock_full_stripe() and unlock_full_stripe()
for RAID5/6.
Which store a rb_tree of mutexes for full stripes, so scrub callers can
use them to lock a full stripe to avoid race.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ minor comment adjustments ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       |  17 ++++
 fs/btrfs/extent-tree.c |  11 +++
 fs/btrfs/scrub.c       | 223 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 251 insertions(+)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 70631d773669..1e82516fe2d8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -539,6 +539,14 @@ struct btrfs_io_ctl {
 	unsigned check_crcs:1;
 };
 
+/*
+ * Tree to record all locked full stripes of a RAID5/6 block group
+ */
+struct btrfs_full_stripe_locks_tree {
+	struct rb_root root;
+	struct mutex lock;
+};
+
 struct btrfs_block_group_cache {
 	struct btrfs_key key;
 	struct btrfs_block_group_item item;
@@ -649,6 +657,9 @@ struct btrfs_block_group_cache {
 	 * Protected by free_space_lock.
 	 */
 	int needs_free_space;
+
+	/* Record locked full stripes for RAID5/6 block group */
+	struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
 };
 
 /* delayed seq elem */
@@ -3653,6 +3664,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
 			   struct btrfs_device *dev);
 int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
 			 struct btrfs_scrub_progress *progress);
+static inline void btrfs_init_full_stripe_locks_tree(
+			struct btrfs_full_stripe_locks_tree *locks_root)
+{
+	locks_root->root = RB_ROOT;
+	mutex_init(&locks_root->lock);
+}
 
 /* dev-replace.c */
 void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e870e60e33bc..e390451c72e6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -131,6 +131,16 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
 	if (atomic_dec_and_test(&cache->count)) {
 		WARN_ON(cache->pinned > 0);
 		WARN_ON(cache->reserved > 0);
+
+		/*
+		 * If not empty, someone is still holding mutex of
+		 * full_stripe_lock, which can only be released by caller.
+		 * And it will definitely cause use-after-free when caller
+		 * tries to release full stripe lock.
+		 *
+		 * No better way to resolve, but only to warn.
+		 */
+		WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
 		kfree(cache->free_space_ctl);
 		kfree(cache);
 	}
@@ -9917,6 +9927,7 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
 	btrfs_init_free_space_ctl(cache);
 	atomic_set(&cache->trimming, 0);
 	mutex_init(&cache->free_space_lock);
+	btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
 
 	return cache;
 }
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 4786eff53011..34160d350c77 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -240,6 +240,13 @@ struct scrub_warning {
 	struct btrfs_device	*dev;
 };
 
+struct full_stripe_lock {
+	struct rb_node node;
+	u64 logical;
+	u64 refs;
+	struct mutex mutex;
+};
+
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
 static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
 static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
@@ -348,6 +355,222 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
 	scrub_pause_off(fs_info);
 }
 
+/*
+ * Insert new full stripe lock into full stripe locks tree
+ *
+ * Return pointer to existing or newly inserted full_stripe_lock structure if
+ * everything works well.
+ * Return ERR_PTR(-ENOMEM) if we failed to allocate memory
+ *
+ * NOTE: caller must hold full_stripe_locks_root->lock before calling this
+ * function
+ */
+static struct full_stripe_lock *insert_full_stripe_lock(
+		struct btrfs_full_stripe_locks_tree *locks_root,
+		u64 fstripe_logical)
+{
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct full_stripe_lock *entry;
+	struct full_stripe_lock *ret;
+
+	WARN_ON(!mutex_is_locked(&locks_root->lock));
+
+	p = &locks_root->root.rb_node;
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct full_stripe_lock, node);
+		if (fstripe_logical < entry->logical) {
+			p = &(*p)->rb_left;
+		} else if (fstripe_logical > entry->logical) {
+			p = &(*p)->rb_right;
+		} else {
+			entry->refs++;
+			return entry;
+		}
+	}
+
+	/* Insert new lock */
+	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
+	if (!ret)
+		return ERR_PTR(-ENOMEM);
+	ret->logical = fstripe_logical;
+	ret->refs = 1;
+	mutex_init(&ret->mutex);
+
+	rb_link_node(&ret->node, parent, p);
+	rb_insert_color(&ret->node, &locks_root->root);
+	return ret;
+}
+
+/*
+ * Search for a full stripe lock of a block group
+ *
+ * Return pointer to existing full stripe lock if found
+ * Return NULL if not found
+ */
+static struct full_stripe_lock *search_full_stripe_lock(
+		struct btrfs_full_stripe_locks_tree *locks_root,
+		u64 fstripe_logical)
+{
+	struct rb_node *node;
+	struct full_stripe_lock *entry;
+
+	WARN_ON(!mutex_is_locked(&locks_root->lock));
+
+	node = locks_root->root.rb_node;
+	while (node) {
+		entry = rb_entry(node, struct full_stripe_lock, node);
+		if (fstripe_logical < entry->logical)
+			node = node->rb_left;
+		else if (fstripe_logical > entry->logical)
+			node = node->rb_right;
+		else
+			return entry;
+	}
+	return NULL;
+}
+
+/*
+ * Helper to get full stripe logical from a normal bytenr.
+ *
+ * Caller must ensure @cache is a RAID56 block group.
+ */
+static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
+				   u64 bytenr)
+{
+	u64 ret;
+
+	/*
+	 * Due to chunk item size limit, full stripe length should not be
+	 * larger than U32_MAX. Just a sanity check here.
+	 */
+	WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);
+
+	/*
+	 * round_down() can only handle power of 2, while RAID56 full
+	 * stripe length can be 64KiB * n, so we need to manually round down.
+	 */
+	ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) *
+		cache->full_stripe_len + cache->key.objectid;
+	return ret;
+}
+
+/*
+ * Lock a full stripe to avoid concurrency of recovery and read
+ *
+ * It's only used for profiles with parities (RAID5/6), for other profiles it
+ * does nothing.
+ *
+ * Return 0 if we locked full stripe covering @bytenr, with a mutex held.
+ * So caller must call unlock_full_stripe() at the same context.
+ *
+ * Return <0 if encounters error.
+ */
+static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
+			    bool *locked_ret)
+{
+	struct btrfs_block_group_cache *bg_cache;
+	struct btrfs_full_stripe_locks_tree *locks_root;
+	struct full_stripe_lock *existing;
+	u64 fstripe_start;
+	int ret = 0;
+
+	*locked_ret = false;
+	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
+	if (!bg_cache) {
+		ASSERT(0);
+		return -ENOENT;
+	}
+
+	/* Profiles not based on parity don't need full stripe lock */
+	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
+		goto out;
+	locks_root = &bg_cache->full_stripe_locks_root;
+
+	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
+
+	/* Now insert the full stripe lock */
+	mutex_lock(&locks_root->lock);
+	existing = insert_full_stripe_lock(locks_root, fstripe_start);
+	mutex_unlock(&locks_root->lock);
+	if (IS_ERR(existing)) {
+		ret = PTR_ERR(existing);
+		goto out;
+	}
+	mutex_lock(&existing->mutex);
+	*locked_ret = true;
+out:
+	btrfs_put_block_group(bg_cache);
+	return ret;
+}
+
+/*
+ * Unlock a full stripe.
+ *
+ * NOTE: Caller must ensure it's the same context calling corresponding
+ * lock_full_stripe().
+ *
+ * Return 0 if we unlock full stripe without problem.
+ * Return <0 for error
+ */
+static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
+			      bool locked)
+{
+	struct btrfs_block_group_cache *bg_cache;
+	struct btrfs_full_stripe_locks_tree *locks_root;
+	struct full_stripe_lock *fstripe_lock;
+	u64 fstripe_start;
+	bool freeit = false;
+	int ret = 0;
+
+	/* If we didn't acquire full stripe lock, no need to continue */
+	if (!locked)
+		return 0;
+
+	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
+	if (!bg_cache) {
+		ASSERT(0);
+		return -ENOENT;
+	}
+	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
+		goto out;
+
+	locks_root = &bg_cache->full_stripe_locks_root;
+	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
+
+	mutex_lock(&locks_root->lock);
+	fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
+	/* Unpaired unlock_full_stripe() detected */
+	if (!fstripe_lock) {
+		WARN_ON(1);
+		ret = -ENOENT;
+		mutex_unlock(&locks_root->lock);
+		goto out;
+	}
+
+	if (fstripe_lock->refs == 0) {
+		WARN_ON(1);
+		btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
+			fstripe_lock->logical);
+	} else {
+		fstripe_lock->refs--;
+	}
+
+	if (fstripe_lock->refs == 0) {
+		rb_erase(&fstripe_lock->node, &locks_root->root);
+		freeit = true;
+	}
+	mutex_unlock(&locks_root->lock);
+
+	mutex_unlock(&fstripe_lock->mutex);
+	if (freeit)
+		kfree(fstripe_lock);
+out:
+	btrfs_put_block_group(bg_cache);
+	return ret;
+}
+
 /*
  * used for workers that require transaction commits (i.e., for the
  * NOCOW case)
-- 
cgit v1.2.3


From 28d70e237dac905cd8d1896af10216b7d2bced24 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Date: Fri, 14 Apr 2017 08:35:55 +0800
Subject: btrfs: scrub: Fix RAID56 recovery race condition

When scrubbing a RAID5 which has recoverable data corruption (only one
data stripe is corrupted), sometimes scrub will report more csum errors
than expected. Sometimes even unrecoverable error will be reported.

The problem can be easily reproduced by the following steps:
1) Create a btrfs with RAID5 data profile with 3 devs
2) Mount it with nospace_cache or space_cache=v2
   To avoid extra data space usage.
3) Create a 128K file and sync the fs, unmount it
   Now the 128K file lies at the beginning of the data chunk
4) Locate the physical bytenr of data chunk on dev3
   Dev3 is the 1st data stripe.
5) Corrupt the first 64K of the data chunk stripe on dev3
6) Mount the fs and scrub it

The correct csum error number should be 16 (assuming using x86_64).
Larger csum error number can be reported in a 1/3 chance.
And unrecoverable error can also be reported in a 1/10 chance.

The root cause of the problem is RAID5/6 recover code has race
condition, due to the fact that full scrub is initiated per device.

While for other mirror based profiles, each mirror is independent with
each other, so race won't cause any big problem.

For example:
        Corrupted       |       Correct          |      Correct        |
|   Scrub dev3 (D1)     |    Scrub dev2 (D2)     |    Scrub dev1(P)    |
------------------------------------------------------------------------
Read out D1             |Read out D2             |Read full stripe     |
Check csum              |Check csum              |Check parity         |
Csum mismatch           |Csum match, continue    |Parity mismatch      |
handle_errored_block    |                        |handle_errored_block |
 Read out full stripe   |                        | Read out full stripe|
 D1 csum error(err++)   |                        | D1 csum error(err++)|
 Recover D1             |                        | Recover D1          |

So D1's csum error is accounted twice, just because
handle_errored_block() doesn't have enough protection, and race can happen.

On even worse case, for example D1's recovery code is re-writing
D1/D2/P, and P's recovery code is just reading out full stripe, then we
can cause unrecoverable error.

This patch will use previously introduced lock_full_stripe() and
unlock_full_stripe() to protect the whole scrub_handle_errored_block()
function for RAID56 recovery.
So no extra csum error nor unrecoverable error.

Reported-by: Goffredo Baroncelli <kreijack@libero.it>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 34160d350c77..c7b45eb2403d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1117,6 +1117,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	int mirror_index;
 	int page_num;
 	int success;
+	bool full_stripe_locked;
 	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
 				      DEFAULT_RATELIMIT_BURST);
 
@@ -1142,6 +1143,24 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	have_csum = sblock_to_check->pagev[0]->have_csum;
 	dev = sblock_to_check->pagev[0]->dev;
 
+	/*
+	 * For RAID5/6, race can happen for a different device scrub thread.
+	 * For data corruption, Parity and Data threads will both try
+	 * to recovery the data.
+	 * Race can lead to doubly added csum error, or even unrecoverable
+	 * error.
+	 */
+	ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
+	if (ret < 0) {
+		spin_lock(&sctx->stat_lock);
+		if (ret == -ENOMEM)
+			sctx->stat.malloc_errors++;
+		sctx->stat.read_errors++;
+		sctx->stat.uncorrectable_errors++;
+		spin_unlock(&sctx->stat_lock);
+		return ret;
+	}
+
 	if (sctx->is_dev_replace && !is_metadata && !have_csum) {
 		sblocks_for_recheck = NULL;
 		goto nodatasum_case;
@@ -1476,6 +1495,9 @@ out:
 		kfree(sblocks_for_recheck);
 	}
 
+	ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
+	if (ret < 0)
+		return ret;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 13e88e1560d6014838e2dd9f8b9cf8ec9a8d86e6 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Thu, 6 Apr 2017 11:22:52 +0800
Subject: btrfs: delete unused member nobarriers

The last consumer of nobarriers is removed by the commit [1] and sync
won't fail with EOPNOTSUPP anymore. Thus, now when write cache is write
through it just return success without actually transpiring such a
request to the block device/lun.

[1]
commit b25de9d6da49b1a8760a89672283128aa8c78345
block: remove BIO_EOPNOTSUPP

And, as the device/lun write cache state may change dynamically saving
such as state won't help either. So deleting the member nobarriers.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 3 ---
 fs/btrfs/volumes.h | 1 -
 2 files changed, 4 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a9314fe494c9..e070e463ad8b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3521,9 +3521,6 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
 	struct bio *bio;
 	int ret = 0;
 
-	if (device->nobarriers)
-		return 0;
-
 	if (wait) {
 		bio = device->flush_bio;
 		if (!bio)
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index def39b5a8d9e..c7d0fbc915ca 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -123,7 +123,6 @@ struct btrfs_device {
 	struct list_head resized_list;
 
 	/* for sending down flush barriers */
-	int nobarriers;
 	struct bio *flush_bio;
 	struct completion flush_wait;
 
-- 
cgit v1.2.3


From c2a9c7ab475bc3aaf06521a39ac65bc48c8cad4f Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Thu, 6 Apr 2017 11:22:53 +0800
Subject: btrfs: check if the device is flush capable

The block layer call chain from submit_bio will check if the write cache
is enabled for the given queue before submitting the flush. This will
add a code to fail fast if its not.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ updated changelog to reflect current code stat, blkdev_issue_flush is
  not used yet ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e070e463ad8b..ba7bd65693a3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3518,9 +3518,13 @@ static void btrfs_end_empty_barrier(struct bio *bio)
  */
 static int write_dev_flush(struct btrfs_device *device, int wait)
 {
+	struct request_queue *q = bdev_get_queue(device->bdev);
 	struct bio *bio;
 	int ret = 0;
 
+	if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+		return 0;
+
 	if (wait) {
 		bio = device->flush_bio;
 		if (!bio)
-- 
cgit v1.2.3


From 90ad4052e85cece1bbae064ff4b14088de35df29 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 14 Apr 2017 15:17:26 +0900
Subject: kbuild: avoid conflict between -ffunction-sections and -pg on gcc-4.7

Arnd Bergmann reported:
  "When ftrace is enabled and we build with gcc-4.7 or older, we
  get a warning for each file on architectures that select
  CONFIG_LD_DEAD_CODE_DATA_ELIMINATION:

  warning: -ffunction-sections disabled; it makes profiling impossible [enabled by default]
  "

Since commit c3f0d0bc5b01 ("kbuild, LLVMLinux: Add -Werror to
cc-option to support clang"), warnings are treated as errors in
cc-option checks.  CC_FLAGS_FTRACE is blindly added to KBUILD_CFLAGS,
so $(call cc-option,-ffunction-sections,) should be moved below it
in order to detect the conflict between the two options.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index d26618608633..245852f7b312 100644
--- a/Makefile
+++ b/Makefile
@@ -632,11 +632,6 @@ include arch/$(SRCARCH)/Makefile
 KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
 KBUILD_CFLAGS	+= $(call cc-disable-warning,frame-address,)
 
-ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
-KBUILD_CFLAGS	+= $(call cc-option,-ffunction-sections,)
-KBUILD_CFLAGS	+= $(call cc-option,-fdata-sections,)
-endif
-
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS	+= $(call cc-option,-Oz,-Os)
 KBUILD_CFLAGS	+= $(call cc-disable-warning,maybe-uninitialized,)
@@ -768,6 +763,11 @@ ifdef CONFIG_DEBUG_SECTION_MISMATCH
 KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once)
 endif
 
+ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+KBUILD_CFLAGS	+= $(call cc-option,-ffunction-sections,)
+KBUILD_CFLAGS	+= $(call cc-option,-fdata-sections,)
+endif
+
 # arch Makefile may override CC so keep this after arch Makefile is included
 NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
 CHECKFLAGS     += $(NOSTDINC_FLAGS)
-- 
cgit v1.2.3


From 4e8c2eb54327a6f8b0ef6d6afb28ab24b721dbe0 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 11:04:03 +0200
Subject: fuse: convert fuse_file.count from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/file.c   | 8 ++++----
 fs/fuse/fuse_i.h | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ec238fb5a584..70770a0db734 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -58,7 +58,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 	}
 
 	INIT_LIST_HEAD(&ff->write_entry);
-	atomic_set(&ff->count, 1);
+	refcount_set(&ff->count, 1);
 	RB_CLEAR_NODE(&ff->polled_node);
 	init_waitqueue_head(&ff->poll_wait);
 
@@ -77,7 +77,7 @@ void fuse_file_free(struct fuse_file *ff)
 
 static struct fuse_file *fuse_file_get(struct fuse_file *ff)
 {
-	atomic_inc(&ff->count);
+	refcount_inc(&ff->count);
 	return ff;
 }
 
@@ -88,7 +88,7 @@ static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
 
 static void fuse_file_put(struct fuse_file *ff, bool sync)
 {
-	if (atomic_dec_and_test(&ff->count)) {
+	if (refcount_dec_and_test(&ff->count)) {
 		struct fuse_req *req = ff->reserved_req;
 
 		if (ff->fc->no_open) {
@@ -293,7 +293,7 @@ static int fuse_release(struct inode *inode, struct file *file)
 
 void fuse_sync_release(struct fuse_file *ff, int flags)
 {
-	WARN_ON(atomic_read(&ff->count) != 1);
+	WARN_ON(refcount_read(&ff->count) > 1);
 	fuse_prepare_release(ff, flags, FUSE_RELEASE);
 	/*
 	 * iput(NULL) is a no-op and since the refcount is 1 and everything's
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 32ac2c9b09c0..1d6d67e64f49 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -24,6 +24,7 @@
 #include <linux/workqueue.h>
 #include <linux/kref.h>
 #include <linux/xattr.h>
+#include <linux/refcount.h>
 
 /** Max number of pages that can be used in a single read request */
 #define FUSE_MAX_PAGES_PER_REQ 32
@@ -137,7 +138,7 @@ struct fuse_file {
 	u64 nodeid;
 
 	/** Refcount */
-	atomic_t count;
+	refcount_t count;
 
 	/** FOPEN_* flags returned by open */
 	u32 open_flags;
-- 
cgit v1.2.3


From ec99f6d31f2590a4c0ff2dae8fb1fa27f0647a42 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 11:04:04 +0200
Subject: fuse: convert fuse_req.count from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dev.c    | 9 ++++-----
 fs/fuse/fuse_i.h | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index b681b43c766e..5e815072be1b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -45,7 +45,7 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages,
 	INIT_LIST_HEAD(&req->list);
 	INIT_LIST_HEAD(&req->intr_entry);
 	init_waitqueue_head(&req->waitq);
-	atomic_set(&req->count, 1);
+	refcount_set(&req->count, 1);
 	req->pages = pages;
 	req->page_descs = page_descs;
 	req->max_pages = npages;
@@ -102,14 +102,13 @@ void fuse_request_free(struct fuse_req *req)
 
 void __fuse_get_request(struct fuse_req *req)
 {
-	atomic_inc(&req->count);
+	refcount_inc(&req->count);
 }
 
 /* Must be called with > 1 refcount */
 static void __fuse_put_request(struct fuse_req *req)
 {
-	BUG_ON(atomic_read(&req->count) < 2);
-	atomic_dec(&req->count);
+	refcount_dec(&req->count);
 }
 
 static void fuse_req_init_context(struct fuse_req *req)
@@ -264,7 +263,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
 
 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 {
-	if (atomic_dec_and_test(&req->count)) {
+	if (refcount_dec_and_test(&req->count)) {
 		if (test_bit(FR_BACKGROUND, &req->flags)) {
 			/*
 			 * We get here in the unlikely case that a background
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1d6d67e64f49..9d4374032290 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -307,7 +307,7 @@ struct fuse_req {
 	struct list_head intr_entry;
 
 	/** refcount */
-	atomic_t count;
+	refcount_t count;
 
 	/** Unique ID for the interrupt request */
 	u64 intr_unique;
-- 
cgit v1.2.3


From 095fc40ace5ffccd306f39fdd1a40b4faa41b8a0 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 11:04:05 +0200
Subject: fuse: convert fuse_conn.count from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/fuse_i.h | 2 +-
 fs/fuse/inode.c  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 9d4374032290..6c649f0c58f9 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -449,7 +449,7 @@ struct fuse_conn {
 	spinlock_t lock;
 
 	/** Refcount */
-	atomic_t count;
+	refcount_t count;
 
 	/** Number of fuse_dev's */
 	atomic_t dev_count;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6fe6a88ecb4a..3961c5f886be 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -608,7 +608,7 @@ void fuse_conn_init(struct fuse_conn *fc)
 	memset(fc, 0, sizeof(*fc));
 	spin_lock_init(&fc->lock);
 	init_rwsem(&fc->killsb);
-	atomic_set(&fc->count, 1);
+	refcount_set(&fc->count, 1);
 	atomic_set(&fc->dev_count, 1);
 	init_waitqueue_head(&fc->blocked_waitq);
 	init_waitqueue_head(&fc->reserved_req_waitq);
@@ -631,7 +631,7 @@ EXPORT_SYMBOL_GPL(fuse_conn_init);
 
 void fuse_conn_put(struct fuse_conn *fc)
 {
-	if (atomic_dec_and_test(&fc->count)) {
+	if (refcount_dec_and_test(&fc->count)) {
 		if (fc->destroy_req)
 			fuse_request_free(fc->destroy_req);
 		fc->release(fc);
@@ -641,7 +641,7 @@ EXPORT_SYMBOL_GPL(fuse_conn_put);
 
 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
 {
-	atomic_inc(&fc->count);
+	refcount_inc(&fc->count);
 	return fc;
 }
 EXPORT_SYMBOL_GPL(fuse_conn_get);
-- 
cgit v1.2.3


From 0b6e9ea041e6c932f5b3a86fae2d60cbcfad4dd2 Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Wed, 2 Jul 2014 16:29:19 -0500
Subject: fuse: Add support for pid namespaces

When the userspace process servicing fuse requests is running in
a pid namespace then pids passed via the fuse fd are not being
translated into that process' namespace. Translation is necessary
for the pid to be useful to that process.

Since no use case currently exists for changing namespaces all
translations can be done relative to the pid namespace in use
when fuse_conn_init() is called. For fuse this translates to
mount time, and for cuse this is when /dev/cuse is opened. IO for
this connection from another namespace will return errors.

Requests from processes whose pid cannot be translated into the
target namespace will have a value of 0 for in.h.pid.

File locking changes based on previous work done by Eric
Biederman.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dev.c    | 15 +++++++++++----
 fs/fuse/file.c   | 22 +++++++++++++++++-----
 fs/fuse/fuse_i.h |  4 ++++
 fs/fuse/inode.c  |  3 +++
 4 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5e815072be1b..f95682115bc2 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -20,6 +20,7 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/swap.h>
 #include <linux/splice.h>
+#include <linux/sched.h>
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
@@ -111,11 +112,11 @@ static void __fuse_put_request(struct fuse_req *req)
 	refcount_dec(&req->count);
 }
 
-static void fuse_req_init_context(struct fuse_req *req)
+static void fuse_req_init_context(struct fuse_conn *fc, struct fuse_req *req)
 {
 	req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
 	req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
-	req->in.h.pid = current->pid;
+	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
 }
 
 void fuse_set_initialized(struct fuse_conn *fc)
@@ -162,7 +163,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
 		goto out;
 	}
 
-	fuse_req_init_context(req);
+	fuse_req_init_context(fc, req);
 	__set_bit(FR_WAITING, &req->flags);
 	if (for_background)
 		__set_bit(FR_BACKGROUND, &req->flags);
@@ -255,7 +256,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
 	if (!req)
 		req = get_reserved_req(fc, file);
 
-	fuse_req_init_context(req);
+	fuse_req_init_context(fc, req);
 	__set_bit(FR_WAITING, &req->flags);
 	__clear_bit(FR_BACKGROUND, &req->flags);
 	return req;
@@ -1222,6 +1223,9 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
 	struct fuse_in *in;
 	unsigned reqsize;
 
+	if (task_active_pid_ns(current) != fc->pid_ns)
+		return -EIO;
+
  restart:
 	spin_lock(&fiq->waitq.lock);
 	err = -EAGAIN;
@@ -1820,6 +1824,9 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
 	struct fuse_req *req;
 	struct fuse_out_header oh;
 
+	if (task_active_pid_ns(current) != fc->pid_ns)
+		return -EIO;
+
 	if (nbytes < sizeof(struct fuse_out_header))
 		return -EINVAL;
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 70770a0db734..aa93f09ae6e6 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2083,7 +2083,8 @@ static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
 	return generic_file_mmap(file, vma);
 }
 
-static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
+static int convert_fuse_file_lock(struct fuse_conn *fc,
+				  const struct fuse_file_lock *ffl,
 				  struct file_lock *fl)
 {
 	switch (ffl->type) {
@@ -2098,7 +2099,14 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
 
 		fl->fl_start = ffl->start;
 		fl->fl_end = ffl->end;
-		fl->fl_pid = ffl->pid;
+
+		/*
+		 * Convert pid into the caller's pid namespace. If the pid
+		 * does not map into the namespace fl_pid will get set to 0.
+		 */
+		rcu_read_lock();
+		fl->fl_pid = pid_vnr(find_pid_ns(ffl->pid, fc->pid_ns));
+		rcu_read_unlock();
 		break;
 
 	default:
@@ -2147,7 +2155,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
 	args.out.args[0].value = &outarg;
 	err = fuse_simple_request(fc, &args);
 	if (!err)
-		err = convert_fuse_file_lock(&outarg.lk, fl);
+		err = convert_fuse_file_lock(fc, &outarg.lk, fl);
 
 	return err;
 }
@@ -2159,7 +2167,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
 	FUSE_ARGS(args);
 	struct fuse_lk_in inarg;
 	int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
-	pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
+	struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL;
+	pid_t pid_nr = pid_nr_ns(pid, fc->pid_ns);
 	int err;
 
 	if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
@@ -2171,7 +2180,10 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
 	if (fl->fl_flags & FL_CLOSE)
 		return 0;
 
-	fuse_lk_fill(&args, file, fl, opcode, pid, flock, &inarg);
+	if (pid && pid_nr == 0)
+		return -EOVERFLOW;
+
+	fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
 	err = fuse_simple_request(fc, &args);
 
 	/* locking is restartable */
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6c649f0c58f9..041521d29d33 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -24,6 +24,7 @@
 #include <linux/workqueue.h>
 #include <linux/kref.h>
 #include <linux/xattr.h>
+#include <linux/pid_namespace.h>
 #include <linux/refcount.h>
 
 /** Max number of pages that can be used in a single read request */
@@ -462,6 +463,9 @@ struct fuse_conn {
 	/** The group id for this mount */
 	kgid_t group_id;
 
+	/** The pid namespace for this mount */
+	struct pid_namespace *pid_ns;
+
 	/** Maximum read size */
 	unsigned max_read;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3961c5f886be..a137f1c99a33 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -21,6 +21,7 @@
 #include <linux/sched.h>
 #include <linux/exportfs.h>
 #include <linux/posix_acl.h>
+#include <linux/pid_namespace.h>
 
 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
 MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -626,6 +627,7 @@ void fuse_conn_init(struct fuse_conn *fc)
 	fc->connected = 1;
 	fc->attr_version = 1;
 	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
+	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);
 
@@ -634,6 +636,7 @@ void fuse_conn_put(struct fuse_conn *fc)
 	if (refcount_dec_and_test(&fc->count)) {
 		if (fc->destroy_req)
 			fuse_request_free(fc->destroy_req);
+		put_pid_ns(fc->pid_ns);
 		fc->release(fc);
 	}
 }
-- 
cgit v1.2.3


From 20c994e4d806e0c656b5fe0d14b17896b73a086c Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Mon, 3 Apr 2017 12:46:36 -0700
Subject: frv: Use OFFSET macro in DEF_*REG()

Avoid code duplication by using OFFSET() in DEF_*REG() instead of
replicating the macro.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/frv/kernel/asm-offsets.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/arch/frv/kernel/asm-offsets.c b/arch/frv/kernel/asm-offsets.c
index 8414293f213a..20c5b79b55f9 100644
--- a/arch/frv/kernel/asm-offsets.c
+++ b/arch/frv/kernel/asm-offsets.c
@@ -14,21 +14,10 @@
 #include <asm/thread_info.h>
 #include <asm/gdb-stub.h>
 
-#define DEF_PTREG(sym, reg) \
-        asm volatile("\n->" #sym " %0 offsetof(struct pt_regs, " #reg ")" \
-		     : : "i" (offsetof(struct pt_regs, reg)))
-
-#define DEF_IREG(sym, reg) \
-        asm volatile("\n->" #sym " %0 offsetof(struct user_context, " #reg ")" \
-		     : : "i" (offsetof(struct user_context, reg)))
-
-#define DEF_FREG(sym, reg) \
-        asm volatile("\n->" #sym " %0 offsetof(struct user_context, " #reg ")" \
-		     : : "i" (offsetof(struct user_context, reg)))
-
-#define DEF_0REG(sym, reg) \
-        asm volatile("\n->" #sym " %0 offsetof(struct frv_frame0, " #reg ")" \
-		     : : "i" (offsetof(struct frv_frame0, reg)))
+#define DEF_PTREG(sym, reg) OFFSET(sym, pt_regs, reg)
+#define DEF_IREG(sym, reg) OFFSET(sym, user_context, reg)
+#define DEF_FREG(sym, reg) OFFSET(sym, user_context, reg)
+#define DEF_0REG(sym, reg) OFFSET(sym, frv_frame0, reg)
 
 void foo(void)
 {
-- 
cgit v1.2.3


From b8c17e6664c461e4aed545a943304c3b32dd309c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 8 Nov 2016 14:25:21 -0800
Subject: rcu: Maintain special bits at bottom of ->dynticks counter

Currently, IPIs are used to force other CPUs to invalidate their TLBs
in response to a kernel virtual-memory mapping change.  This works, but
degrades both battery lifetime (for idle CPUs) and real-time response
(for nohz_full CPUs), and in addition results in unnecessary IPIs due to
the fact that CPUs executing in usermode are unaffected by stale kernel
mappings.  It would be better to cause a CPU executing in usermode to
wait until it is entering kernel mode to do the flush, first to avoid
interrupting usemode tasks and second to handle multiple flush requests
with a single flush in the case of a long-running user task.

This commit therefore reserves a bit at the bottom of the ->dynticks
counter, which is checked upon exit from extended quiescent states.
If it is set, it is cleared and then a new rcu_eqs_special_exit() macro is
invoked, which, if not supplied, is an empty single-pass do-while loop.
If this bottom bit is set on -entry- to an extended quiescent state,
then a WARN_ON_ONCE() triggers.

This bottom bit may be set using a new rcu_eqs_special_set() function,
which returns true if the bit was set, or false if the CPU turned
out to not be in an extended quiescent state.  Please note that this
function refuses to set the bit for a non-nohz_full CPU when that CPU
is executing in usermode because usermode execution is tracked by RCU
as a dyntick-idle extended quiescent state only for nohz_full CPUs.

Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcutiny.h |  5 ++++
 kernel/rcu/tree.c       | 77 +++++++++++++++++++++++++++++++++++++++----------
 kernel/rcu/tree.h       |  1 +
 3 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index b452953e21c8..6c9d941e3962 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -33,6 +33,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 	return 0;
 }
 
+static inline bool rcu_eqs_special_set(int cpu)
+{
+	return false;  /* Never flag non-existent other CPUs! */
+}
+
 static inline unsigned long get_state_synchronize_rcu(void)
 {
 	return 0;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 50fee7689e71..0efad311ded4 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -274,9 +274,19 @@ void rcu_bh_qs(void)
 
 static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
 
+/*
+ * Steal a bit from the bottom of ->dynticks for idle entry/exit
+ * control.  Initially this is for TLB flushing.
+ */
+#define RCU_DYNTICK_CTRL_MASK 0x1
+#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)
+#ifndef rcu_eqs_special_exit
+#define rcu_eqs_special_exit() do { } while (0)
+#endif
+
 static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
-	.dynticks = ATOMIC_INIT(1),
+	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
 	.dynticks_idle = ATOMIC_INIT(1),
@@ -290,15 +300,20 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 static void rcu_dynticks_eqs_enter(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int special;
+	int seq;
 
 	/*
-	 * CPUs seeing atomic_inc_return() must see prior RCU read-side
+	 * CPUs seeing atomic_add_return() must see prior RCU read-side
 	 * critical sections, and we also must force ordering with the
 	 * next idle sojourn.
 	 */
-	special = atomic_inc_return(&rdtp->dynticks);
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && special & 0x1);
+	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
+	/* Better be in an extended quiescent state! */
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     (seq & RCU_DYNTICK_CTRL_CTR));
+	/* Better not have special action (TLB flush) pending! */
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     (seq & RCU_DYNTICK_CTRL_MASK));
 }
 
 /*
@@ -308,15 +323,22 @@ static void rcu_dynticks_eqs_enter(void)
 static void rcu_dynticks_eqs_exit(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int special;
+	int seq;
 
 	/*
-	 * CPUs seeing atomic_inc_return() must see prior idle sojourns,
+	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
 	 * and we also must force ordering with the next RCU read-side
 	 * critical section.
 	 */
-	special = atomic_inc_return(&rdtp->dynticks);
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(special & 0x1));
+	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     !(seq & RCU_DYNTICK_CTRL_CTR));
+	if (seq & RCU_DYNTICK_CTRL_MASK) {
+		atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks);
+		smp_mb__after_atomic(); /* _exit after clearing mask. */
+		/* Prefer duplicate flushes to losing a flush. */
+		rcu_eqs_special_exit();
+	}
 }
 
 /*
@@ -333,9 +355,9 @@ static void rcu_dynticks_eqs_online(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
-	if (atomic_read(&rdtp->dynticks) & 0x1)
+	if (atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR)
 		return;
-	atomic_add(0x1, &rdtp->dynticks);
+	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
 }
 
 /*
@@ -347,7 +369,7 @@ bool rcu_dynticks_curr_cpu_in_eqs(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
-	return !(atomic_read(&rdtp->dynticks) & 0x1);
+	return !(atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR);
 }
 
 /*
@@ -358,7 +380,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 {
 	int snap = atomic_add_return(0, &rdtp->dynticks);
 
-	return snap;
+	return snap & ~RCU_DYNTICK_CTRL_MASK;
 }
 
 /*
@@ -367,7 +389,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
  */
 static bool rcu_dynticks_in_eqs(int snap)
 {
-	return !(snap & 0x1);
+	return !(snap & RCU_DYNTICK_CTRL_CTR);
 }
 
 /*
@@ -387,10 +409,33 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap)
 static void rcu_dynticks_momentary_idle(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int special = atomic_add_return(2, &rdtp->dynticks);
+	int special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
+					&rdtp->dynticks);
 
 	/* It is illegal to call this from idle state. */
-	WARN_ON_ONCE(!(special & 0x1));
+	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
+}
+
+/*
+ * Set the special (bottom) bit of the specified CPU so that it
+ * will take special action (such as flushing its TLB) on the
+ * next exit from an extended quiescent state.  Returns true if
+ * the bit was successfully set, or false if the CPU was not in
+ * an extended quiescent state.
+ */
+bool rcu_eqs_special_set(int cpu)
+{
+	int old;
+	int new;
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	do {
+		old = atomic_read(&rdtp->dynticks);
+		if (old & RCU_DYNTICK_CTRL_CTR)
+			return false;
+		new = old | RCU_DYNTICK_CTRL_MASK;
+	} while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old);
+	return true;
 }
 
 DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index ec62a05bfdb3..7468b4de7e0c 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -596,6 +596,7 @@ extern struct rcu_state rcu_preempt_state;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 
 int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
+bool rcu_eqs_special_set(int cpu);
 
 #ifdef CONFIG_RCU_BOOST
 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
-- 
cgit v1.2.3


From 77e5849688670280b173bb9e0544e9da7b2acc36 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 14 Jan 2017 13:32:50 -0800
Subject: rcu: Make arch select smp_mb__after_unlock_lock() strength

The definition of smp_mb__after_unlock_lock() is currently smp_mb()
for CONFIG_PPC and a no-op otherwise.  It would be better to instead
provide an architecture-selectable Kconfig option, and select the
strength of smp_mb__after_unlock_lock() based on that option.  This
commit therefore creates ARCH_WEAK_RELEASE_ACQUIRE, has PPC select it,
and bases the definition of smp_mb__after_unlock_lock() on this new
ARCH_WEAK_RELEASE_ACQUIRE Kconfig option.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Boqun Feng <boqun.feng@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: <linuxppc-dev@lists.ozlabs.org>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 arch/Kconfig             | 3 +++
 arch/powerpc/Kconfig     | 1 +
 include/linux/rcupdate.h | 6 +++---
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index cd211a14a88f..adefaf344239 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -320,6 +320,9 @@ config HAVE_CMPXCHG_LOCAL
 config HAVE_CMPXCHG_DOUBLE
 	bool
 
+config ARCH_WEAK_RELEASE_ACQUIRE
+	bool
+
 config ARCH_WANT_IPC_PARSE_VERSION
 	bool
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 97a8bc8a095c..7a5c9b764cd2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -99,6 +99,7 @@ config PPC
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
 	select ARCH_WANT_IPC_PARSE_VERSION
+	select ARCH_WEAK_RELEASE_ACQUIRE
 	select BINFMT_ELF
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index de88b33c0974..e6146d0074f8 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1127,11 +1127,11 @@ do { \
  * if the UNLOCK and LOCK are executed by the same CPU or if the
  * UNLOCK and LOCK operate on the same lock variable.
  */
-#ifdef CONFIG_PPC
+#ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE
 #define smp_mb__after_unlock_lock()	smp_mb()  /* Full ordering for lock. */
-#else /* #ifdef CONFIG_PPC */
+#else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
 #define smp_mb__after_unlock_lock()	do { } while (0)
-#endif /* #else #ifdef CONFIG_PPC */
+#endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
 
 
 #endif /* __LINUX_RCUPDATE_H */
-- 
cgit v1.2.3


From cc985822a0b6dbe68ad6cfa641b19c1d303ab455 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 19 Jan 2017 13:33:17 -0800
Subject: srcu: Consolidate batch checking into rcu_all_batches_empty()

The srcu_reschedule() function invokes rcu_batch_empty() on each of
the four rcu_batch structures in the srcu_struct in question twice.
Given that this check will also be needed in cleanup_srcu_struct(), this
commit consolidates these four checks into a new rcu_all_batches_empty()
function.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcu/srcu.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index ef3bcfb15b39..ba41a5d04b49 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -64,6 +64,17 @@ static inline bool rcu_batch_empty(struct rcu_batch *b)
 	return b->tail == &b->head;
 }
 
+/*
+ * Are all batches empty for the specified srcu_struct?
+ */
+static inline bool rcu_all_batches_empty(struct srcu_struct *sp)
+{
+	return rcu_batch_empty(&sp->batch_done) &&
+	       rcu_batch_empty(&sp->batch_check1) &&
+	       rcu_batch_empty(&sp->batch_check0) &&
+	       rcu_batch_empty(&sp->batch_queue);
+}
+
 /*
  * Remove the callback at the head of the specified rcu_batch structure
  * and return a pointer to it, or return NULL if the structure is empty.
@@ -619,15 +630,9 @@ static void srcu_reschedule(struct srcu_struct *sp)
 {
 	bool pending = true;
 
-	if (rcu_batch_empty(&sp->batch_done) &&
-	    rcu_batch_empty(&sp->batch_check1) &&
-	    rcu_batch_empty(&sp->batch_check0) &&
-	    rcu_batch_empty(&sp->batch_queue)) {
+	if (rcu_all_batches_empty(sp)) {
 		spin_lock_irq(&sp->queue_lock);
-		if (rcu_batch_empty(&sp->batch_done) &&
-		    rcu_batch_empty(&sp->batch_check1) &&
-		    rcu_batch_empty(&sp->batch_check0) &&
-		    rcu_batch_empty(&sp->batch_queue)) {
+		if (rcu_all_batches_empty(sp)) {
 			sp->running = false;
 			pending = false;
 		}
-- 
cgit v1.2.3


From 15c68f7f6341da1693a60440338dd4aa8836da48 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 19 Jan 2017 13:40:09 -0800
Subject: srcu: Check for tardy grace-period activity in cleanup_srcu_struct()

Users of SRCU are obliged to complete all grace-period activity before
invoking cleanup_srcu_struct().  This means that all calls to either
synchronize_srcu() or synchronize_srcu_expedited() must have returned,
and all calls to call_srcu() must have returned, and the last call to
call_srcu() must have been followed by a call to srcu_barrier().
Furthermore, the caller must have done something to prevent any
further calls to synchronize_srcu(), synchronize_srcu_expedited(),
and call_srcu().

Therefore, if there has ever been an invocation of call_srcu() on
the srcu_struct in question, the sequence of events must be as
follows:

1.  Prevent any further calls to call_srcu().
2.  Wait for any pre-existing call_srcu() invocations to return.
3.  Invoke srcu_barrier().
4.  It is now safe to invoke cleanup_srcu_struct().

On the other hand, if there has ever been a call to synchronize_srcu()
or synchronize_srcu_expedited(), the sequence of events must be as
follows:

1.  Prevent any further calls to synchronize_srcu() or
    synchronize_srcu_expedited().
2.  Wait for any pre-existing synchronize_srcu() or
    synchronize_srcu_expedited() invocations to return.
3.  It is now safe to invoke cleanup_srcu_struct().

If there have been calls to all both types of functions (call_srcu()
and either of synchronize_srcu() and synchronize_srcu_expedited()), then
the caller must do the first three steps of the call_srcu() procedure
above and the first two steps of the synchronize_s*() procedure above,
and only then invoke cleanup_srcu_struct().

Note that cleanup_srcu_struct() does some probabilistic checks
for the caller failing to follow these procedures, in which case
cleanup_srcu_struct() does WARN_ON() and avoids freeing the per-CPU
structures associated with the specified srcu_struct structure.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcu/srcu.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index ba41a5d04b49..e6da9fc1f0e9 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -254,13 +254,24 @@ static bool srcu_readers_active(struct srcu_struct *sp)
  * cleanup_srcu_struct - deconstruct a sleep-RCU structure
  * @sp: structure to clean up.
  *
- * Must invoke this after you are finished using a given srcu_struct that
- * was initialized via init_srcu_struct(), else you leak memory.
+ * Must invoke this only after you are finished using a given srcu_struct
+ * that was initialized via init_srcu_struct().  This code does some
+ * probabalistic checking, spotting late uses of srcu_read_lock(),
+ * synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu().
+ * If any such late uses are detected, the per-CPU memory associated with
+ * the srcu_struct is simply leaked and WARN_ON() is invoked.  If the
+ * caller frees the srcu_struct itself, a use-after-free crash will likely
+ * ensue, but at least there will be a warning printed.
  */
 void cleanup_srcu_struct(struct srcu_struct *sp)
 {
 	if (WARN_ON(srcu_readers_active(sp)))
 		return; /* Leakage unless caller handles error. */
+	if (WARN_ON(!rcu_all_batches_empty(sp)))
+		return; /* Leakage unless caller handles error. */
+	flush_delayed_work(&sp->work);
+	if (WARN_ON(sp->running))
+		return; /* Caller forgot to stop doing call_srcu()? */
 	free_percpu(sp->per_cpu_ref);
 	sp->per_cpu_ref = NULL;
 }
-- 
cgit v1.2.3


From dffd06a7566b379be13a831b27a55bf5b0a7dea5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 23 Jan 2017 11:55:43 -0800
Subject: rcu: Semicolon inside RCU_TRACE() for rcu.h

The current use of "RCU_TRACE(statement);" can cause odd bugs, especially
where "statement" is a local-variable declaration, as it can leave a
misplaced ";" in the source code.  This commit therefore converts these
to "RCU_TRACE(statement;)", which avoids the misplaced ";".

Reported-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcu.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 0d6ff3e471be..8700a81daf56 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -109,12 +109,12 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
 
 	rcu_lock_acquire(&rcu_callback_map);
 	if (__is_kfree_rcu_offset(offset)) {
-		RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
+		RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset);)
 		kfree((void *)head - offset);
 		rcu_lock_release(&rcu_callback_map);
 		return true;
 	} else {
-		RCU_TRACE(trace_rcu_invoke_callback(rn, head));
+		RCU_TRACE(trace_rcu_invoke_callback(rn, head);)
 		head->func(head);
 		rcu_lock_release(&rcu_callback_map);
 		return false;
-- 
cgit v1.2.3


From 6c8c148542cf947cd95afb2c385310ce47111427 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 23 Jan 2017 12:02:09 -0800
Subject: rcu: Semicolon inside RCU_TRACE() for Tiny RCU

The current use of "RCU_TRACE(statement);" can cause odd bugs, especially
where "statement" is a local-variable declaration, as it can leave a
misplaced ";" in the source code.  This commit therefore converts these
to "RCU_TRACE(statement;)", which avoids the misplaced ";".

Reported-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tiny.c        | 20 ++++++++++----------
 kernel/rcu/tiny_plugin.h |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 6ad330dbbae2..e5385731e391 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -79,7 +79,7 @@ EXPORT_SYMBOL(__rcu_is_watching);
  */
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
-	RCU_TRACE(reset_cpu_stall_ticks(rcp));
+	RCU_TRACE(reset_cpu_stall_ticks(rcp);)
 	if (rcp->donetail != rcp->curtail) {
 		rcp->donetail = rcp->curtail;
 		return 1;
@@ -125,7 +125,7 @@ void rcu_bh_qs(void)
  */
 void rcu_check_callbacks(int user)
 {
-	RCU_TRACE(check_cpu_stalls());
+	RCU_TRACE(check_cpu_stalls();)
 	if (user)
 		rcu_sched_qs();
 	else if (!in_softirq())
@@ -143,7 +143,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 	const char *rn = NULL;
 	struct rcu_head *next, *list;
 	unsigned long flags;
-	RCU_TRACE(int cb_count = 0);
+	RCU_TRACE(int cb_count = 0;)
 
 	/* Move the ready-to-invoke callbacks to a local list. */
 	local_irq_save(flags);
@@ -152,7 +152,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 		local_irq_restore(flags);
 		return;
 	}
-	RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
+	RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1);)
 	list = rcp->rcucblist;
 	rcp->rcucblist = *rcp->donetail;
 	*rcp->donetail = NULL;
@@ -162,7 +162,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 	local_irq_restore(flags);
 
 	/* Invoke the callbacks on the local list. */
-	RCU_TRACE(rn = rcp->name);
+	RCU_TRACE(rn = rcp->name;)
 	while (list) {
 		next = list->next;
 		prefetch(next);
@@ -171,9 +171,9 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 		__rcu_reclaim(rn, list);
 		local_bh_enable();
 		list = next;
-		RCU_TRACE(cb_count++);
+		RCU_TRACE(cb_count++;)
 	}
-	RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
+	RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count);)
 	RCU_TRACE(trace_rcu_batch_end(rcp->name,
 				      cb_count, 0, need_resched(),
 				      is_idle_task(current),
@@ -221,7 +221,7 @@ static void __call_rcu(struct rcu_head *head,
 	local_irq_save(flags);
 	*rcp->curtail = head;
 	rcp->curtail = &head->next;
-	RCU_TRACE(rcp->qlen++);
+	RCU_TRACE(rcp->qlen++;)
 	local_irq_restore(flags);
 
 	if (unlikely(is_idle_task(current))) {
@@ -254,8 +254,8 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
 void __init rcu_init(void)
 {
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
-	RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk));
-	RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk));
+	RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk);)
+	RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk);)
 
 	rcu_early_boot_tests();
 }
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index c64b827ecbca..df3a60e19f07 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -162,8 +162,8 @@ static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
 
 static void check_cpu_stalls(void)
 {
-	RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
-	RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
+	RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk);)
+	RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk);)
 }
 
 #endif /* #ifdef CONFIG_RCU_TRACE */
-- 
cgit v1.2.3


From 88a4976d0e37c0797ff3e6579a5f91cb7dced90d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 23 Jan 2017 12:04:46 -0800
Subject: rcu: Semicolon inside RCU_TRACE() for tree.c

The current use of "RCU_TRACE(statement);" can cause odd bugs, especially
where "statement" is a local-variable declaration, as it can leave a
misplaced ";" in the source code.  This commit therefore converts these
to "RCU_TRACE(statement;)", which avoids the misplaced ";".

Reported-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0efad311ded4..3747277aae67 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2793,14 +2793,14 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
  */
 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
-	RCU_TRACE(unsigned long mask);
-	RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda));
-	RCU_TRACE(struct rcu_node *rnp = rdp->mynode);
+	RCU_TRACE(unsigned long mask;)
+	RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda);)
+	RCU_TRACE(struct rcu_node *rnp = rdp->mynode;)
 
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
 		return;
 
-	RCU_TRACE(mask = rdp->grpmask);
+	RCU_TRACE(mask = rdp->grpmask;)
 	trace_rcu_grace_period(rsp->name,
 			       rnp->gpnum + 1 - !!(rnp->qsmask & mask),
 			       TPS("cpuofl"));
-- 
cgit v1.2.3


From abb06b99484a9f5af05c7147c289faf835f68e8e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 26 Jan 2017 13:45:38 -0800
Subject: rcu: Pull rcu_sched_qs_mask into rcu_dynticks structure

The rcu_sched_qs_mask variable is yet another isolated per-CPU variable,
so this commit pulls it into the pre-existing rcu_dynticks per-CPU
structure.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 .../RCU/Design/Data-Structures/Data-Structures.html          |  9 ++++++++-
 kernel/rcu/tree.c                                            | 12 +++++-------
 kernel/rcu/tree.h                                            |  1 +
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index d583c653a703..bf7f266e8888 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -1104,6 +1104,7 @@ Its fields are as follows:
   1   int dynticks_nesting;
   2   int dynticks_nmi_nesting;
   3   atomic_t dynticks;
+  4   int rcu_sched_qs_mask;
 </pre>
 
 <p>The <tt>-&gt;dynticks_nesting</tt> field counts the
@@ -1117,11 +1118,17 @@ NMIs are counted by the <tt>-&gt;dynticks_nmi_nesting</tt>
 field, except that NMIs that interrupt non-dyntick-idle execution
 are not counted.
 
-</p><p>Finally, the <tt>-&gt;dynticks</tt> field counts the corresponding
+</p><p>The <tt>-&gt;dynticks</tt> field counts the corresponding
 CPU's transitions to and from dyntick-idle mode, so that this counter
 has an even value when the CPU is in dyntick-idle mode and an odd
 value otherwise.
 
+</p><p>Finally, the  <tt>-&gt;rcu_sched_qs_mask</tt> field is used
+to record the fact that the RCU core code would really like to
+see a quiescent state from the corresponding CPU.
+This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
+code, which provide a momentary idle sojourn in response.
+
 <table>
 <tr><th>&nbsp;</th></tr>
 <tr><th align="left">Quick Quiz:</th></tr>
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3747277aae67..3a0703035874 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -272,8 +272,6 @@ void rcu_bh_qs(void)
 	}
 }
 
-static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
-
 /*
  * Steal a bit from the bottom of ->dynticks for idle entry/exit
  * control.  Initially this is for TLB flushing.
@@ -464,8 +462,8 @@ static void rcu_momentary_dyntick_idle(void)
 	 * Yes, we can lose flag-setting operations.  This is OK, because
 	 * the flag will be set again after some delay.
 	 */
-	resched_mask = raw_cpu_read(rcu_sched_qs_mask);
-	raw_cpu_write(rcu_sched_qs_mask, 0);
+	resched_mask = raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask);
+	raw_cpu_write(rcu_dynticks.rcu_sched_qs_mask, 0);
 
 	/* Find the flavor that needs a quiescent state. */
 	for_each_rcu_flavor(rsp) {
@@ -499,7 +497,7 @@ void rcu_note_context_switch(void)
 	trace_rcu_utilization(TPS("Start context switch"));
 	rcu_sched_qs();
 	rcu_preempt_note_context_switch();
-	if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask)))
 		rcu_momentary_dyntick_idle();
 	trace_rcu_utilization(TPS("End context switch"));
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
@@ -524,7 +522,7 @@ void rcu_all_qs(void)
 	unsigned long flags;
 
 	barrier(); /* Avoid RCU read-side critical sections leaking down. */
-	if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) {
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask))) {
 		local_irq_save(flags);
 		rcu_momentary_dyntick_idle();
 		local_irq_restore(flags);
@@ -1351,7 +1349,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 * is set too high, we override with half of the RCU CPU stall
 	 * warning delay.
 	 */
-	rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu);
+	rcrmp = &per_cpu(rcu_dynticks.rcu_sched_qs_mask, rdp->cpu);
 	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||
 	    time_after(jiffies, rdp->rsp->jiffies_resched)) {
 		if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 7468b4de7e0c..e298281984dc 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -113,6 +113,7 @@ struct rcu_dynticks {
 				    /* Process level is worth LLONG_MAX/2. */
 	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
+	int rcu_sched_qs_mask;      /* GP old, need quiescent state. */
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 	long long dynticks_idle_nesting;
 				    /* irq/process nesting level from idle. */
-- 
cgit v1.2.3


From 9577df9a3122af08fff84b8a1a60dccf524a3891 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 26 Jan 2017 16:18:07 -0800
Subject: rcu: Pull rcu_qs_ctr into rcu_dynticks structure

The rcu_qs_ctr variable is yet another isolated per-CPU variable,
so this commit pulls it into the pre-existing rcu_dynticks per-CPU
structure.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 .../RCU/Design/Data-Structures/Data-Structures.html       | 12 ++++++++++--
 kernel/rcu/tree.c                                         | 15 ++++++---------
 kernel/rcu/tree.h                                         |  3 ++-
 kernel/rcu/tree_trace.c                                   |  4 +---
 4 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index bf7f266e8888..3d0311657533 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -1105,6 +1105,7 @@ Its fields are as follows:
   2   int dynticks_nmi_nesting;
   3   atomic_t dynticks;
   4   int rcu_sched_qs_mask;
+  5   unsigned long rcu_qs_ctr;
 </pre>
 
 <p>The <tt>-&gt;dynticks_nesting</tt> field counts the
@@ -1123,12 +1124,19 @@ CPU's transitions to and from dyntick-idle mode, so that this counter
 has an even value when the CPU is in dyntick-idle mode and an odd
 value otherwise.
 
-</p><p>Finally, the  <tt>-&gt;rcu_sched_qs_mask</tt> field is used
+</p><p>The <tt>-&gt;rcu_sched_qs_mask</tt> field is used
 to record the fact that the RCU core code would really like to
-see a quiescent state from the corresponding CPU.
+see a quiescent state from the corresponding CPU, so much so that
+it is willing to call for heavy-weight dyntick-counter operations.
 This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
 code, which provide a momentary idle sojourn in response.
 
+</p><p>Finally the <tt>-&gt;rcu_qs_ctr</tt> field is used to record
+quiescent states from <tt>cond_resched()</tt>.
+Because <tt>cond_resched()</tt> can execute quite frequently, this
+must be quite lightweight, as in a non-atomic increment of this
+per-CPU field.
+
 <table>
 <tr><th>&nbsp;</th></tr>
 <tr><th align="left">Quick Quiz:</th></tr>
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3a0703035874..82a86a67c92a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -436,9 +436,6 @@ bool rcu_eqs_special_set(int cpu)
 	return true;
 }
 
-DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
-EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
-
 /*
  * Let the RCU core know that this CPU has gone through the scheduler,
  * which is a quiescent state.  This is called when the need for a
@@ -542,7 +539,7 @@ void rcu_all_qs(void)
 		rcu_sched_qs();
 		preempt_enable();
 	}
-	this_cpu_inc(rcu_qs_ctr);
+	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
 EXPORT_SYMBOL_GPL(rcu_all_qs);
@@ -1315,7 +1312,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 */
 	rnp = rdp->mynode;
 	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
-	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_qs_ctr, rdp->cpu) &&
+	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
 	    READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
 		return 1;
@@ -2024,7 +2021,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
 		need_gp = !!(rnp->qsmask & rdp->grpmask);
 		rdp->cpu_no_qs.b.norm = need_gp;
-		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
+		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
 		rdp->core_needs_qs = need_gp;
 		zero_cpu_stall_ticks(rdp);
 		WRITE_ONCE(rdp->gpwrap, false);
@@ -2622,7 +2619,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 		 * within the current grace period.
 		 */
 		rdp->cpu_no_qs.b.norm = true;	/* need qs for new gp. */
-		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
+		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
@@ -3620,7 +3617,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
 	if (rcu_scheduler_fully_active &&
 	    rdp->core_needs_qs && rdp->cpu_no_qs.b.norm &&
-	    rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) {
+	    rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_dynticks.rcu_qs_ctr)) {
 		rdp->n_rp_core_needs_qs++;
 	} else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) {
 		rdp->n_rp_report_qs++;
@@ -3933,7 +3930,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
 	rdp->completed = rnp->completed;
 	rdp->cpu_no_qs.b.norm = true;
-	rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu);
+	rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);
 	rdp->core_needs_qs = false;
 	trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e298281984dc..76e4467bc765 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -113,7 +113,8 @@ struct rcu_dynticks {
 				    /* Process level is worth LLONG_MAX/2. */
 	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
-	int rcu_sched_qs_mask;      /* GP old, need quiescent state. */
+	int rcu_sched_qs_mask;      /* GP old, need heavy quiescent state. */
+	unsigned long rcu_qs_ctr;   /* Light universal quiescent state ctr. */
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 	long long dynticks_idle_nesting;
 				    /* irq/process nesting level from idle. */
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 8751a748499a..65b43be38e68 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -45,8 +45,6 @@
 #define RCU_TREE_NONCORE
 #include "tree.h"
 
-DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
-
 static int r_open(struct inode *inode, struct file *file,
 					const struct seq_operations *op)
 {
@@ -121,7 +119,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
 		   ulong2long(rdp->completed), ulong2long(rdp->gpnum),
 		   rdp->cpu_no_qs.b.norm,
-		   rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu),
+		   rdp->rcu_qs_ctr_snap == per_cpu(rdp->dynticks->rcu_qs_ctr, rdp->cpu),
 		   rdp->core_needs_qs);
 	seq_printf(m, " dt=%d/%llx/%d df=%lu",
 		   rcu_dynticks_snap(rdp->dynticks),
-- 
cgit v1.2.3


From 0f9be8cabbc343218dd2807af7308656be113045 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 27 Jan 2017 13:17:02 -0800
Subject: rcu: Eliminate flavor scan in rcu_momentary_dyntick_idle()

The rcu_momentary_dyntick_idle() function scans the RCU flavors, checking
that one of them still needs a quiescent state before doing an expensive
atomic operation on the ->dynticks counter.  However, this check reduces
overhead only after a rare race condition, and increases complexity.  This
commit therefore removes the scan and the mechanism enabling the scan.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 .../Design/Data-Structures/Data-Structures.html    |  4 +-
 kernel/rcu/tree.c                                  | 62 +++++-----------------
 kernel/rcu/tree.h                                  |  3 +-
 3 files changed, 15 insertions(+), 54 deletions(-)

diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index 3d0311657533..e4bf20a68fa3 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -1104,7 +1104,7 @@ Its fields are as follows:
   1   int dynticks_nesting;
   2   int dynticks_nmi_nesting;
   3   atomic_t dynticks;
-  4   int rcu_sched_qs_mask;
+  4   bool rcu_need_heavy_qs;
   5   unsigned long rcu_qs_ctr;
 </pre>
 
@@ -1124,7 +1124,7 @@ CPU's transitions to and from dyntick-idle mode, so that this counter
 has an even value when the CPU is in dyntick-idle mode and an odd
 value otherwise.
 
-</p><p>The <tt>-&gt;rcu_sched_qs_mask</tt> field is used
+</p><p>The <tt>-&gt;rcu_need_heavy_qs</tt> field is used
 to record the fact that the RCU core code would really like to
 see a quiescent state from the corresponding CPU, so much so that
 it is willing to call for heavy-weight dyntick-counter operations.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 82a86a67c92a..c2cbc78a0625 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -443,44 +443,14 @@ bool rcu_eqs_special_set(int cpu)
  * memory barriers to let the RCU core know about it, regardless of what
  * this CPU might (or might not) do in the near future.
  *
- * We inform the RCU core by emulating a zero-duration dyntick-idle
- * period, which we in turn do by incrementing the ->dynticks counter
- * by two.
+ * We inform the RCU core by emulating a zero-duration dyntick-idle period.
  *
  * The caller must have disabled interrupts.
  */
 static void rcu_momentary_dyntick_idle(void)
 {
-	struct rcu_data *rdp;
-	int resched_mask;
-	struct rcu_state *rsp;
-
-	/*
-	 * Yes, we can lose flag-setting operations.  This is OK, because
-	 * the flag will be set again after some delay.
-	 */
-	resched_mask = raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask);
-	raw_cpu_write(rcu_dynticks.rcu_sched_qs_mask, 0);
-
-	/* Find the flavor that needs a quiescent state. */
-	for_each_rcu_flavor(rsp) {
-		rdp = raw_cpu_ptr(rsp->rda);
-		if (!(resched_mask & rsp->flavor_mask))
-			continue;
-		smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */
-		if (READ_ONCE(rdp->mynode->completed) !=
-		    READ_ONCE(rdp->cond_resched_completed))
-			continue;
-
-		/*
-		 * Pretend to be momentarily idle for the quiescent state.
-		 * This allows the grace-period kthread to record the
-		 * quiescent state, with no need for this CPU to do anything
-		 * further.
-		 */
-		rcu_dynticks_momentary_idle();
-		break;
-	}
+	raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false);
+	rcu_dynticks_momentary_idle();
 }
 
 /*
@@ -494,7 +464,7 @@ void rcu_note_context_switch(void)
 	trace_rcu_utilization(TPS("Start context switch"));
 	rcu_sched_qs();
 	rcu_preempt_note_context_switch();
-	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask)))
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
 		rcu_momentary_dyntick_idle();
 	trace_rcu_utilization(TPS("End context switch"));
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
@@ -519,7 +489,7 @@ void rcu_all_qs(void)
 	unsigned long flags;
 
 	barrier(); /* Avoid RCU read-side critical sections leaking down. */
-	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask))) {
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) {
 		local_irq_save(flags);
 		rcu_momentary_dyntick_idle();
 		local_irq_restore(flags);
@@ -1275,7 +1245,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 				    bool *isidle, unsigned long *maxj)
 {
 	unsigned long jtsq;
-	int *rcrmp;
+	bool *rnhqp;
 	unsigned long rjtsc;
 	struct rcu_node *rnp;
 
@@ -1332,7 +1302,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 * in-kernel CPU-bound tasks cannot advance grace periods.
 	 * So if the grace period is old enough, make the CPU pay attention.
 	 * Note that the unsynchronized assignments to the per-CPU
-	 * rcu_sched_qs_mask variable are safe.  Yes, setting of
+	 * rcu_need_heavy_qs variable are safe.  Yes, setting of
 	 * bits can be lost, but they will be set again on the next
 	 * force-quiescent-state pass.  So lost bit sets do not result
 	 * in incorrect behavior, merely in a grace period lasting
@@ -1346,16 +1316,11 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 * is set too high, we override with half of the RCU CPU stall
 	 * warning delay.
 	 */
-	rcrmp = &per_cpu(rcu_dynticks.rcu_sched_qs_mask, rdp->cpu);
-	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||
-	    time_after(jiffies, rdp->rsp->jiffies_resched)) {
-		if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
-			WRITE_ONCE(rdp->cond_resched_completed,
-				   READ_ONCE(rdp->mynode->completed));
-			smp_mb(); /* ->cond_resched_completed before *rcrmp. */
-			WRITE_ONCE(*rcrmp,
-				   READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask);
-		}
+	rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu);
+	if (!READ_ONCE(*rnhqp) &&
+	    (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||
+	     time_after(jiffies, rdp->rsp->jiffies_resched))) {
+		WRITE_ONCE(*rnhqp, true);
 		rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
 	}
 
@@ -4169,7 +4134,6 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 	static const char * const fqs[] = RCU_FQS_NAME_INIT;
 	static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 	static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
-	static u8 fl_mask = 0x1;
 
 	int levelcnt[RCU_NUM_LVLS];		/* # nodes in each level. */
 	int levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */
@@ -4191,8 +4155,6 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 	for (i = 1; i < rcu_num_lvls; i++)
 		rsp->level[i] = rsp->level[i - 1] + levelcnt[i - 1];
 	rcu_init_levelspread(levelspread, levelcnt);
-	rsp->flavor_mask = fl_mask;
-	fl_mask <<= 1;
 
 	/* Initialize the elements themselves, starting from the leaves. */
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 76e4467bc765..b212cd0f22c7 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -113,7 +113,7 @@ struct rcu_dynticks {
 				    /* Process level is worth LLONG_MAX/2. */
 	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
-	int rcu_sched_qs_mask;      /* GP old, need heavy quiescent state. */
+	bool rcu_need_heavy_qs;      /* GP old, need heavy quiescent state. */
 	unsigned long rcu_qs_ctr;   /* Light universal quiescent state ctr. */
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 	long long dynticks_idle_nesting;
@@ -484,7 +484,6 @@ struct rcu_state {
 	struct rcu_node *level[RCU_NUM_LVLS + 1];
 						/* Hierarchy levels (+1 to */
 						/*  shut bogus gcc warning) */
-	u8 flavor_mask;				/* bit in flavor mask. */
 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
 	call_rcu_func_t call;			/* call_rcu() flavor. */
 	int ncpus;				/* # CPUs seen so far. */
-- 
cgit v1.2.3


From 9226b10d78ffe7895549045fe388dc5e73b87eac Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 27 Jan 2017 14:17:50 -0800
Subject: rcu: Place guard on rcu_all_qs() and rcu_note_context_switch()
 actions

The rcu_all_qs() and rcu_note_context_switch() do a series of checks,
taking various actions to supply RCU with quiescent states, depending
on the outcomes of the various checks.  This is a bit much for scheduling
fastpaths, so this commit creates a separate ->rcu_urgent_qs field in
the rcu_dynticks structure that acts as a global guard for these checks.
Thus, in the common case, rcu_all_qs() and rcu_note_context_switch()
check the ->rcu_urgent_qs field, find it false, and simply return.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 .../Design/Data-Structures/Data-Structures.html    | 11 ++++++-
 kernel/rcu/tree.c                                  | 38 ++++++++++++++--------
 kernel/rcu/tree.h                                  |  3 +-
 kernel/rcu/tree_exp.h                              |  2 ++
 kernel/rcu/tree_plugin.h                           |  8 +++--
 5 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index e4bf20a68fa3..4dec89097559 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -1106,6 +1106,7 @@ Its fields are as follows:
   3   atomic_t dynticks;
   4   bool rcu_need_heavy_qs;
   5   unsigned long rcu_qs_ctr;
+  6   bool rcu_urgent_qs;
 </pre>
 
 <p>The <tt>-&gt;dynticks_nesting</tt> field counts the
@@ -1131,12 +1132,20 @@ it is willing to call for heavy-weight dyntick-counter operations.
 This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
 code, which provide a momentary idle sojourn in response.
 
-</p><p>Finally the <tt>-&gt;rcu_qs_ctr</tt> field is used to record
+</p><p>The <tt>-&gt;rcu_qs_ctr</tt> field is used to record
 quiescent states from <tt>cond_resched()</tt>.
 Because <tt>cond_resched()</tt> can execute quite frequently, this
 must be quite lightweight, as in a non-atomic increment of this
 per-CPU field.
 
+</p><p>Finally, the <tt>-&gt;rcu_urgent_qs</tt> field is used to record
+the fact that the RCU core code would really like to see a quiescent
+state from the corresponding CPU, with the various other fields indicating
+just how badly RCU wants this quiescent state.
+This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
+code, which, if nothing else, non-atomically increment <tt>-&gt;rcu_qs_ctr</tt>
+in response.
+
 <table>
 <tr><th>&nbsp;</th></tr>
 <tr><th align="left">Quick Quiz:</th></tr>
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c2cbc78a0625..530ab6cf7a0b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -464,8 +464,14 @@ void rcu_note_context_switch(void)
 	trace_rcu_utilization(TPS("Start context switch"));
 	rcu_sched_qs();
 	rcu_preempt_note_context_switch();
+	/* Load rcu_urgent_qs before other flags. */
+	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
+		goto out;
+	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
 	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
 		rcu_momentary_dyntick_idle();
+	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
+out:
 	trace_rcu_utilization(TPS("End context switch"));
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
@@ -488,29 +494,26 @@ void rcu_all_qs(void)
 {
 	unsigned long flags;
 
+	if (!raw_cpu_read(rcu_dynticks.rcu_urgent_qs))
+		return;
+	preempt_disable();
+	/* Load rcu_urgent_qs before other flags. */
+	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) {
+		preempt_enable();
+		return;
+	}
+	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
 	barrier(); /* Avoid RCU read-side critical sections leaking down. */
 	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) {
 		local_irq_save(flags);
 		rcu_momentary_dyntick_idle();
 		local_irq_restore(flags);
 	}
-	if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) {
-		/*
-		 * Yes, we just checked a per-CPU variable with preemption
-		 * enabled, so we might be migrated to some other CPU at
-		 * this point.  That is OK because in that case, the
-		 * migration will supply the needed quiescent state.
-		 * We might end up needlessly disabling preemption and
-		 * invoking rcu_sched_qs() on the destination CPU, but
-		 * the probability and cost are both quite low, so this
-		 * should not be a problem in practice.
-		 */
-		preempt_disable();
+	if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)))
 		rcu_sched_qs();
-		preempt_enable();
-	}
 	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(rcu_all_qs);
 
@@ -1246,6 +1249,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 {
 	unsigned long jtsq;
 	bool *rnhqp;
+	bool *ruqp;
 	unsigned long rjtsc;
 	struct rcu_node *rnp;
 
@@ -1281,11 +1285,15 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 * might not be the case for nohz_full CPUs looping in the kernel.
 	 */
 	rnp = rdp->mynode;
+	ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
 	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
 	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
 	    READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
 		return 1;
+	} else {
+		/* Load rcu_qs_ctr before store to rcu_urgent_qs. */
+		smp_store_release(ruqp, true);
 	}
 
 	/* Check for the CPU being offline. */
@@ -1321,6 +1329,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	    (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||
 	     time_after(jiffies, rdp->rsp->jiffies_resched))) {
 		WRITE_ONCE(*rnhqp, true);
+		/* Store rcu_need_heavy_qs before rcu_urgent_qs. */
+		smp_store_release(ruqp, true);
 		rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
 	}
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b212cd0f22c7..d2f276fc2edc 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -113,8 +113,9 @@ struct rcu_dynticks {
 				    /* Process level is worth LLONG_MAX/2. */
 	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
-	bool rcu_need_heavy_qs;      /* GP old, need heavy quiescent state. */
+	bool rcu_need_heavy_qs;     /* GP old, need heavy quiescent state. */
 	unsigned long rcu_qs_ctr;   /* Light universal quiescent state ctr. */
+	bool rcu_urgent_qs;	    /* GP old need light quiescent state. */
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 	long long dynticks_idle_nesting;
 				    /* irq/process nesting level from idle. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index a7b639ccd46e..a1f52bbe9db6 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -331,6 +331,8 @@ static void sync_sched_exp_handler(void *data)
 		return;
 	}
 	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
+	/* Store .exp before .rcu_urgent_qs. */
+	smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
 	resched_cpu(smp_processor_id());
 }
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0a62a8f1caac..621296a6694b 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1860,7 +1860,9 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeEmpty"));
 		} else {
-			rdp->nocb_defer_wakeup = RCU_NOGP_WAKE;
+			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE);
+			/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
+			smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeEmptyIsDeferred"));
 		}
@@ -1872,7 +1874,9 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeOvf"));
 		} else {
-			rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE;
+			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_FORCE);
+			/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
+			smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeOvfIsDeferred"));
 		}
-- 
cgit v1.2.3


From b8c78d3afc6aac1c722af3bec18959c6bd93231c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 3 Feb 2017 09:54:05 -0800
Subject: rcu: Default RCU_FANOUT_LEAF to 16 unless explicitly changed

If the RCU_EXPERT Kconfig option is not set (the default), then the
RCU_FANOUT_LEAF Kconfig option will not be defined, which will cause
the leaf-level rcu_node tree fanout to default to 32 on 32-bit systems
and 64 on 64-bit systems.  This can result in excessive lock contention.
This commit therefore changes the computation of the leaf-level rcu_node
tree fanout so that the result will be 16 unless an explicit Kconfig or
kernel-boot setting says otherwise.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index d2f276fc2edc..376c01e539c7 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -52,11 +52,7 @@
 #ifdef CONFIG_RCU_FANOUT_LEAF
 #define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
 #else /* #ifdef CONFIG_RCU_FANOUT_LEAF */
-# ifdef CONFIG_64BIT
-# define RCU_FANOUT_LEAF 64
-# else
-# define RCU_FANOUT_LEAF 32
-# endif
+#define RCU_FANOUT_LEAF 16
 #endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */
 
 #define RCU_FANOUT_1	      (RCU_FANOUT_LEAF)
-- 
cgit v1.2.3


From 15fecf89e46a962ccda583d919e25d9da7bf0723 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 8 Feb 2017 12:36:42 -0800
Subject: srcu: Abstract multi-tail callback list handling

RCU has only one multi-tail callback list, which is implemented via
the nxtlist, nxttail, nxtcompleted, qlen_lazy, and qlen fields in the
rcu_data structure, and whose operations are open-code throughout the
Tree RCU implementation.  This has been more or less OK in the past,
but upcoming callback-list optimizations in SRCU could really use
a multi-tail callback list there as well.

This commit therefore abstracts the multi-tail callback list handling
into a new kernel/rcu/rcu_segcblist.h file, and uses this new API.
The simple head-and-tail pointer callback list is also abstracted and
applied everywhere except for the NOCB callback-offload lists.  (Yes,
the plan is to apply them there as well, but this commit is already
bigger than would be good.)

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcu_segcblist.h | 625 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/rcu/tree.c          | 348 ++++++++-----------------
 kernel/rcu/tree.h          |  41 +--
 kernel/rcu/tree_plugin.h   |  54 ++--
 kernel/rcu/tree_trace.c    |  21 +-
 5 files changed, 780 insertions(+), 309 deletions(-)
 create mode 100644 kernel/rcu/rcu_segcblist.h

diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
new file mode 100644
index 000000000000..24078f3c0218
--- /dev/null
+++ b/kernel/rcu/rcu_segcblist.h
@@ -0,0 +1,625 @@
+/*
+ * RCU segmented callback lists
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#ifndef __KERNEL_RCU_SEGCBLIST_H
+#define __KERNEL_RCU_SEGCBLIST_H
+
+/* Simple unsegmented callback lists. */
+struct rcu_cblist {
+	struct rcu_head *head;
+	struct rcu_head **tail;
+	long len;
+	long len_lazy;
+};
+
+#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head }
+
+/* Initialize simple callback list. */
+static inline void rcu_cblist_init(struct rcu_cblist *rclp)
+{
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+	rclp->len = 0;
+	rclp->len_lazy = 0;
+}
+
+/* Is simple callback list empty? */
+static inline bool rcu_cblist_empty(struct rcu_cblist *rclp)
+{
+	return !rclp->head;
+}
+
+/* Return number of callbacks in simple callback list. */
+static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
+{
+	return rclp->len;
+}
+
+/* Return number of lazy callbacks in simple callback list. */
+static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
+{
+	return rclp->len_lazy;
+}
+
+/*
+ * Debug function to actually count the number of callbacks.
+ * If the number exceeds the limit specified, return -1.
+ */
+static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
+{
+	int cnt = 0;
+	struct rcu_head **rhpp = &rclp->head;
+
+	for (;;) {
+		if (!*rhpp)
+			return cnt;
+		if (++cnt > lim)
+			return -1;
+		rhpp = &(*rhpp)->next;
+	}
+}
+
+/*
+ * Dequeue the oldest rcu_head structure from the specified callback
+ * list.  This function assumes that the callback is non-lazy, but
+ * the caller can later invoke rcu_cblist_dequeued_lazy() if it
+ * finds otherwise (and if it cares about laziness).  This allows
+ * different users to have different ways of determining laziness.
+ */
+static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
+{
+	struct rcu_head *rhp;
+
+	rhp = rclp->head;
+	if (!rhp)
+		return NULL;
+	prefetch(rhp);
+	rclp->len--;
+	rclp->head = rhp->next;
+	if (!rclp->head)
+		rclp->tail = &rclp->head;
+	return rhp;
+}
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
+{
+	rclp->len_lazy--;
+}
+
+/*
+ * Interim function to return rcu_cblist head pointer.  Longer term, the
+ * rcu_cblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
+{
+	return rclp->head;
+}
+
+/*
+ * Interim function to return rcu_cblist head pointer.  Longer term, the
+ * rcu_cblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
+{
+	WARN_ON_ONCE(rcu_cblist_empty(rclp));
+	return rclp->tail;
+}
+
+/* Complicated segmented callback lists.  ;-) */
+
+/*
+ * Index values for segments in rcu_segcblist structure.
+ *
+ * The segments are as follows:
+ *
+ * [head, *tails[RCU_DONE_TAIL]):
+ *	Callbacks whose grace period has elapsed, and thus can be invoked.
+ * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]):
+ *	Callbacks waiting for the current GP from the current CPU's viewpoint.
+ * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]):
+ *	Callbacks that arrived before the next GP started, again from
+ *	the current CPU's viewpoint.  These can be handled by the next GP.
+ * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]):
+ *	Callbacks that might have arrived after the next GP started.
+ *	There is some uncertainty as to when a given GP starts and
+ *	ends, but a CPU knows the exact times if it is the one starting
+ *	or ending the GP.  Other CPUs know that the previous GP ends
+ *	before the next one starts.
+ *
+ * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also
+ * empty.
+ *
+ * The ->gp_seq[] array contains the grace-period number at which the
+ * corresponding segment of callbacks will be ready to invoke.  A given
+ * element of this array is meaningful only when the corresponding segment
+ * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks
+ * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have
+ * not yet been assigned a grace-period number).
+ */
+#define RCU_DONE_TAIL		0	/* Also RCU_WAIT head. */
+#define RCU_WAIT_TAIL		1	/* Also RCU_NEXT_READY head. */
+#define RCU_NEXT_READY_TAIL	2	/* Also RCU_NEXT head. */
+#define RCU_NEXT_TAIL		3
+#define RCU_CBLIST_NSEGS	4
+
+struct rcu_segcblist {
+	struct rcu_head *head;
+	struct rcu_head **tails[RCU_CBLIST_NSEGS];
+	unsigned long gp_seq[RCU_CBLIST_NSEGS];
+	long len;
+	long len_lazy;
+};
+
+/*
+ * Initialize an rcu_segcblist structure.
+ */
+static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp)
+{
+	int i;
+
+	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
+	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
+	rsclp->head = NULL;
+	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
+		rsclp->tails[i] = &rsclp->head;
+	rsclp->len = 0;
+	rsclp->len_lazy = 0;
+}
+
+/*
+ * Is the specified rcu_segcblist structure empty?
+ *
+ * But careful!  The fact that the ->head field is NULL does not
+ * necessarily imply that there are no callbacks associated with
+ * this structure.  When callbacks are being invoked, they are
+ * removed as a group.  If callback invocation must be preempted,
+ * the remaining callbacks will be added back to the list.  Either
+ * way, the counts are updated later.
+ *
+ * So it is often the case that rcu_segcblist_n_cbs() should be used
+ * instead.
+ */
+static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
+{
+	return !rsclp->head;
+}
+
+/* Return number of callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
+{
+	return READ_ONCE(rsclp->len);
+}
+
+/* Return number of lazy callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp)
+{
+	return rsclp->len_lazy;
+}
+
+/* Return number of lazy callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
+{
+	return rsclp->len - rsclp->len_lazy;
+}
+
+/*
+ * Is the specified rcu_segcblist enabled, for example, not corresponding
+ * to an offline or callback-offloaded CPU?
+ */
+static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
+{
+	return !!rsclp->tails[RCU_NEXT_TAIL];
+}
+
+/*
+ * Disable the specified rcu_segcblist structure, so that callbacks can
+ * no longer be posted to it.  This structure must be empty.
+ */
+static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
+{
+	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
+	rsclp->tails[RCU_NEXT_TAIL] = NULL;
+}
+
+/*
+ * Is the specified segment of the specified rcu_segcblist structure
+ * empty of callbacks?
+ */
+static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
+{
+	if (seg == RCU_DONE_TAIL)
+		return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
+	return rsclp->tails[seg - 1] == rsclp->tails[seg];
+}
+
+/*
+ * Are all segments following the specified segment of the specified
+ * rcu_segcblist structure empty of callbacks?  (The specified
+ * segment might well contain callbacks.)
+ */
+static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
+{
+	return !*rsclp->tails[seg];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are ready to be invoked?
+ */
+static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are still pending, that is, not yet ready to be invoked?
+ */
+static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
+}
+
+/*
+ * Return a pointer to the first callback in the specified rcu_segcblist
+ * structure.  This is useful for diagnostics.
+ */
+static inline struct rcu_head *
+rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
+{
+	if (rcu_segcblist_is_enabled(rsclp))
+		return rsclp->head;
+	return NULL;
+}
+
+/*
+ * Return a pointer to the first pending callback in the specified
+ * rcu_segcblist structure.  This is useful just after posting a given
+ * callback -- if that callback is the first pending callback, then
+ * you cannot rely on someone else having already started up the required
+ * grace period.
+ */
+static inline struct rcu_head *
+rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
+{
+	if (rcu_segcblist_is_enabled(rsclp))
+		return *rsclp->tails[RCU_DONE_TAIL];
+	return NULL;
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * have not yet been processed beyond having been posted, that is,
+ * does it contain callbacks in its last segment?
+ */
+static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
+}
+
+/*
+ * Enqueue the specified callback onto the specified rcu_segcblist
+ * structure, updating accounting as needed.  Note that the ->len
+ * field may be accessed locklessly, hence the WRITE_ONCE().
+ * The ->len field is used by rcu_barrier() and friends to determine
+ * if it must post a callback on this structure, and it is OK
+ * for rcu_barrier() to sometimes post callbacks needlessly, but
+ * absolutely not OK for it to ever miss posting a callback.
+ */
+static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
+					 struct rcu_head *rhp, bool lazy)
+{
+	WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
+	if (lazy)
+		rsclp->len_lazy++;
+	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
+	rhp->next = NULL;
+	*rsclp->tails[RCU_NEXT_TAIL] = rhp;
+	rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
+}
+
+/*
+ * Extract only the counts from the specified rcu_segcblist structure,
+ * and place them in the specified rcu_cblist structure.  This function
+ * supports both callback orphaning and invocation, hence the separation
+ * of counts and callbacks.  (Callbacks ready for invocation must be
+ * orphaned and adopted separately from pending callbacks, but counts
+ * apply to all callbacks.  Locking must be used to make sure that
+ * both orphaned-callbacks lists are consistent.)
+ */
+static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
+					       struct rcu_cblist *rclp)
+{
+	rclp->len_lazy += rsclp->len_lazy;
+	rclp->len += rsclp->len;
+	rsclp->len_lazy = 0;
+	WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
+}
+
+/*
+ * Extract only those callbacks ready to be invoked from the specified
+ * rcu_segcblist structure and place them in the specified rcu_cblist
+ * structure.
+ */
+static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
+						  struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rcu_segcblist_ready_cbs(rsclp))
+		return; /* Nothing to do. */
+	*rclp->tail = rsclp->head;
+	rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
+	*rsclp->tails[RCU_DONE_TAIL] = NULL;
+	rclp->tail = rsclp->tails[RCU_DONE_TAIL];
+	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
+			rsclp->tails[i] = &rsclp->head;
+}
+
+/*
+ * Extract only those callbacks still pending (not yet ready to be
+ * invoked) from the specified rcu_segcblist structure and place them in
+ * the specified rcu_cblist structure.  Note that this loses information
+ * about any callbacks that might have been partway done waiting for
+ * their grace period.  Too bad!  They will have to start over.
+ */
+static inline void
+rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
+			       struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rcu_segcblist_pend_cbs(rsclp))
+		return; /* Nothing to do. */
+	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
+	rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
+	*rsclp->tails[RCU_DONE_TAIL] = NULL;
+	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
+		rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Move the entire contents of the specified rcu_segcblist structure,
+ * counts, callbacks, and all, to the specified rcu_cblist structure.
+ * @@@ Why do we need this???  Moving early-boot CBs to NOCB lists?
+ * @@@ Memory barrier needed?  (Not if only used at boot time...)
+ */
+static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp,
+					     struct rcu_cblist *rclp)
+{
+	rcu_segcblist_extract_done_cbs(rsclp, rclp);
+	rcu_segcblist_extract_pend_cbs(rsclp, rclp);
+	rcu_segcblist_extract_count(rsclp, rclp);
+}
+
+/*
+ * Insert counts from the specified rcu_cblist structure in the
+ * specified rcu_segcblist structure.
+ */
+static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
+					      struct rcu_cblist *rclp)
+{
+	rsclp->len_lazy += rclp->len_lazy;
+	/* ->len sampled locklessly. */
+	WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
+	rclp->len_lazy = 0;
+	rclp->len = 0;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the beginning of the
+ * done-callbacks segment of the specified rcu_segcblist.
+ */
+static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
+						 struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rclp->head)
+		return; /* No callbacks to move. */
+	*rclp->tail = rsclp->head;
+	rsclp->head = rclp->head;
+	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
+		if (&rsclp->head == rsclp->tails[i])
+			rsclp->tails[i] = rclp->tail;
+		else
+			break;
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the end of the
+ * new-callbacks segment of the specified rcu_segcblist.
+ */
+static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
+						 struct rcu_cblist *rclp)
+{
+	if (!rclp->head)
+		return; /* Nothing to do. */
+	*rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
+	rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+}
+
+/*
+ * Advance the callbacks in the specified rcu_segcblist structure based
+ * on the current value passed in for the grace-period counter.
+ */
+static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
+					 unsigned long seq)
+{
+	int i, j;
+
+	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
+
+	/*
+	 * Find all callbacks whose ->gp_seq numbers indicate that they
+	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
+	 */
+	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
+		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+			break;
+		rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
+	}
+
+	/* If no callbacks moved, nothing more need be done. */
+	if (i == RCU_WAIT_TAIL)
+		return;
+
+	/* Clean up tail pointers that might have been misordered above. */
+	for (j = RCU_WAIT_TAIL; j < i; j++)
+		rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
+
+	/*
+	 * Callbacks moved, so clean up the misordered ->tails[] pointers
+	 * that now point into the middle of the list of ready-to-invoke
+	 * callbacks.  The overall effect is to copy down the later pointers
+	 * into the gap that was created by the now-ready segments.
+	 */
+	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
+		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
+			break;  /* No more callbacks. */
+		rsclp->tails[j] = rsclp->tails[i];
+		rsclp->gp_seq[j] = rsclp->gp_seq[i];
+	}
+}
+
+/*
+ * "Accelerate" callbacks based on more-accurate grace-period information.
+ * The reason for this is that RCU does not synchronize the beginnings and
+ * ends of grace periods, and that callbacks are posted locally.  This in
+ * turn means that the callbacks must be labelled conservatively early
+ * on, as getting exact information would degrade both performance and
+ * scalability.  When more accurate grace-period information becomes
+ * available, previously posted callbacks can be "accelerated", marking
+ * them to complete at the end of the earlier grace period.
+ *
+ * This function operates on an rcu_segcblist structure, and also the
+ * grace-period sequence number at which new callbacks would become
+ * ready to invoke.
+ */
+static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
+					    unsigned long seq)
+{
+	int i;
+
+	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
+
+	/*
+	 * Find the segment preceding the oldest segment of callbacks
+	 * whose ->gp_seq[] completion is at or after that passed in via
+	 * "seq", skipping any empty segments.  This oldest segment, along
+	 * with any later segments, can be merged in with any newly arrived
+	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
+	 * as their ->gp_seq[] grace-period completion sequence number.
+	 */
+	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] != rsclp->tails[i - 1] &&
+		    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
+			break;
+
+	/*
+	 * If all the segments contain callbacks that correspond to
+	 * earlier grace-period sequence numbers than "seq", leave.
+	 * Assuming that the rcu_segcblist structure has enough
+	 * segments in its arrays, this can only happen if some of
+	 * the non-done segments contain callbacks that really are
+	 * ready to invoke.  This situation will get straightened
+	 * out by the next call to rcu_segcblist_advance().
+	 *
+	 * Also advance to the oldest segment of callbacks whose
+	 * ->gp_seq[] completion is at or after that passed in via "seq",
+	 * skipping any empty segments.
+	 */
+	if (++i >= RCU_NEXT_TAIL)
+		return false;
+
+	/*
+	 * Merge all later callbacks, including newly arrived callbacks,
+	 * into the segment located by the for-loop above.  Assign "seq"
+	 * as the ->gp_seq[] value in order to correctly handle the case
+	 * where there were no pending callbacks in the rcu_segcblist
+	 * structure other than in the RCU_NEXT_TAIL segment.
+	 */
+	for (; i < RCU_NEXT_TAIL; i++) {
+		rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
+		rsclp->gp_seq[i] = seq;
+	}
+	return true;
+}
+
+/*
+ * Scan the specified rcu_segcblist structure for callbacks that need
+ * a grace period later than the one specified by "seq".  We don't look
+ * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
+ * have a grace-period sequence number.
+ */
+static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
+						  unsigned long seq)
+{
+	int i;
+
+	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
+		if (rsclp->tails[i - 1] != rsclp->tails[i] &&
+		    ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+			return true;
+	return false;
+}
+
+/*
+ * Interim function to return rcu_segcblist head pointer.  Longer term, the
+ * rcu_segcblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp)
+{
+	return rsclp->head;
+}
+
+/*
+ * Interim function to return rcu_segcblist head pointer.  Longer term, the
+ * rcu_segcblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
+{
+	WARN_ON_ONCE(rcu_segcblist_empty(rsclp));
+	return rsclp->tails[RCU_NEXT_TAIL];
+}
+
+#endif /* __KERNEL_RCU_SEGCBLIST_H */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 530ab6cf7a0b..8cc9d40b41ea 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -97,8 +97,8 @@ struct rcu_state sname##_state = { \
 	.gpnum = 0UL - 300UL, \
 	.completed = 0UL - 300UL, \
 	.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
-	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
-	.orphan_donetail = &sname##_state.orphan_donelist, \
+	.orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \
+	.orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.name = RCU_STATE_NAME(sname), \
 	.abbr = sabbr, \
@@ -725,16 +725,6 @@ void rcutorture_record_progress(unsigned long vernum)
 }
 EXPORT_SYMBOL_GPL(rcutorture_record_progress);
 
-/*
- * Does the CPU have callbacks ready to be invoked?
- */
-static int
-cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
-{
-	return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
-	       rdp->nxttail[RCU_NEXT_TAIL] != NULL;
-}
-
 /*
  * Return the root node of the specified rcu_state structure.
  */
@@ -765,21 +755,17 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
 static bool
 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-	int i;
-
 	if (rcu_gp_in_progress(rsp))
 		return false;  /* No, a grace period is already in progress. */
 	if (rcu_future_needs_gp(rsp))
 		return true;  /* Yes, a no-CBs CPU needs one. */
-	if (!rdp->nxttail[RCU_NEXT_TAIL])
+	if (!rcu_segcblist_is_enabled(&rdp->cblist))
 		return false;  /* No, this is a no-CBs (or offline) CPU. */
-	if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
+	if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
 		return true;  /* Yes, CPU has newly registered callbacks. */
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
-		if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
-		    ULONG_CMP_LT(READ_ONCE(rsp->completed),
-				 rdp->nxtcompleted[i]))
-			return true;  /* Yes, CBs for future grace period. */
+	if (rcu_segcblist_future_gp_needed(&rdp->cblist,
+					   READ_ONCE(rsp->completed)))
+		return true;  /* Yes, CBs for future grace period. */
 	return false; /* No grace period needed. */
 }
 
@@ -1490,7 +1476,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
 
 	print_cpu_stall_info_end();
 	for_each_possible_cpu(cpu)
-		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+		totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,
+							    cpu)->cblist);
 	pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n",
 	       smp_processor_id(), (long)(jiffies - rsp->gp_start),
 	       (long)rsp->gpnum, (long)rsp->completed, totqlen);
@@ -1544,7 +1531,8 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	print_cpu_stall_info(rsp, smp_processor_id());
 	print_cpu_stall_info_end();
 	for_each_possible_cpu(cpu)
-		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+		totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,
+							    cpu)->cblist);
 	pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
 		jiffies - rsp->gp_start,
 		(long)rsp->gpnum, (long)rsp->completed, totqlen);
@@ -1646,30 +1634,6 @@ void rcu_cpu_stall_reset(void)
 		WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2);
 }
 
-/*
- * Initialize the specified rcu_data structure's default callback list
- * to empty.  The default callback list is the one that is not used by
- * no-callbacks CPUs.
- */
-static void init_default_callback_list(struct rcu_data *rdp)
-{
-	int i;
-
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
-}
-
-/*
- * Initialize the specified rcu_data structure's callback list to empty.
- */
-static void init_callback_list(struct rcu_data *rdp)
-{
-	if (init_nocb_callback_list(rdp))
-		return;
-	init_default_callback_list(rdp);
-}
-
 /*
  * Determine the value that ->completed will have at the end of the
  * next subsequent grace period.  This is used to tag callbacks so that
@@ -1724,7 +1688,6 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 		    unsigned long *c_out)
 {
 	unsigned long c;
-	int i;
 	bool ret = false;
 	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
 
@@ -1770,13 +1733,11 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 	/*
 	 * Get a new grace-period number.  If there really is no grace
 	 * period in progress, it will be smaller than the one we obtained
-	 * earlier.  Adjust callbacks as needed.  Note that even no-CBs
-	 * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
+	 * earlier.  Adjust callbacks as needed.
 	 */
 	c = rcu_cbs_completed(rdp->rsp, rnp_root);
-	for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
-		if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
-			rdp->nxtcompleted[i] = c;
+	if (!rcu_is_nocb_cpu(rdp->cpu))
+		(void)rcu_segcblist_accelerate(&rdp->cblist, c);
 
 	/*
 	 * If the needed for the required grace period is already
@@ -1856,57 +1817,27 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
 static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 			       struct rcu_data *rdp)
 {
-	unsigned long c;
-	int i;
-	bool ret;
-
-	/* If the CPU has no callbacks, nothing to do. */
-	if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
-		return false;
-
-	/*
-	 * Starting from the sublist containing the callbacks most
-	 * recently assigned a ->completed number and working down, find the
-	 * first sublist that is not assignable to an upcoming grace period.
-	 * Such a sublist has something in it (first two tests) and has
-	 * a ->completed number assigned that will complete sooner than
-	 * the ->completed number for newly arrived callbacks (last test).
-	 *
-	 * The key point is that any later sublist can be assigned the
-	 * same ->completed number as the newly arrived callbacks, which
-	 * means that the callbacks in any of these later sublist can be
-	 * grouped into a single sublist, whether or not they have already
-	 * been assigned a ->completed number.
-	 */
-	c = rcu_cbs_completed(rsp, rnp);
-	for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
-		if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
-		    !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
-			break;
+	bool ret = false;
 
-	/*
-	 * If there are no sublist for unassigned callbacks, leave.
-	 * At the same time, advance "i" one sublist, so that "i" will
-	 * index into the sublist where all the remaining callbacks should
-	 * be grouped into.
-	 */
-	if (++i >= RCU_NEXT_TAIL)
+	/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
+	if (!rcu_segcblist_pend_cbs(&rdp->cblist))
 		return false;
 
 	/*
-	 * Assign all subsequent callbacks' ->completed number to the next
-	 * full grace period and group them all in the sublist initially
-	 * indexed by "i".
+	 * Callbacks are often registered with incomplete grace-period
+	 * information.  Something about the fact that getting exact
+	 * information requires acquiring a global lock...  RCU therefore
+	 * makes a conservative estimate of the grace period number at which
+	 * a given callback will become ready to invoke.	The following
+	 * code checks this estimate and improves it when possible, thus
+	 * accelerating callback invocation to an earlier grace-period
+	 * number.
 	 */
-	for (; i <= RCU_NEXT_TAIL; i++) {
-		rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
-		rdp->nxtcompleted[i] = c;
-	}
-	/* Record any needed additional grace periods. */
-	ret = rcu_start_future_gp(rnp, rdp, NULL);
+	if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp)))
+		ret = rcu_start_future_gp(rnp, rdp, NULL);
 
 	/* Trace depending on how much we were able to accelerate. */
-	if (!*rdp->nxttail[RCU_WAIT_TAIL])
+	if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
 	else
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
@@ -1926,32 +1857,15 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 			    struct rcu_data *rdp)
 {
-	int i, j;
-
-	/* If the CPU has no callbacks, nothing to do. */
-	if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
+	/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
+	if (!rcu_segcblist_pend_cbs(&rdp->cblist))
 		return false;
 
 	/*
 	 * Find all callbacks whose ->completed numbers indicate that they
 	 * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
 	 */
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
-		if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
-			break;
-		rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
-	}
-	/* Clean up any sublist tail pointers that were misordered above. */
-	for (j = RCU_WAIT_TAIL; j < i; j++)
-		rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
-
-	/* Copy down callbacks to fill in empty sublists. */
-	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
-		if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
-			break;
-		rdp->nxttail[j] = rdp->nxttail[i];
-		rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
-	}
+	rcu_segcblist_advance(&rdp->cblist, rnp->completed);
 
 	/* Classify any remaining callbacks. */
 	return rcu_accelerate_cbs(rsp, rnp, rdp);
@@ -2668,13 +2582,8 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 	 * because _rcu_barrier() excludes CPU-hotplug operations, so it
 	 * cannot be running now.  Thus no memory barrier is required.
 	 */
-	if (rdp->nxtlist != NULL) {
-		rsp->qlen_lazy += rdp->qlen_lazy;
-		rsp->qlen += rdp->qlen;
-		rdp->n_cbs_orphaned += rdp->qlen;
-		rdp->qlen_lazy = 0;
-		WRITE_ONCE(rdp->qlen, 0);
-	}
+	rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist);
+	rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done);
 
 	/*
 	 * Next, move those callbacks still needing a grace period to
@@ -2682,31 +2591,18 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 	 * Some of the callbacks might have gone partway through a grace
 	 * period, but that is too bad.  They get to start over because we
 	 * cannot assume that grace periods are synchronized across CPUs.
-	 * We don't bother updating the ->nxttail[] array yet, instead
-	 * we just reset the whole thing later on.
 	 */
-	if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) {
-		*rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL];
-		rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
-		*rdp->nxttail[RCU_DONE_TAIL] = NULL;
-	}
+	rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
 
 	/*
 	 * Then move the ready-to-invoke callbacks to the orphanage,
 	 * where some other CPU will pick them up.  These will not be
 	 * required to pass though another grace period: They are done.
 	 */
-	if (rdp->nxtlist != NULL) {
-		*rsp->orphan_donetail = rdp->nxtlist;
-		rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
-	}
+	rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done);
 
-	/*
-	 * Finally, initialize the rcu_data structure's list to empty and
-	 * disallow further callbacks on this CPU.
-	 */
-	init_callback_list(rdp);
-	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
+	/* Finally, disallow further callbacks on this CPU.  */
+	rcu_segcblist_disable(&rdp->cblist);
 }
 
 /*
@@ -2715,7 +2611,6 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
  */
 static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 {
-	int i;
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
 	/* No-CBs CPUs are handled specially. */
@@ -2724,13 +2619,11 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 		return;
 
 	/* Do the accounting first. */
-	rdp->qlen_lazy += rsp->qlen_lazy;
-	rdp->qlen += rsp->qlen;
-	rdp->n_cbs_adopted += rsp->qlen;
-	if (rsp->qlen_lazy != rsp->qlen)
+	rdp->n_cbs_adopted += rcu_cblist_n_cbs(&rsp->orphan_done);
+	if (rcu_cblist_n_lazy_cbs(&rsp->orphan_done) !=
+	    rcu_cblist_n_cbs(&rsp->orphan_done))
 		rcu_idle_count_callbacks_posted();
-	rsp->qlen_lazy = 0;
-	rsp->qlen = 0;
+	rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
 
 	/*
 	 * We do not need a memory barrier here because the only way we
@@ -2738,24 +2631,13 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 	 * we are the task doing the rcu_barrier().
 	 */
 
-	/* First adopt the ready-to-invoke callbacks. */
-	if (rsp->orphan_donelist != NULL) {
-		*rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL];
-		*rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist;
-		for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--)
-			if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
-				rdp->nxttail[i] = rsp->orphan_donetail;
-		rsp->orphan_donelist = NULL;
-		rsp->orphan_donetail = &rsp->orphan_donelist;
-	}
-
-	/* And then adopt the callbacks that still need a grace period. */
-	if (rsp->orphan_nxtlist != NULL) {
-		*rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist;
-		rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail;
-		rsp->orphan_nxtlist = NULL;
-		rsp->orphan_nxttail = &rsp->orphan_nxtlist;
-	}
+	/* First adopt the ready-to-invoke callbacks, then the done ones. */
+	rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done);
+	WARN_ON_ONCE(!rcu_cblist_empty(&rsp->orphan_done));
+	rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
+	WARN_ON_ONCE(!rcu_cblist_empty(&rsp->orphan_pend));
+	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) !=
+		     !rcu_segcblist_n_cbs(&rdp->cblist));
 }
 
 /*
@@ -2843,9 +2725,11 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 	rcu_adopt_orphan_cbs(rsp, flags);
 	raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
 
-	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
-		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
-		  cpu, rdp->qlen, rdp->nxtlist);
+	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
+		  !rcu_segcblist_empty(&rdp->cblist),
+		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
+		  cpu, rcu_segcblist_n_cbs(&rdp->cblist),
+		  rcu_segcblist_first_cb(&rdp->cblist));
 }
 
 /*
@@ -2855,14 +2739,17 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 {
 	unsigned long flags;
-	struct rcu_head *next, *list, **tail;
-	long bl, count, count_lazy;
-	int i;
+	struct rcu_head *rhp;
+	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
+	long bl, count;
 
 	/* If no callbacks are ready, just return. */
-	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
-		trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
-		trace_rcu_batch_end(rsp->name, 0, !!READ_ONCE(rdp->nxtlist),
+	if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
+		trace_rcu_batch_start(rsp->name,
+				      rcu_segcblist_n_lazy_cbs(&rdp->cblist),
+				      rcu_segcblist_n_cbs(&rdp->cblist), 0);
+		trace_rcu_batch_end(rsp->name, 0,
+				    !rcu_segcblist_empty(&rdp->cblist),
 				    need_resched(), is_idle_task(current),
 				    rcu_is_callbacks_kthread());
 		return;
@@ -2870,73 +2757,62 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	/*
 	 * Extract the list of ready callbacks, disabling to prevent
-	 * races with call_rcu() from interrupt handlers.
+	 * races with call_rcu() from interrupt handlers.  Leave the
+	 * callback counts, as rcu_barrier() needs to be conservative.
 	 */
 	local_irq_save(flags);
 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
 	bl = rdp->blimit;
-	trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
-	list = rdp->nxtlist;
-	rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
-	*rdp->nxttail[RCU_DONE_TAIL] = NULL;
-	tail = rdp->nxttail[RCU_DONE_TAIL];
-	for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
-		if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
-			rdp->nxttail[i] = &rdp->nxtlist;
+	trace_rcu_batch_start(rsp->name, rcu_segcblist_n_lazy_cbs(&rdp->cblist),
+			      rcu_segcblist_n_cbs(&rdp->cblist), bl);
+	rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
 	local_irq_restore(flags);
 
 	/* Invoke callbacks. */
-	count = count_lazy = 0;
-	while (list) {
-		next = list->next;
-		prefetch(next);
-		debug_rcu_head_unqueue(list);
-		if (__rcu_reclaim(rsp->name, list))
-			count_lazy++;
-		list = next;
-		/* Stop only if limit reached and CPU has something to do. */
-		if (++count >= bl &&
+	rhp = rcu_cblist_dequeue(&rcl);
+	for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
+		debug_rcu_head_unqueue(rhp);
+		if (__rcu_reclaim(rsp->name, rhp))
+			rcu_cblist_dequeued_lazy(&rcl);
+		/*
+		 * Stop only if limit reached and CPU has something to do.
+		 * Note: The rcl structure counts down from zero.
+		 */
+		if (-rcu_cblist_n_cbs(&rcl) >= bl &&
 		    (need_resched() ||
 		     (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
 			break;
 	}
 
 	local_irq_save(flags);
-	trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
-			    is_idle_task(current),
+	count = -rcu_cblist_n_cbs(&rcl);
+	trace_rcu_batch_end(rsp->name, count, !rcu_cblist_empty(&rcl),
+			    need_resched(), is_idle_task(current),
 			    rcu_is_callbacks_kthread());
 
-	/* Update count, and requeue any remaining callbacks. */
-	if (list != NULL) {
-		*tail = rdp->nxtlist;
-		rdp->nxtlist = list;
-		for (i = 0; i < RCU_NEXT_SIZE; i++)
-			if (&rdp->nxtlist == rdp->nxttail[i])
-				rdp->nxttail[i] = tail;
-			else
-				break;
-	}
+	/* Update counts and requeue any remaining callbacks. */
+	rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
 	smp_mb(); /* List handling before counting for rcu_barrier(). */
-	rdp->qlen_lazy -= count_lazy;
-	WRITE_ONCE(rdp->qlen, rdp->qlen - count);
 	rdp->n_cbs_invoked += count;
+	rcu_segcblist_insert_count(&rdp->cblist, &rcl);
 
 	/* Reinstate batch limit if we have worked down the excess. */
-	if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
+	count = rcu_segcblist_n_cbs(&rdp->cblist);
+	if (rdp->blimit == LONG_MAX && count <= qlowmark)
 		rdp->blimit = blimit;
 
 	/* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
-	if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
+	if (count == 0 && rdp->qlen_last_fqs_check != 0) {
 		rdp->qlen_last_fqs_check = 0;
 		rdp->n_force_qs_snap = rsp->n_force_qs;
-	} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
-		rdp->qlen_last_fqs_check = rdp->qlen;
-	WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
+	} else if (count < rdp->qlen_last_fqs_check - qhimark)
+		rdp->qlen_last_fqs_check = count;
+	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0));
 
 	local_irq_restore(flags);
 
 	/* Re-invoke RCU core processing if there are callbacks remaining. */
-	if (cpu_has_callbacks_ready_to_invoke(rdp))
+	if (rcu_segcblist_ready_cbs(&rdp->cblist))
 		invoke_rcu_core();
 }
 
@@ -3120,7 +2996,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	}
 
 	/* If there are callbacks ready, invoke them. */
-	if (cpu_has_callbacks_ready_to_invoke(rdp))
+	if (rcu_segcblist_ready_cbs(&rdp->cblist))
 		invoke_rcu_callbacks(rsp, rdp);
 
 	/* Do any needed deferred wakeups of rcuo kthreads. */
@@ -3192,7 +3068,8 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 	 * invoking force_quiescent_state() if the newly enqueued callback
 	 * is the only one waiting for a grace period to complete.
 	 */
-	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
+	if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) >
+		     rdp->qlen_last_fqs_check + qhimark)) {
 
 		/* Are we ignoring a completed grace period? */
 		note_gp_changes(rsp, rdp);
@@ -3210,10 +3087,10 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 			/* Give the grace period a kick. */
 			rdp->blimit = LONG_MAX;
 			if (rsp->n_force_qs == rdp->n_force_qs_snap &&
-			    *rdp->nxttail[RCU_DONE_TAIL] != head)
+			    rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
 				force_quiescent_state(rsp);
 			rdp->n_force_qs_snap = rsp->n_force_qs;
-			rdp->qlen_last_fqs_check = rdp->qlen;
+			rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
 		}
 	}
 }
@@ -3253,7 +3130,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
 	rdp = this_cpu_ptr(rsp->rda);
 
 	/* Add the callback to our list. */
-	if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
+	if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist)) || cpu != -1) {
 		int offline;
 
 		if (cpu != -1)
@@ -3272,23 +3149,21 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
 		 */
 		BUG_ON(cpu != -1);
 		WARN_ON_ONCE(!rcu_is_watching());
-		if (!likely(rdp->nxtlist))
-			init_default_callback_list(rdp);
+		if (rcu_segcblist_empty(&rdp->cblist))
+			rcu_segcblist_init(&rdp->cblist);
 	}
-	WRITE_ONCE(rdp->qlen, rdp->qlen + 1);
-	if (lazy)
-		rdp->qlen_lazy++;
-	else
+	rcu_segcblist_enqueue(&rdp->cblist, head, lazy);
+	if (!lazy)
 		rcu_idle_count_callbacks_posted();
-	smp_mb();  /* Count before adding callback for rcu_barrier(). */
-	*rdp->nxttail[RCU_NEXT_TAIL] = head;
-	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
 
 	if (__is_kfree_rcu_offset((unsigned long)func))
 		trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
-					 rdp->qlen_lazy, rdp->qlen);
+					 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
+					 rcu_segcblist_n_cbs(&rdp->cblist));
 	else
-		trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
+		trace_rcu_callback(rsp->name, head,
+				   rcu_segcblist_n_lazy_cbs(&rdp->cblist),
+				   rcu_segcblist_n_cbs(&rdp->cblist));
 
 	/* Go handle any RCU core processing required. */
 	__call_rcu_core(rsp, rdp, head, flags);
@@ -3600,7 +3475,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	}
 
 	/* Does this CPU have callbacks ready to invoke? */
-	if (cpu_has_callbacks_ready_to_invoke(rdp)) {
+	if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
 		rdp->n_rp_cb_ready++;
 		return 1;
 	}
@@ -3664,10 +3539,10 @@ static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy)
 
 	for_each_rcu_flavor(rsp) {
 		rdp = this_cpu_ptr(rsp->rda);
-		if (!rdp->nxtlist)
+		if (rcu_segcblist_empty(&rdp->cblist))
 			continue;
 		hc = true;
-		if (rdp->qlen != rdp->qlen_lazy || !all_lazy) {
+		if (rcu_segcblist_n_nonlazy_cbs(&rdp->cblist) || !all_lazy) {
 			al = false;
 			break;
 		}
@@ -3776,7 +3651,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 				__call_rcu(&rdp->barrier_head,
 					   rcu_barrier_callback, rsp, cpu, 0);
 			}
-		} else if (READ_ONCE(rdp->qlen)) {
+		} else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
 			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
 					   rsp->barrier_sequence);
 			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
@@ -3885,8 +3760,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
-	if (!rdp->nxtlist)
-		init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
+	if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */
+	    !init_nocb_callback_list(rdp))
+		rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 	rcu_sysidle_init_percpu_data(rdp->dynticks);
 	rcu_dynticks_eqs_online();
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 376c01e539c7..93889ff21dbb 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -30,6 +30,7 @@
 #include <linux/seqlock.h>
 #include <linux/swait.h>
 #include <linux/stop_machine.h>
+#include "rcu_segcblist.h"
 
 /*
  * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
@@ -335,34 +336,9 @@ struct rcu_data {
 					/* period it is aware of. */
 
 	/* 2) batch handling */
-	/*
-	 * If nxtlist is not NULL, it is partitioned as follows.
-	 * Any of the partitions might be empty, in which case the
-	 * pointer to that partition will be equal to the pointer for
-	 * the following partition.  When the list is empty, all of
-	 * the nxttail elements point to the ->nxtlist pointer itself,
-	 * which in that case is NULL.
-	 *
-	 * [nxtlist, *nxttail[RCU_DONE_TAIL]):
-	 *	Entries that batch # <= ->completed
-	 *	The grace period for these entries has completed, and
-	 *	the other grace-period-completed entries may be moved
-	 *	here temporarily in rcu_process_callbacks().
-	 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
-	 *	Entries that batch # <= ->completed - 1: waiting for current GP
-	 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
-	 *	Entries known to have arrived before current GP ended
-	 * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]):
-	 *	Entries that might have arrived after current GP ended
-	 *	Note that the value of *nxttail[RCU_NEXT_TAIL] will
-	 *	always be NULL, as this is the end of the list.
-	 */
-	struct rcu_head *nxtlist;
-	struct rcu_head **nxttail[RCU_NEXT_SIZE];
-	unsigned long	nxtcompleted[RCU_NEXT_SIZE];
-					/* grace periods for sublists. */
-	long		qlen_lazy;	/* # of lazy queued callbacks */
-	long		qlen;		/* # of queued callbacks, incl lazy */
+	struct rcu_segcblist cblist;	/* Segmented callback list, with */
+					/* different callbacks waiting for */
+					/* different grace periods. */
 	long		qlen_last_fqs_check;
 					/* qlen at last check for QS forcing */
 	unsigned long	n_cbs_invoked;	/* count of RCU cbs invoked. */
@@ -500,14 +476,11 @@ struct rcu_state {
 
 	raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
 						/* Protect following fields. */
-	struct rcu_head *orphan_nxtlist;	/* Orphaned callbacks that */
+	struct rcu_cblist orphan_pend;		/* Orphaned callbacks that */
 						/*  need a grace period. */
-	struct rcu_head **orphan_nxttail;	/* Tail of above. */
-	struct rcu_head *orphan_donelist;	/* Orphaned callbacks that */
+	struct rcu_cblist orphan_done;		/* Orphaned callbacks that */
 						/*  are ready to invoke. */
-	struct rcu_head **orphan_donetail;	/* Tail of above. */
-	long qlen_lazy;				/* Number of lazy callbacks. */
-	long qlen;				/* Total number of callbacks. */
+						/* (Contains counts.) */
 	/* End of fields guarded by orphan_lock. */
 
 	struct mutex barrier_mutex;		/* Guards barrier fields. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 621296a6694b..f88356652dcf 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1350,10 +1350,10 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
 		 */
 		if ((rdp->completed != rnp->completed ||
 		     unlikely(READ_ONCE(rdp->gpwrap))) &&
-		    rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
+		    rcu_segcblist_pend_cbs(&rdp->cblist))
 			note_gp_changes(rsp, rdp);
 
-		if (cpu_has_callbacks_ready_to_invoke(rdp))
+		if (rcu_segcblist_ready_cbs(&rdp->cblist))
 			cbs_ready = true;
 	}
 	return cbs_ready;
@@ -1461,7 +1461,7 @@ static void rcu_prepare_for_idle(void)
 	rdtp->last_accelerate = jiffies;
 	for_each_rcu_flavor(rsp) {
 		rdp = this_cpu_ptr(rsp->rda);
-		if (!*rdp->nxttail[RCU_DONE_TAIL])
+		if (rcu_segcblist_pend_cbs(&rdp->cblist))
 			continue;
 		rnp = rdp->mynode;
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
@@ -1529,7 +1529,7 @@ static void rcu_oom_notify_cpu(void *unused)
 
 	for_each_rcu_flavor(rsp) {
 		rdp = raw_cpu_ptr(rsp->rda);
-		if (rdp->qlen_lazy != 0) {
+		if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) {
 			atomic_inc(&oom_callback_count);
 			rsp->call(&rdp->oom_head, rcu_oom_callback);
 		}
@@ -1934,30 +1934,26 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 						     struct rcu_data *rdp,
 						     unsigned long flags)
 {
-	long ql = rsp->qlen;
-	long qll = rsp->qlen_lazy;
+	long ql = rcu_cblist_n_cbs(&rsp->orphan_done);
+	long qll = rcu_cblist_n_lazy_cbs(&rsp->orphan_done);
 
 	/* If this is not a no-CBs CPU, tell the caller to do it the old way. */
 	if (!rcu_is_nocb_cpu(smp_processor_id()))
 		return false;
-	rsp->qlen = 0;
-	rsp->qlen_lazy = 0;
 
 	/* First, enqueue the donelist, if any.  This preserves CB ordering. */
-	if (rsp->orphan_donelist != NULL) {
-		__call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
-					rsp->orphan_donetail, ql, qll, flags);
-		ql = qll = 0;
-		rsp->orphan_donelist = NULL;
-		rsp->orphan_donetail = &rsp->orphan_donelist;
+	if (!rcu_cblist_empty(&rsp->orphan_done)) {
+		__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_done),
+					rcu_cblist_tail(&rsp->orphan_done),
+					ql, qll, flags);
 	}
-	if (rsp->orphan_nxtlist != NULL) {
-		__call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
-					rsp->orphan_nxttail, ql, qll, flags);
-		ql = qll = 0;
-		rsp->orphan_nxtlist = NULL;
-		rsp->orphan_nxttail = &rsp->orphan_nxtlist;
+	if (!rcu_cblist_empty(&rsp->orphan_pend)) {
+		__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_pend),
+					rcu_cblist_tail(&rsp->orphan_pend),
+					ql, qll, flags);
 	}
+	rcu_cblist_init(&rsp->orphan_done);
+	rcu_cblist_init(&rsp->orphan_pend);
 	return true;
 }
 
@@ -2399,16 +2395,16 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
 		return false;
 
 	/* If there are early-boot callbacks, move them to nocb lists. */
-	if (rdp->nxtlist) {
-		rdp->nocb_head = rdp->nxtlist;
-		rdp->nocb_tail = rdp->nxttail[RCU_NEXT_TAIL];
-		atomic_long_set(&rdp->nocb_q_count, rdp->qlen);
-		atomic_long_set(&rdp->nocb_q_count_lazy, rdp->qlen_lazy);
-		rdp->nxtlist = NULL;
-		rdp->qlen = 0;
-		rdp->qlen_lazy = 0;
+	if (!rcu_segcblist_empty(&rdp->cblist)) {
+		rdp->nocb_head = rcu_segcblist_head(&rdp->cblist);
+		rdp->nocb_tail = rcu_segcblist_tail(&rdp->cblist);
+		atomic_long_set(&rdp->nocb_q_count,
+				rcu_segcblist_n_cbs(&rdp->cblist));
+		atomic_long_set(&rdp->nocb_q_count_lazy,
+				rcu_segcblist_n_lazy_cbs(&rdp->cblist));
+		rcu_segcblist_init(&rdp->cblist);
 	}
-	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
+	rcu_segcblist_disable(&rdp->cblist);
 	return true;
 }
 
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 65b43be38e68..066c64071a7b 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -41,6 +41,7 @@
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/prefetch.h>
 
 #define RCU_TREE_NONCORE
 #include "tree.h"
@@ -128,17 +129,15 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->dynticks_fqs);
 	seq_printf(m, " of=%lu", rdp->offline_fqs);
 	rcu_nocb_q_lengths(rdp, &ql, &qll);
-	qll += rdp->qlen_lazy;
-	ql += rdp->qlen;
+	qll += rcu_segcblist_n_lazy_cbs(&rdp->cblist);
+	ql += rcu_segcblist_n_cbs(&rdp->cblist);
 	seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
 		   qll, ql,
-		   ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
-			rdp->nxttail[RCU_NEXT_TAIL]],
-		   ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
-			rdp->nxttail[RCU_NEXT_READY_TAIL]],
-		   ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
-			rdp->nxttail[RCU_WAIT_TAIL]],
-		   ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]);
+		   ".N"[!rcu_segcblist_segempty(&rdp->cblist, RCU_NEXT_TAIL)],
+		   ".R"[!rcu_segcblist_segempty(&rdp->cblist,
+						RCU_NEXT_READY_TAIL)],
+		   ".W"[!rcu_segcblist_segempty(&rdp->cblist, RCU_WAIT_TAIL)],
+		   ".D"[!rcu_segcblist_segempty(&rdp->cblist, RCU_DONE_TAIL)]);
 #ifdef CONFIG_RCU_BOOST
 	seq_printf(m, " kt=%d/%c ktl=%x",
 		   per_cpu(rcu_cpu_has_work, rdp->cpu),
@@ -276,7 +275,9 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 	seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
 		   rsp->n_force_qs, rsp->n_force_qs_ngp,
 		   rsp->n_force_qs - rsp->n_force_qs_ngp,
-		   READ_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen);
+		   READ_ONCE(rsp->n_force_qs_lh),
+		   rcu_cblist_n_lazy_cbs(&rsp->orphan_done),
+		   rcu_cblist_n_cbs(&rsp->orphan_done));
 	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
 		if (rnp->level != level) {
 			seq_puts(m, "\n");
-- 
cgit v1.2.3


From 900b1028ec388e50c98200641ae4274794c807cf Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 10 Feb 2017 14:32:54 -0800
Subject: srcu: Allow SRCU to access rcu_scheduler_active

This is primarily a code-movement commit in preparation for allowing
SRCU to handle early-boot SRCU grace periods.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutiny.h  |  6 +++---
 kernel/rcu/tiny_plugin.h |  9 +++++----
 kernel/rcu/tree.c        |  2 +-
 kernel/rcu/tree_exp.h    | 12 -----------
 kernel/rcu/update.c      | 52 +++++++++++++++++++++++++++++++-----------------
 5 files changed, 43 insertions(+), 38 deletions(-)

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 6c9d941e3962..5219be250f00 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -217,14 +217,14 @@ static inline void exit_rcu(void)
 {
 }
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
 extern int rcu_scheduler_active __read_mostly;
 void rcu_scheduler_starting(void);
-#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+#else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
 static inline void rcu_scheduler_starting(void)
 {
 }
-#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+#endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
 
 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
 
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index df3a60e19f07..371034e77f87 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -52,7 +52,7 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = {
 	RCU_TRACE(.name = "rcu_bh")
 };
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
 #include <linux/kernel_stat.h>
 
 int rcu_scheduler_active __read_mostly;
@@ -65,15 +65,16 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active);
  * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage.
  * The reason for this is that Tiny RCU does not need kthreads, so does
  * not have to care about the fact that the scheduler is half-initialized
- * at a certain phase of the boot process.
+ * at a certain phase of the boot process.  Unless SRCU is in the mix.
  */
 void __init rcu_scheduler_starting(void)
 {
 	WARN_ON(nr_context_switches() > 0);
-	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
+	rcu_scheduler_active = IS_ENABLED(CONFIG_SRCU)
+		? RCU_SCHEDULER_INIT : RCU_SCHEDULER_RUNNING;
 }
 
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
 
 #ifdef CONFIG_RCU_TRACE
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8cc9d40b41ea..2380d1e3dfb8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3974,7 +3974,7 @@ early_initcall(rcu_spawn_gp_kthread);
  * task is booting the system, and such primitives are no-ops).  After this
  * function is called, any synchronous grace-period primitives are run as
  * expedited, with the requesting task driving the grace period forward.
- * A later core_initcall() rcu_exp_runtime_mode() will switch to full
+ * A later core_initcall() rcu_set_runtime_mode() will switch to full
  * runtime RCU functionality.
  */
 void rcu_scheduler_starting(void)
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index a1f52bbe9db6..51ca287828a2 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -737,15 +737,3 @@ void synchronize_rcu_expedited(void)
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
-
-/*
- * Switch to run-time mode once Tree RCU has fully initialized.
- */
-static int __init rcu_exp_runtime_mode(void)
-{
-	rcu_test_sync_prims();
-	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
-	rcu_test_sync_prims();
-	return 0;
-}
-core_initcall(rcu_exp_runtime_mode);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 55c8530316c7..c5df0d756900 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -124,7 +124,7 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held);
  * non-expedited counterparts?  Intended for use within RCU.  Note
  * that if the user specifies both rcu_expedited and rcu_normal, then
  * rcu_normal wins.  (Except during the time period during boot from
- * when the first task is spawned until the rcu_exp_runtime_mode()
+ * when the first task is spawned until the rcu_set_runtime_mode()
  * core_initcall() is invoked, at which point everything is expedited.)
  */
 bool rcu_gp_is_normal(void)
@@ -190,6 +190,39 @@ void rcu_end_inkernel_boot(void)
 
 #endif /* #ifndef CONFIG_TINY_RCU */
 
+/*
+ * Test each non-SRCU synchronous grace-period wait API.  This is
+ * useful just after a change in mode for these primitives, and
+ * during early boot.
+ */
+void rcu_test_sync_prims(void)
+{
+	if (!IS_ENABLED(CONFIG_PROVE_RCU))
+		return;
+	synchronize_rcu();
+	synchronize_rcu_bh();
+	synchronize_sched();
+	synchronize_rcu_expedited();
+	synchronize_rcu_bh_expedited();
+	synchronize_sched_expedited();
+}
+
+#if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU)
+
+/*
+ * Switch to run-time mode once RCU has fully initialized.
+ */
+static int __init rcu_set_runtime_mode(void)
+{
+	rcu_test_sync_prims();
+	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
+	rcu_test_sync_prims();
+	return 0;
+}
+core_initcall(rcu_set_runtime_mode);
+
+#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */
+
 #ifdef CONFIG_PREEMPT_RCU
 
 /*
@@ -817,23 +850,6 @@ static void rcu_spawn_tasks_kthread(void)
 
 #endif /* #ifdef CONFIG_TASKS_RCU */
 
-/*
- * Test each non-SRCU synchronous grace-period wait API.  This is
- * useful just after a change in mode for these primitives, and
- * during early boot.
- */
-void rcu_test_sync_prims(void)
-{
-	if (!IS_ENABLED(CONFIG_PROVE_RCU))
-		return;
-	synchronize_rcu();
-	synchronize_rcu_bh();
-	synchronize_sched();
-	synchronize_rcu_expedited();
-	synchronize_rcu_bh_expedited();
-	synchronize_sched_expedited();
-}
-
 #ifdef CONFIG_PROVE_RCU
 
 /*
-- 
cgit v1.2.3


From b5eaeaa509d30812049f9e1c05446d564e91616b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 10 Feb 2017 14:50:46 -0800
Subject: srcu: Allow early boot use of synchronize_srcu()

This commit checks for pre-scheduler state, and if that early in the
boot process, synchronize_srcu() and friends are no-ops.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/srcu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index e6da9fc1f0e9..7e7ecaa50dc5 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -417,6 +417,8 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
 			 lock_is_held(&rcu_sched_lock_map),
 			 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
 
+	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
+		return;
 	might_sleep();
 	init_completion(&rcu.completion);
 
-- 
cgit v1.2.3


From bdcabf4c7db719129ca6cb94b02f50aa4726c952 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 15 Feb 2017 17:50:50 -0800
Subject: rcu: Add single-element dequeue functions to rcu_segcblist

This commit adds single-element dequeue functions to rcu_segcblist.
These are less efficient than using the extract and insert functions,
but allow more precise debugging code.  These functions are thus
expected to be used only in debug builds, for example, CONFIG_PROVE_RCU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcu_segcblist.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 24078f3c0218..982e3e05b22a 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -289,6 +289,51 @@ static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
 	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
 }
 
+/*
+ * Dequeue and return the first ready-to-invoke callback.  If there
+ * are no ready-to-invoke callbacks, return NULL.  Disables interrupts
+ * to avoid interference.  Does not protect from interference from other
+ * CPUs or tasks.
+ */
+static inline struct rcu_head *
+rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
+{
+	unsigned long flags;
+	int i;
+	struct rcu_head *rhp;
+
+	local_irq_save(flags);
+	if (!rcu_segcblist_ready_cbs(rsclp)) {
+		local_irq_restore(flags);
+		return NULL;
+	}
+	rhp = rsclp->head;
+	BUG_ON(!rhp);
+	rsclp->head = rhp->next;
+	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
+		if (rsclp->tails[i] != &rhp->next)
+			break;
+		rsclp->tails[i] = &rsclp->head;
+	}
+	smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
+	WRITE_ONCE(rsclp->len, rsclp->len - 1);
+	local_irq_restore(flags);
+	return rhp;
+}
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	rsclp->len_lazy--;
+	local_irq_restore(flags);
+}
+
 /*
  * Return a pointer to the first callback in the specified rcu_segcblist
  * structure.  This is useful for diagnostics.
-- 
cgit v1.2.3


From 2e8c28c2dd96c6f1f2d454a4e4b928385841e247 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 20 Feb 2017 14:57:17 -0800
Subject: srcu: Move rcu_seq_start() and friends to rcu.h

This commit moves rcu_seq_start(), rcu_seq_end(), rcu_seq_snap(),
and rcu_seq_done() from kernel/rcu/tree.c to kernel/rcu/rcu.h.
This will allow SRCU to use these functions, which in turn will
allow SRCU to move from a single global callback queue to a
per-CPU callback queue.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcu.h  | 40 ++++++++++++++++++++++++++++++++++++++++
 kernel/rcu/tree.c | 35 -----------------------------------
 2 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 8700a81daf56..91e0bf31f6ce 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -56,6 +56,46 @@
 #define DYNTICK_TASK_EXIT_IDLE	   (DYNTICK_TASK_NEST_VALUE + \
 				    DYNTICK_TASK_FLAG)
 
+
+/*
+ * Grace-period counter management.
+ */
+
+/* Adjust sequence number for start of update-side operation. */
+static inline void rcu_seq_start(unsigned long *sp)
+{
+	WRITE_ONCE(*sp, *sp + 1);
+	smp_mb(); /* Ensure update-side operation after counter increment. */
+	WARN_ON_ONCE(!(*sp & 0x1));
+}
+
+/* Adjust sequence number for end of update-side operation. */
+static inline void rcu_seq_end(unsigned long *sp)
+{
+	smp_mb(); /* Ensure update-side operation before counter increment. */
+	WRITE_ONCE(*sp, *sp + 1);
+	WARN_ON_ONCE(*sp & 0x1);
+}
+
+/* Take a snapshot of the update side's sequence number. */
+static inline unsigned long rcu_seq_snap(unsigned long *sp)
+{
+	unsigned long s;
+
+	s = (READ_ONCE(*sp) + 3) & ~0x1;
+	smp_mb(); /* Above access must not bleed into critical section. */
+	return s;
+}
+
+/*
+ * Given a snapshot from rcu_seq_snap(), determine whether or not a
+ * full update-side operation has occurred.
+ */
+static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
+{
+	return ULONG_CMP_GE(READ_ONCE(*sp), s);
+}
+
 /*
  * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
  * by call_rcu() and rcu callback execution, and are therefore not part of the
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2380d1e3dfb8..844a030c1960 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3409,41 +3409,6 @@ void cond_synchronize_sched(unsigned long oldstate)
 }
 EXPORT_SYMBOL_GPL(cond_synchronize_sched);
 
-/* Adjust sequence number for start of update-side operation. */
-static void rcu_seq_start(unsigned long *sp)
-{
-	WRITE_ONCE(*sp, *sp + 1);
-	smp_mb(); /* Ensure update-side operation after counter increment. */
-	WARN_ON_ONCE(!(*sp & 0x1));
-}
-
-/* Adjust sequence number for end of update-side operation. */
-static void rcu_seq_end(unsigned long *sp)
-{
-	smp_mb(); /* Ensure update-side operation before counter increment. */
-	WRITE_ONCE(*sp, *sp + 1);
-	WARN_ON_ONCE(*sp & 0x1);
-}
-
-/* Take a snapshot of the update side's sequence number. */
-static unsigned long rcu_seq_snap(unsigned long *sp)
-{
-	unsigned long s;
-
-	s = (READ_ONCE(*sp) + 3) & ~0x1;
-	smp_mb(); /* Above access must not bleed into critical section. */
-	return s;
-}
-
-/*
- * Given a snapshot from rcu_seq_snap(), determine whether or not a
- * full update-side operation has occurred.
- */
-static bool rcu_seq_done(unsigned long *sp, unsigned long s)
-{
-	return ULONG_CMP_GE(READ_ONCE(*sp), s);
-}
-
 /*
  * Check to see if there is any immediate RCU-related work to be done
  * by the current CPU, for the specified type of RCU, returning 1 if so.
-- 
cgit v1.2.3


From 3c345825c899df0751b01143b159ddaefb91a220 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 4 Mar 2017 12:33:53 -0800
Subject: rcu: Expedited wakeups need to be fully ordered

Expedited grace periods use workqueue handlers that wake up the requesters,
but there is no lock mediating this wakeup.  Therefore, memory barriers
are required to ensure that the handler's memory references are seen by
all to occur before synchronize_*_expedited() returns to its caller.
Possibly detected by syzkaller.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree_exp.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 51ca287828a2..027e123d93c7 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -533,6 +533,7 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
 				rnp->exp_seq_rq = s;
 			spin_unlock(&rnp->exp_lock);
 		}
+		smp_mb(); /* All above changes before wakeup. */
 		wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]);
 	}
 	trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
@@ -614,6 +615,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
 	wait_event(rnp->exp_wq[(s >> 1) & 0x3],
 		   sync_exp_work_done(rsp,
 				      &rdp->exp_workdone0, s));
+	smp_mb(); /* Workqueue actions happen before return. */
 
 	/* Let the next expedited grace period start. */
 	mutex_unlock(&rsp->exp_mutex);
-- 
cgit v1.2.3


From f010ed82c7ba8b30f30872800100ad6b6efe2a6a Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Sun, 5 Mar 2017 12:17:31 -0800
Subject: rcu: Fix warning in rcu_seq_end()

The rcu_seq_end() function increments seq signifying completion
of a grace period, after that checks that the seq is even and wakes
_synchronize_rcu_expedited().  The _synchronize_rcu_expedited() function
uses wait_event() to wait for even seq.  The problem is that wait_event()
can return as soon as seq becomes even without waiting for the wakeup.
In such case the warning in rcu_seq_end() can falsely fire if the next
expedited grace period starts before the check.

Check that seq has good value before incrementing it.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: syzkaller@googlegroups.com
Cc: linux-kernel@vger.kernel.org
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: josh@joshtriplett.org
Cc: jiangshanlai@gmail.com
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

---

syzkaller-triggered warning:

WARNING: CPU: 0 PID: 4832 at kernel/rcu/tree.c:3533
rcu_seq_end+0x110/0x140 kernel/rcu/tree.c:3533
CPU: 0 PID: 4832 Comm: kworker/0:3 Not tainted 4.10.0+ #276
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Workqueue: events wait_rcu_exp_gp
Call Trace:
 __dump_stack lib/dump_stack.c:15 [inline]
 dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
 panic+0x1fb/0x412 kernel/panic.c:179
 __warn+0x1c4/0x1e0 kernel/panic.c:540
 warn_slowpath_null+0x2c/0x40 kernel/panic.c:583
 rcu_seq_end+0x110/0x140 kernel/rcu/tree.c:3533
 rcu_exp_gp_seq_end kernel/rcu/tree_exp.h:36 [inline]
 rcu_exp_wait_wake+0x8a9/0x1330 kernel/rcu/tree_exp.h:517
 rcu_exp_sel_wait_wake kernel/rcu/tree_exp.h:559 [inline]
 wait_rcu_exp_gp+0x83/0xc0 kernel/rcu/tree_exp.h:570
 process_one_work+0xc06/0x1c20 kernel/workqueue.c:2096
 worker_thread+0x223/0x19c0 kernel/workqueue.c:2230
 kthread+0x326/0x3f0 kernel/kthread.c:227
 ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430
---
---
 kernel/rcu/rcu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 91e0bf31f6ce..0bc1313c49e2 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -73,8 +73,8 @@ static inline void rcu_seq_start(unsigned long *sp)
 static inline void rcu_seq_end(unsigned long *sp)
 {
 	smp_mb(); /* Ensure update-side operation before counter increment. */
+	WARN_ON_ONCE(!(*sp & 0x1));
 	WRITE_ONCE(*sp, *sp + 1);
-	WARN_ON_ONCE(*sp & 0x1);
 }
 
 /* Take a snapshot of the update side's sequence number. */
-- 
cgit v1.2.3


From c6e56f593ac2df436700527c3488d4ed224c3acf Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 9 Mar 2017 16:16:42 -0800
Subject: srcu: Push srcu_advance_batches() fastpath into common case

This commit simplifies the SRCU state machine by pushing the
srcu_advance_batches() idle-SRCU fastpath into the common case.  This is
done by giving srcu_reschedule() a delay parameter, which is zero in
the call from srcu_advance_batches().

This commit is a step towards numbering callbacks in order to
efficiently handle per-CPU callback lists.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/srcu.c | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 7e7ecaa50dc5..821ecda873f2 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -399,8 +399,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 }
 EXPORT_SYMBOL_GPL(call_srcu);
 
-static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
-static void srcu_reschedule(struct srcu_struct *sp);
+static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
 
 /*
  * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
@@ -409,7 +408,6 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
 {
 	struct rcu_synchronize rcu;
 	struct rcu_head *head = &rcu.head;
-	bool done = false;
 
 	RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
 			 lock_is_held(&rcu_bh_lock_map) ||
@@ -431,25 +429,15 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
 		sp->running = true;
 		rcu_batch_queue(&sp->batch_check0, head);
 		spin_unlock_irq(&sp->queue_lock);
-
-		srcu_advance_batches(sp, trycount);
-		if (!rcu_batch_empty(&sp->batch_done)) {
-			BUG_ON(sp->batch_done.head != head);
-			rcu_batch_dequeue(&sp->batch_done);
-			done = true;
-		}
 		/* give the processing owner to work_struct */
-		srcu_reschedule(sp);
+		srcu_reschedule(sp, 0);
 	} else {
 		rcu_batch_queue(&sp->batch_queue, head);
 		spin_unlock_irq(&sp->queue_lock);
 	}
 
-	if (!done) {
-		wait_for_completion(&rcu.completion);
-		smp_mb(); /* Caller's later accesses after GP. */
-	}
-
+	wait_for_completion(&rcu.completion);
+	smp_mb(); /* Caller's later accesses after GP. */
 }
 
 /**
@@ -639,7 +627,7 @@ static void srcu_invoke_callbacks(struct srcu_struct *sp)
  * Finished one round of SRCU grace period.  Start another if there are
  * more SRCU callbacks queued, otherwise put SRCU into not-running state.
  */
-static void srcu_reschedule(struct srcu_struct *sp)
+static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
 {
 	bool pending = true;
 
@@ -653,8 +641,7 @@ static void srcu_reschedule(struct srcu_struct *sp)
 	}
 
 	if (pending)
-		queue_delayed_work(system_power_efficient_wq,
-				   &sp->work, SRCU_INTERVAL);
+		queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
 }
 
 /*
@@ -669,6 +656,6 @@ void process_srcu(struct work_struct *work)
 	srcu_collect_new(sp);
 	srcu_advance_batches(sp, 1);
 	srcu_invoke_callbacks(sp);
-	srcu_reschedule(sp);
+	srcu_reschedule(sp, SRCU_INTERVAL);
 }
 EXPORT_SYMBOL_GPL(process_srcu);
-- 
cgit v1.2.3


From c2a8ec0778b2ca0d360ba9b5cac7fcd5ddfe798f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 10 Mar 2017 15:31:55 -0800
Subject: srcu: Move to state-based grace-period sequencing

The current SRCU grace-period processing might never reach the last
portion of srcu_advance_batches().  This is OK given the current
implementation, as the first portion, up to the try_check_zero()
following the srcu_flip() is sufficient to drive grace periods forward.
However, it has the unfortunate side-effect of making it impossible to
determine when a given grace period has ended, and it will be necessary
to efficiently trace ends of grace periods in order to efficiently handle
per-CPU SRCU callback lists.

This commit therefore adds states to the SRCU grace-period processing,
so that the end of a given SRCU grace period is marked by the transition
to the SRCU_STATE_DONE state.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h |  10 ++++-
 kernel/rcu/srcu.c    | 111 ++++++++++++++++++++++++++++-----------------------
 2 files changed, 69 insertions(+), 52 deletions(-)

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index a598cf3ac70c..f149a685896c 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -48,7 +48,7 @@ struct srcu_struct {
 	unsigned long completed;
 	struct srcu_array __percpu *per_cpu_ref;
 	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
-	bool running;
+	int srcu_state;
 	/* callbacks just queued */
 	struct rcu_batch batch_queue;
 	/* callbacks try to do the first check_zero */
@@ -62,6 +62,12 @@ struct srcu_struct {
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 };
 
+/* Values for -> state variable. */
+#define SRCU_STATE_IDLE		0
+#define SRCU_STATE_SCAN1	1
+#define SRCU_STATE_SCAN2	2
+#define SRCU_STATE_DONE		3
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
 int __init_srcu_struct(struct srcu_struct *sp, const char *name,
@@ -89,7 +95,7 @@ void process_srcu(struct work_struct *work);
 		.completed = -300,					\
 		.per_cpu_ref = &name##_srcu_array,			\
 		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
-		.running = false,					\
+		.srcu_state = SRCU_STATE_IDLE,				\
 		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\
 		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\
 		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 821ecda873f2..70286f68f3a1 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -111,7 +111,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
 {
 	sp->completed = 0;
 	spin_lock_init(&sp->queue_lock);
-	sp->running = false;
+	sp->srcu_state = SRCU_STATE_IDLE;
 	rcu_batch_init(&sp->batch_queue);
 	rcu_batch_init(&sp->batch_check0);
 	rcu_batch_init(&sp->batch_check1);
@@ -270,7 +270,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
 	if (WARN_ON(!rcu_all_batches_empty(sp)))
 		return; /* Leakage unless caller handles error. */
 	flush_delayed_work(&sp->work);
-	if (WARN_ON(sp->running))
+	if (WARN_ON(READ_ONCE(sp->srcu_state) != SRCU_STATE_IDLE))
 		return; /* Caller forgot to stop doing call_srcu()? */
 	free_percpu(sp->per_cpu_ref);
 	sp->per_cpu_ref = NULL;
@@ -391,8 +391,8 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 	spin_lock_irqsave(&sp->queue_lock, flags);
 	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
 	rcu_batch_queue(&sp->batch_queue, head);
-	if (!sp->running) {
-		sp->running = true;
+	if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) {
+		WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1);
 		queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
 	}
 	spin_unlock_irqrestore(&sp->queue_lock, flags);
@@ -424,9 +424,9 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
 	head->func = wakeme_after_rcu;
 	spin_lock_irq(&sp->queue_lock);
 	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	if (!sp->running) {
+	if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) {
 		/* steal the processing owner */
-		sp->running = true;
+		WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1);
 		rcu_batch_queue(&sp->batch_check0, head);
 		spin_unlock_irq(&sp->queue_lock);
 		/* give the processing owner to work_struct */
@@ -548,7 +548,9 @@ static void srcu_collect_new(struct srcu_struct *sp)
  */
 static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
 {
-	int idx = 1 ^ (sp->completed & 1);
+	int idx;
+
+	WARN_ON_ONCE(sp->srcu_state == SRCU_STATE_IDLE);
 
 	/*
 	 * Because readers might be delayed for an extended period after
@@ -558,48 +560,56 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
 	 * invoking a callback.
 	 */
 
-	if (rcu_batch_empty(&sp->batch_check0) &&
-	    rcu_batch_empty(&sp->batch_check1))
-		return; /* no callbacks need to be advanced */
-
-	if (!try_check_zero(sp, idx, trycount))
-		return; /* failed to advance, will try after SRCU_INTERVAL */
-
-	/*
-	 * The callbacks in ->batch_check1 have already done with their
-	 * first zero check and flip back when they were enqueued on
-	 * ->batch_check0 in a previous invocation of srcu_advance_batches().
-	 * (Presumably try_check_zero() returned false during that
-	 * invocation, leaving the callbacks stranded on ->batch_check1.)
-	 * They are therefore ready to invoke, so move them to ->batch_done.
-	 */
-	rcu_batch_move(&sp->batch_done, &sp->batch_check1);
-
-	if (rcu_batch_empty(&sp->batch_check0))
-		return; /* no callbacks need to be advanced */
-	srcu_flip(sp);
-
-	/*
-	 * The callbacks in ->batch_check0 just finished their
-	 * first check zero and flip, so move them to ->batch_check1
-	 * for future checking on the other idx.
-	 */
-	rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
-
-	/*
-	 * SRCU read-side critical sections are normally short, so check
-	 * at least twice in quick succession after a flip.
-	 */
-	trycount = trycount < 2 ? 2 : trycount;
-	if (!try_check_zero(sp, idx^1, trycount))
-		return; /* failed to advance, will try after SRCU_INTERVAL */
+	if (sp->srcu_state == SRCU_STATE_DONE)
+		WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1);
+
+	if (sp->srcu_state == SRCU_STATE_SCAN1) {
+		idx = 1 ^ (sp->completed & 1);
+		if (!try_check_zero(sp, idx, trycount))
+			return; /* readers present, retry after SRCU_INTERVAL */
+
+		/*
+		 * The callbacks in ->batch_check1 have already done
+		 * with their first zero check and flip back when they were
+		 * enqueued on ->batch_check0 in a previous invocation of
+		 * srcu_advance_batches().  (Presumably try_check_zero()
+		 * returned false during that invocation, leaving the
+		 * callbacks stranded on ->batch_check1.) They are therefore
+		 * ready to invoke, so move them to ->batch_done.
+		 */
+		rcu_batch_move(&sp->batch_done, &sp->batch_check1);
+		srcu_flip(sp);
+
+		/*
+		 * The callbacks in ->batch_check0 just finished their
+		 * first check zero and flip, so move them to ->batch_check1
+		 * for future checking on the other idx.
+		 */
+		rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
+
+		WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN2);
+	}
 
-	/*
-	 * The callbacks in ->batch_check1 have now waited for all
-	 * pre-existing readers using both idx values.  They are therefore
-	 * ready to invoke, so move them to ->batch_done.
-	 */
-	rcu_batch_move(&sp->batch_done, &sp->batch_check1);
+	if (sp->srcu_state == SRCU_STATE_SCAN2) {
+
+		/*
+		 * SRCU read-side critical sections are normally short,
+		 * so check at least twice in quick succession after a flip.
+		 */
+		idx = 1 ^ (sp->completed & 1);
+		trycount = trycount < 2 ? 2 : trycount;
+		if (!try_check_zero(sp, idx, trycount))
+			return; /* readers present, retry after SRCU_INTERVAL */
+
+		/*
+		 * The callbacks in ->batch_check1 have now waited for
+		 * all pre-existing readers using both idx values.  They are
+		 * therefore ready to invoke, so move them to ->batch_done.
+		 */
+		rcu_batch_move(&sp->batch_done, &sp->batch_check1);
+
+		WRITE_ONCE(sp->srcu_state, SRCU_STATE_DONE);
+	}
 }
 
 /*
@@ -633,8 +643,9 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
 
 	if (rcu_all_batches_empty(sp)) {
 		spin_lock_irq(&sp->queue_lock);
-		if (rcu_all_batches_empty(sp)) {
-			sp->running = false;
+		if (rcu_all_batches_empty(sp) &&
+		    READ_ONCE(sp->srcu_state) == SRCU_STATE_DONE) {
+			WRITE_ONCE(sp->srcu_state, SRCU_STATE_IDLE);
 			pending = false;
 		}
 		spin_unlock_irq(&sp->queue_lock);
-- 
cgit v1.2.3


From ac367c1c621b75689f6d5cd8301d364ba2c9f292 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 11 Mar 2017 07:14:06 -0800
Subject: srcu: Add grace-period sequence numbers

This commit adds grace-period sequence numbers, which will be used to
handle mid-boot grace periods and per-CPU callback lists.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h |  1 +
 kernel/rcu/srcu.c    | 27 +++++++++++++++++++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index f149a685896c..047ac8c28a4e 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -46,6 +46,7 @@ struct rcu_batch {
 
 struct srcu_struct {
 	unsigned long completed;
+	unsigned long srcu_gp_seq;
 	struct srcu_array __percpu *per_cpu_ref;
 	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
 	int srcu_state;
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 70286f68f3a1..d464986d82b6 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -110,6 +110,7 @@ static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
 static int init_srcu_struct_fields(struct srcu_struct *sp)
 {
 	sp->completed = 0;
+	sp->srcu_gp_seq = 0;
 	spin_lock_init(&sp->queue_lock);
 	sp->srcu_state = SRCU_STATE_IDLE;
 	rcu_batch_init(&sp->batch_queue);
@@ -318,6 +319,15 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 #define SYNCHRONIZE_SRCU_TRYCOUNT	2
 #define SYNCHRONIZE_SRCU_EXP_TRYCOUNT	12
 
+/*
+ * Start an SRCU grace period.
+ */
+static void srcu_gp_start(struct srcu_struct *sp)
+{
+	WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1);
+	rcu_seq_start(&sp->srcu_gp_seq);
+}
+
 /*
  * @@@ Wait until all pre-existing readers complete.  Such readers
  * will have used the index specified by "idx".
@@ -354,6 +364,15 @@ static void srcu_flip(struct srcu_struct *sp)
 	smp_mb(); /* D */  /* Pairs with C. */
 }
 
+/*
+ * End an SRCU grace period.
+ */
+static void srcu_gp_end(struct srcu_struct *sp)
+{
+	rcu_seq_end(&sp->srcu_gp_seq);
+	WRITE_ONCE(sp->srcu_state, SRCU_STATE_DONE);
+}
+
 /*
  * Enqueue an SRCU callback on the specified srcu_struct structure,
  * initiating grace-period processing if it is not already running.
@@ -392,7 +411,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
 	rcu_batch_queue(&sp->batch_queue, head);
 	if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) {
-		WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1);
+		srcu_gp_start(sp);
 		queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
 	}
 	spin_unlock_irqrestore(&sp->queue_lock, flags);
@@ -426,7 +445,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
 	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
 	if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) {
 		/* steal the processing owner */
-		WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1);
+		srcu_gp_start(sp);
 		rcu_batch_queue(&sp->batch_check0, head);
 		spin_unlock_irq(&sp->queue_lock);
 		/* give the processing owner to work_struct */
@@ -561,7 +580,7 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
 	 */
 
 	if (sp->srcu_state == SRCU_STATE_DONE)
-		WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1);
+		srcu_gp_start(sp);
 
 	if (sp->srcu_state == SRCU_STATE_SCAN1) {
 		idx = 1 ^ (sp->completed & 1);
@@ -608,7 +627,7 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
 		 */
 		rcu_batch_move(&sp->batch_done, &sp->batch_check1);
 
-		WRITE_ONCE(sp->srcu_state, SRCU_STATE_DONE);
+		srcu_gp_end(sp);
 	}
 }
 
-- 
cgit v1.2.3


From 8660b7d8a545227fd9ee80508aa82528ea9947d7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 13 Mar 2017 16:48:18 -0700
Subject: srcu: Use rcu_segcblist to track SRCU callbacks

This commit switches SRCU from custom-built callback queues to the new
rcu_segcblist structure.  This change associates grace-period sequence
numbers with groups of callbacks, which will be needed for efficient
processing of per-CPU callbacks.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcu_segcblist.h | 678 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/srcu.h          |  24 +-
 kernel/rcu/rcu.h              |   6 +
 kernel/rcu/rcu_segcblist.h    | 670 -----------------------------------------
 kernel/rcu/srcu.c             | 159 ++--------
 kernel/rcu/tree.h             |   2 +-
 6 files changed, 721 insertions(+), 818 deletions(-)
 create mode 100644 include/linux/rcu_segcblist.h
 delete mode 100644 kernel/rcu/rcu_segcblist.h

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
new file mode 100644
index 000000000000..74b1e7243955
--- /dev/null
+++ b/include/linux/rcu_segcblist.h
@@ -0,0 +1,678 @@
+/*
+ * RCU segmented callback lists
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#ifndef __KERNEL_RCU_SEGCBLIST_H
+#define __KERNEL_RCU_SEGCBLIST_H
+
+/* Simple unsegmented callback lists. */
+struct rcu_cblist {
+	struct rcu_head *head;
+	struct rcu_head **tail;
+	long len;
+	long len_lazy;
+};
+
+#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head }
+
+/* Initialize simple callback list. */
+static inline void rcu_cblist_init(struct rcu_cblist *rclp)
+{
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+	rclp->len = 0;
+	rclp->len_lazy = 0;
+}
+
+/* Is simple callback list empty? */
+static inline bool rcu_cblist_empty(struct rcu_cblist *rclp)
+{
+	return !rclp->head;
+}
+
+/* Return number of callbacks in simple callback list. */
+static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
+{
+	return rclp->len;
+}
+
+/* Return number of lazy callbacks in simple callback list. */
+static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
+{
+	return rclp->len_lazy;
+}
+
+/*
+ * Debug function to actually count the number of callbacks.
+ * If the number exceeds the limit specified, return -1.
+ */
+static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
+{
+	int cnt = 0;
+	struct rcu_head **rhpp = &rclp->head;
+
+	for (;;) {
+		if (!*rhpp)
+			return cnt;
+		if (++cnt > lim)
+			return -1;
+		rhpp = &(*rhpp)->next;
+	}
+}
+
+/*
+ * Dequeue the oldest rcu_head structure from the specified callback
+ * list.  This function assumes that the callback is non-lazy, but
+ * the caller can later invoke rcu_cblist_dequeued_lazy() if it
+ * finds otherwise (and if it cares about laziness).  This allows
+ * different users to have different ways of determining laziness.
+ */
+static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
+{
+	struct rcu_head *rhp;
+
+	rhp = rclp->head;
+	if (!rhp)
+		return NULL;
+	rclp->len--;
+	rclp->head = rhp->next;
+	if (!rclp->head)
+		rclp->tail = &rclp->head;
+	return rhp;
+}
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
+{
+	rclp->len_lazy--;
+}
+
+/*
+ * Interim function to return rcu_cblist head pointer.  Longer term, the
+ * rcu_cblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
+{
+	return rclp->head;
+}
+
+/*
+ * Interim function to return rcu_cblist head pointer.  Longer term, the
+ * rcu_cblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
+{
+	WARN_ON_ONCE(rcu_cblist_empty(rclp));
+	return rclp->tail;
+}
+
+/* Complicated segmented callback lists.  ;-) */
+
+/*
+ * Index values for segments in rcu_segcblist structure.
+ *
+ * The segments are as follows:
+ *
+ * [head, *tails[RCU_DONE_TAIL]):
+ *	Callbacks whose grace period has elapsed, and thus can be invoked.
+ * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]):
+ *	Callbacks waiting for the current GP from the current CPU's viewpoint.
+ * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]):
+ *	Callbacks that arrived before the next GP started, again from
+ *	the current CPU's viewpoint.  These can be handled by the next GP.
+ * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]):
+ *	Callbacks that might have arrived after the next GP started.
+ *	There is some uncertainty as to when a given GP starts and
+ *	ends, but a CPU knows the exact times if it is the one starting
+ *	or ending the GP.  Other CPUs know that the previous GP ends
+ *	before the next one starts.
+ *
+ * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also
+ * empty.
+ *
+ * The ->gp_seq[] array contains the grace-period number at which the
+ * corresponding segment of callbacks will be ready to invoke.  A given
+ * element of this array is meaningful only when the corresponding segment
+ * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks
+ * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have
+ * not yet been assigned a grace-period number).
+ */
+#define RCU_DONE_TAIL		0	/* Also RCU_WAIT head. */
+#define RCU_WAIT_TAIL		1	/* Also RCU_NEXT_READY head. */
+#define RCU_NEXT_READY_TAIL	2	/* Also RCU_NEXT head. */
+#define RCU_NEXT_TAIL		3
+#define RCU_CBLIST_NSEGS	4
+
+struct rcu_segcblist {
+	struct rcu_head *head;
+	struct rcu_head **tails[RCU_CBLIST_NSEGS];
+	unsigned long gp_seq[RCU_CBLIST_NSEGS];
+	long len;
+	long len_lazy;
+};
+
+#define RCU_SEGCBLIST_INITIALIZER(n) \
+{ \
+	.head = NULL, \
+	.tails[RCU_DONE_TAIL] = &n.head, \
+	.tails[RCU_WAIT_TAIL] = &n.head, \
+	.tails[RCU_NEXT_READY_TAIL] = &n.head, \
+	.tails[RCU_NEXT_TAIL] = &n.head, \
+}
+
+/*
+ * Initialize an rcu_segcblist structure.
+ */
+static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp)
+{
+	int i;
+
+	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
+	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
+	rsclp->head = NULL;
+	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
+		rsclp->tails[i] = &rsclp->head;
+	rsclp->len = 0;
+	rsclp->len_lazy = 0;
+}
+
+/*
+ * Is the specified rcu_segcblist structure empty?
+ *
+ * But careful!  The fact that the ->head field is NULL does not
+ * necessarily imply that there are no callbacks associated with
+ * this structure.  When callbacks are being invoked, they are
+ * removed as a group.  If callback invocation must be preempted,
+ * the remaining callbacks will be added back to the list.  Either
+ * way, the counts are updated later.
+ *
+ * So it is often the case that rcu_segcblist_n_cbs() should be used
+ * instead.
+ */
+static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
+{
+	return !rsclp->head;
+}
+
+/* Return number of callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
+{
+	return READ_ONCE(rsclp->len);
+}
+
+/* Return number of lazy callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp)
+{
+	return rsclp->len_lazy;
+}
+
+/* Return number of lazy callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
+{
+	return rsclp->len - rsclp->len_lazy;
+}
+
+/*
+ * Is the specified rcu_segcblist enabled, for example, not corresponding
+ * to an offline or callback-offloaded CPU?
+ */
+static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
+{
+	return !!rsclp->tails[RCU_NEXT_TAIL];
+}
+
+/*
+ * Disable the specified rcu_segcblist structure, so that callbacks can
+ * no longer be posted to it.  This structure must be empty.
+ */
+static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
+{
+	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
+	rsclp->tails[RCU_NEXT_TAIL] = NULL;
+}
+
+/*
+ * Is the specified segment of the specified rcu_segcblist structure
+ * empty of callbacks?
+ */
+static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
+{
+	if (seg == RCU_DONE_TAIL)
+		return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
+	return rsclp->tails[seg - 1] == rsclp->tails[seg];
+}
+
+/*
+ * Are all segments following the specified segment of the specified
+ * rcu_segcblist structure empty of callbacks?  (The specified
+ * segment might well contain callbacks.)
+ */
+static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
+{
+	return !*rsclp->tails[seg];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are ready to be invoked?
+ */
+static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are still pending, that is, not yet ready to be invoked?
+ */
+static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
+}
+
+/*
+ * Dequeue and return the first ready-to-invoke callback.  If there
+ * are no ready-to-invoke callbacks, return NULL.  Disables interrupts
+ * to avoid interference.  Does not protect from interference from other
+ * CPUs or tasks.
+ */
+static inline struct rcu_head *
+rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
+{
+	unsigned long flags;
+	int i;
+	struct rcu_head *rhp;
+
+	local_irq_save(flags);
+	if (!rcu_segcblist_ready_cbs(rsclp)) {
+		local_irq_restore(flags);
+		return NULL;
+	}
+	rhp = rsclp->head;
+	BUG_ON(!rhp);
+	rsclp->head = rhp->next;
+	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
+		if (rsclp->tails[i] != &rhp->next)
+			break;
+		rsclp->tails[i] = &rsclp->head;
+	}
+	smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
+	WRITE_ONCE(rsclp->len, rsclp->len - 1);
+	local_irq_restore(flags);
+	return rhp;
+}
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	rsclp->len_lazy--;
+	local_irq_restore(flags);
+}
+
+/*
+ * Return a pointer to the first callback in the specified rcu_segcblist
+ * structure.  This is useful for diagnostics.
+ */
+static inline struct rcu_head *
+rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
+{
+	if (rcu_segcblist_is_enabled(rsclp))
+		return rsclp->head;
+	return NULL;
+}
+
+/*
+ * Return a pointer to the first pending callback in the specified
+ * rcu_segcblist structure.  This is useful just after posting a given
+ * callback -- if that callback is the first pending callback, then
+ * you cannot rely on someone else having already started up the required
+ * grace period.
+ */
+static inline struct rcu_head *
+rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
+{
+	if (rcu_segcblist_is_enabled(rsclp))
+		return *rsclp->tails[RCU_DONE_TAIL];
+	return NULL;
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * have not yet been processed beyond having been posted, that is,
+ * does it contain callbacks in its last segment?
+ */
+static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
+}
+
+/*
+ * Enqueue the specified callback onto the specified rcu_segcblist
+ * structure, updating accounting as needed.  Note that the ->len
+ * field may be accessed locklessly, hence the WRITE_ONCE().
+ * The ->len field is used by rcu_barrier() and friends to determine
+ * if it must post a callback on this structure, and it is OK
+ * for rcu_barrier() to sometimes post callbacks needlessly, but
+ * absolutely not OK for it to ever miss posting a callback.
+ */
+static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
+					 struct rcu_head *rhp, bool lazy)
+{
+	WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
+	if (lazy)
+		rsclp->len_lazy++;
+	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
+	rhp->next = NULL;
+	*rsclp->tails[RCU_NEXT_TAIL] = rhp;
+	rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
+}
+
+/*
+ * Extract only the counts from the specified rcu_segcblist structure,
+ * and place them in the specified rcu_cblist structure.  This function
+ * supports both callback orphaning and invocation, hence the separation
+ * of counts and callbacks.  (Callbacks ready for invocation must be
+ * orphaned and adopted separately from pending callbacks, but counts
+ * apply to all callbacks.  Locking must be used to make sure that
+ * both orphaned-callbacks lists are consistent.)
+ */
+static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
+					       struct rcu_cblist *rclp)
+{
+	rclp->len_lazy += rsclp->len_lazy;
+	rclp->len += rsclp->len;
+	rsclp->len_lazy = 0;
+	WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
+}
+
+/*
+ * Extract only those callbacks ready to be invoked from the specified
+ * rcu_segcblist structure and place them in the specified rcu_cblist
+ * structure.
+ */
+static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
+						  struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rcu_segcblist_ready_cbs(rsclp))
+		return; /* Nothing to do. */
+	*rclp->tail = rsclp->head;
+	rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
+	*rsclp->tails[RCU_DONE_TAIL] = NULL;
+	rclp->tail = rsclp->tails[RCU_DONE_TAIL];
+	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
+			rsclp->tails[i] = &rsclp->head;
+}
+
+/*
+ * Extract only those callbacks still pending (not yet ready to be
+ * invoked) from the specified rcu_segcblist structure and place them in
+ * the specified rcu_cblist structure.  Note that this loses information
+ * about any callbacks that might have been partway done waiting for
+ * their grace period.  Too bad!  They will have to start over.
+ */
+static inline void
+rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
+			       struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rcu_segcblist_pend_cbs(rsclp))
+		return; /* Nothing to do. */
+	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
+	rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
+	*rsclp->tails[RCU_DONE_TAIL] = NULL;
+	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
+		rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Move the entire contents of the specified rcu_segcblist structure,
+ * counts, callbacks, and all, to the specified rcu_cblist structure.
+ * @@@ Why do we need this???  Moving early-boot CBs to NOCB lists?
+ * @@@ Memory barrier needed?  (Not if only used at boot time...)
+ */
+static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp,
+					     struct rcu_cblist *rclp)
+{
+	rcu_segcblist_extract_done_cbs(rsclp, rclp);
+	rcu_segcblist_extract_pend_cbs(rsclp, rclp);
+	rcu_segcblist_extract_count(rsclp, rclp);
+}
+
+/*
+ * Insert counts from the specified rcu_cblist structure in the
+ * specified rcu_segcblist structure.
+ */
+static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
+					      struct rcu_cblist *rclp)
+{
+	rsclp->len_lazy += rclp->len_lazy;
+	/* ->len sampled locklessly. */
+	WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
+	rclp->len_lazy = 0;
+	rclp->len = 0;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the beginning of the
+ * done-callbacks segment of the specified rcu_segcblist.
+ */
+static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
+						 struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rclp->head)
+		return; /* No callbacks to move. */
+	*rclp->tail = rsclp->head;
+	rsclp->head = rclp->head;
+	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
+		if (&rsclp->head == rsclp->tails[i])
+			rsclp->tails[i] = rclp->tail;
+		else
+			break;
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the end of the
+ * new-callbacks segment of the specified rcu_segcblist.
+ */
+static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
+						 struct rcu_cblist *rclp)
+{
+	if (!rclp->head)
+		return; /* Nothing to do. */
+	*rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
+	rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+}
+
+/*
+ * Advance the callbacks in the specified rcu_segcblist structure based
+ * on the current value passed in for the grace-period counter.
+ */
+static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
+					 unsigned long seq)
+{
+	int i, j;
+
+	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
+
+	/*
+	 * Find all callbacks whose ->gp_seq numbers indicate that they
+	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
+	 */
+	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
+		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+			break;
+		rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
+	}
+
+	/* If no callbacks moved, nothing more need be done. */
+	if (i == RCU_WAIT_TAIL)
+		return;
+
+	/* Clean up tail pointers that might have been misordered above. */
+	for (j = RCU_WAIT_TAIL; j < i; j++)
+		rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
+
+	/*
+	 * Callbacks moved, so clean up the misordered ->tails[] pointers
+	 * that now point into the middle of the list of ready-to-invoke
+	 * callbacks.  The overall effect is to copy down the later pointers
+	 * into the gap that was created by the now-ready segments.
+	 */
+	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
+		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
+			break;  /* No more callbacks. */
+		rsclp->tails[j] = rsclp->tails[i];
+		rsclp->gp_seq[j] = rsclp->gp_seq[i];
+	}
+}
+
+/*
+ * "Accelerate" callbacks based on more-accurate grace-period information.
+ * The reason for this is that RCU does not synchronize the beginnings and
+ * ends of grace periods, and that callbacks are posted locally.  This in
+ * turn means that the callbacks must be labelled conservatively early
+ * on, as getting exact information would degrade both performance and
+ * scalability.  When more accurate grace-period information becomes
+ * available, previously posted callbacks can be "accelerated", marking
+ * them to complete at the end of the earlier grace period.
+ *
+ * This function operates on an rcu_segcblist structure, and also the
+ * grace-period sequence number at which new callbacks would become
+ * ready to invoke.
+ */
+static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
+					    unsigned long seq)
+{
+	int i;
+
+	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
+
+	/*
+	 * Find the segment preceding the oldest segment of callbacks
+	 * whose ->gp_seq[] completion is at or after that passed in via
+	 * "seq", skipping any empty segments.  This oldest segment, along
+	 * with any later segments, can be merged in with any newly arrived
+	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
+	 * as their ->gp_seq[] grace-period completion sequence number.
+	 */
+	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] != rsclp->tails[i - 1] &&
+		    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
+			break;
+
+	/*
+	 * If all the segments contain callbacks that correspond to
+	 * earlier grace-period sequence numbers than "seq", leave.
+	 * Assuming that the rcu_segcblist structure has enough
+	 * segments in its arrays, this can only happen if some of
+	 * the non-done segments contain callbacks that really are
+	 * ready to invoke.  This situation will get straightened
+	 * out by the next call to rcu_segcblist_advance().
+	 *
+	 * Also advance to the oldest segment of callbacks whose
+	 * ->gp_seq[] completion is at or after that passed in via "seq",
+	 * skipping any empty segments.
+	 */
+	if (++i >= RCU_NEXT_TAIL)
+		return false;
+
+	/*
+	 * Merge all later callbacks, including newly arrived callbacks,
+	 * into the segment located by the for-loop above.  Assign "seq"
+	 * as the ->gp_seq[] value in order to correctly handle the case
+	 * where there were no pending callbacks in the rcu_segcblist
+	 * structure other than in the RCU_NEXT_TAIL segment.
+	 */
+	for (; i < RCU_NEXT_TAIL; i++) {
+		rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
+		rsclp->gp_seq[i] = seq;
+	}
+	return true;
+}
+
+/*
+ * Scan the specified rcu_segcblist structure for callbacks that need
+ * a grace period later than the one specified by "seq".  We don't look
+ * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
+ * have a grace-period sequence number.
+ */
+static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
+						  unsigned long seq)
+{
+	int i;
+
+	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
+		if (rsclp->tails[i - 1] != rsclp->tails[i] &&
+		    ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+			return true;
+	return false;
+}
+
+/*
+ * Interim function to return rcu_segcblist head pointer.  Longer term, the
+ * rcu_segcblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp)
+{
+	return rsclp->head;
+}
+
+/*
+ * Interim function to return rcu_segcblist head pointer.  Longer term, the
+ * rcu_segcblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
+{
+	WARN_ON_ONCE(rcu_segcblist_empty(rsclp));
+	return rsclp->tails[RCU_NEXT_TAIL];
+}
+
+#endif /* __KERNEL_RCU_SEGCBLIST_H */
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 047ac8c28a4e..ad154a7bc114 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -22,7 +22,7 @@
  *	   Lai Jiangshan <laijs@cn.fujitsu.com>
  *
  * For detailed explanation of Read-Copy Update mechanism see -
- * 		Documentation/RCU/ *.txt
+ *		Documentation/RCU/ *.txt
  *
  */
 
@@ -32,31 +32,20 @@
 #include <linux/mutex.h>
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
+#include <linux/rcu_segcblist.h>
 
 struct srcu_array {
 	unsigned long lock_count[2];
 	unsigned long unlock_count[2];
 };
 
-struct rcu_batch {
-	struct rcu_head *head, **tail;
-};
-
-#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
-
 struct srcu_struct {
 	unsigned long completed;
 	unsigned long srcu_gp_seq;
 	struct srcu_array __percpu *per_cpu_ref;
-	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
+	spinlock_t queue_lock; /* protect ->srcu_cblist, ->srcu_state */
 	int srcu_state;
-	/* callbacks just queued */
-	struct rcu_batch batch_queue;
-	/* callbacks try to do the first check_zero */
-	struct rcu_batch batch_check0;
-	/* callbacks done with the first check_zero and the flip */
-	struct rcu_batch batch_check1;
-	struct rcu_batch batch_done;
+	struct rcu_segcblist srcu_cblist;
 	struct delayed_work work;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
@@ -97,10 +86,7 @@ void process_srcu(struct work_struct *work);
 		.per_cpu_ref = &name##_srcu_array,			\
 		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
 		.srcu_state = SRCU_STATE_IDLE,				\
-		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\
-		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\
-		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\
-		.batch_done = RCU_BATCH_INIT(name.batch_done),		\
+		.srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\
 		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
 		__SRCU_DEP_MAP_INIT(name)				\
 	}
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 0bc1313c49e2..a943b42a9cf7 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -87,6 +87,12 @@ static inline unsigned long rcu_seq_snap(unsigned long *sp)
 	return s;
 }
 
+/* Return the current value the update side's sequence number, no ordering. */
+static inline unsigned long rcu_seq_current(unsigned long *sp)
+{
+	return READ_ONCE(*sp);
+}
+
 /*
  * Given a snapshot from rcu_seq_snap(), determine whether or not a
  * full update-side operation has occurred.
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
deleted file mode 100644
index 982e3e05b22a..000000000000
--- a/kernel/rcu/rcu_segcblist.h
+++ /dev/null
@@ -1,670 +0,0 @@
-/*
- * RCU segmented callback lists
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * Copyright IBM Corporation, 2017
- *
- * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
- */
-
-#ifndef __KERNEL_RCU_SEGCBLIST_H
-#define __KERNEL_RCU_SEGCBLIST_H
-
-/* Simple unsegmented callback lists. */
-struct rcu_cblist {
-	struct rcu_head *head;
-	struct rcu_head **tail;
-	long len;
-	long len_lazy;
-};
-
-#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head }
-
-/* Initialize simple callback list. */
-static inline void rcu_cblist_init(struct rcu_cblist *rclp)
-{
-	rclp->head = NULL;
-	rclp->tail = &rclp->head;
-	rclp->len = 0;
-	rclp->len_lazy = 0;
-}
-
-/* Is simple callback list empty? */
-static inline bool rcu_cblist_empty(struct rcu_cblist *rclp)
-{
-	return !rclp->head;
-}
-
-/* Return number of callbacks in simple callback list. */
-static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
-{
-	return rclp->len;
-}
-
-/* Return number of lazy callbacks in simple callback list. */
-static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
-{
-	return rclp->len_lazy;
-}
-
-/*
- * Debug function to actually count the number of callbacks.
- * If the number exceeds the limit specified, return -1.
- */
-static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
-{
-	int cnt = 0;
-	struct rcu_head **rhpp = &rclp->head;
-
-	for (;;) {
-		if (!*rhpp)
-			return cnt;
-		if (++cnt > lim)
-			return -1;
-		rhpp = &(*rhpp)->next;
-	}
-}
-
-/*
- * Dequeue the oldest rcu_head structure from the specified callback
- * list.  This function assumes that the callback is non-lazy, but
- * the caller can later invoke rcu_cblist_dequeued_lazy() if it
- * finds otherwise (and if it cares about laziness).  This allows
- * different users to have different ways of determining laziness.
- */
-static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
-{
-	struct rcu_head *rhp;
-
-	rhp = rclp->head;
-	if (!rhp)
-		return NULL;
-	prefetch(rhp);
-	rclp->len--;
-	rclp->head = rhp->next;
-	if (!rclp->head)
-		rclp->tail = &rclp->head;
-	return rhp;
-}
-
-/*
- * Account for the fact that a previously dequeued callback turned out
- * to be marked as lazy.
- */
-static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
-{
-	rclp->len_lazy--;
-}
-
-/*
- * Interim function to return rcu_cblist head pointer.  Longer term, the
- * rcu_cblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
-{
-	return rclp->head;
-}
-
-/*
- * Interim function to return rcu_cblist head pointer.  Longer term, the
- * rcu_cblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
-{
-	WARN_ON_ONCE(rcu_cblist_empty(rclp));
-	return rclp->tail;
-}
-
-/* Complicated segmented callback lists.  ;-) */
-
-/*
- * Index values for segments in rcu_segcblist structure.
- *
- * The segments are as follows:
- *
- * [head, *tails[RCU_DONE_TAIL]):
- *	Callbacks whose grace period has elapsed, and thus can be invoked.
- * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]):
- *	Callbacks waiting for the current GP from the current CPU's viewpoint.
- * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]):
- *	Callbacks that arrived before the next GP started, again from
- *	the current CPU's viewpoint.  These can be handled by the next GP.
- * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]):
- *	Callbacks that might have arrived after the next GP started.
- *	There is some uncertainty as to when a given GP starts and
- *	ends, but a CPU knows the exact times if it is the one starting
- *	or ending the GP.  Other CPUs know that the previous GP ends
- *	before the next one starts.
- *
- * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also
- * empty.
- *
- * The ->gp_seq[] array contains the grace-period number at which the
- * corresponding segment of callbacks will be ready to invoke.  A given
- * element of this array is meaningful only when the corresponding segment
- * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks
- * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have
- * not yet been assigned a grace-period number).
- */
-#define RCU_DONE_TAIL		0	/* Also RCU_WAIT head. */
-#define RCU_WAIT_TAIL		1	/* Also RCU_NEXT_READY head. */
-#define RCU_NEXT_READY_TAIL	2	/* Also RCU_NEXT head. */
-#define RCU_NEXT_TAIL		3
-#define RCU_CBLIST_NSEGS	4
-
-struct rcu_segcblist {
-	struct rcu_head *head;
-	struct rcu_head **tails[RCU_CBLIST_NSEGS];
-	unsigned long gp_seq[RCU_CBLIST_NSEGS];
-	long len;
-	long len_lazy;
-};
-
-/*
- * Initialize an rcu_segcblist structure.
- */
-static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp)
-{
-	int i;
-
-	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
-	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
-	rsclp->head = NULL;
-	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
-		rsclp->tails[i] = &rsclp->head;
-	rsclp->len = 0;
-	rsclp->len_lazy = 0;
-}
-
-/*
- * Is the specified rcu_segcblist structure empty?
- *
- * But careful!  The fact that the ->head field is NULL does not
- * necessarily imply that there are no callbacks associated with
- * this structure.  When callbacks are being invoked, they are
- * removed as a group.  If callback invocation must be preempted,
- * the remaining callbacks will be added back to the list.  Either
- * way, the counts are updated later.
- *
- * So it is often the case that rcu_segcblist_n_cbs() should be used
- * instead.
- */
-static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
-{
-	return !rsclp->head;
-}
-
-/* Return number of callbacks in segmented callback list. */
-static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
-{
-	return READ_ONCE(rsclp->len);
-}
-
-/* Return number of lazy callbacks in segmented callback list. */
-static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp)
-{
-	return rsclp->len_lazy;
-}
-
-/* Return number of lazy callbacks in segmented callback list. */
-static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
-{
-	return rsclp->len - rsclp->len_lazy;
-}
-
-/*
- * Is the specified rcu_segcblist enabled, for example, not corresponding
- * to an offline or callback-offloaded CPU?
- */
-static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
-{
-	return !!rsclp->tails[RCU_NEXT_TAIL];
-}
-
-/*
- * Disable the specified rcu_segcblist structure, so that callbacks can
- * no longer be posted to it.  This structure must be empty.
- */
-static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
-{
-	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
-	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
-	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
-	rsclp->tails[RCU_NEXT_TAIL] = NULL;
-}
-
-/*
- * Is the specified segment of the specified rcu_segcblist structure
- * empty of callbacks?
- */
-static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
-{
-	if (seg == RCU_DONE_TAIL)
-		return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
-	return rsclp->tails[seg - 1] == rsclp->tails[seg];
-}
-
-/*
- * Are all segments following the specified segment of the specified
- * rcu_segcblist structure empty of callbacks?  (The specified
- * segment might well contain callbacks.)
- */
-static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
-{
-	return !*rsclp->tails[seg];
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * are ready to be invoked?
- */
-static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
-{
-	return rcu_segcblist_is_enabled(rsclp) &&
-	       &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * are still pending, that is, not yet ready to be invoked?
- */
-static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
-{
-	return rcu_segcblist_is_enabled(rsclp) &&
-	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
-}
-
-/*
- * Dequeue and return the first ready-to-invoke callback.  If there
- * are no ready-to-invoke callbacks, return NULL.  Disables interrupts
- * to avoid interference.  Does not protect from interference from other
- * CPUs or tasks.
- */
-static inline struct rcu_head *
-rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
-{
-	unsigned long flags;
-	int i;
-	struct rcu_head *rhp;
-
-	local_irq_save(flags);
-	if (!rcu_segcblist_ready_cbs(rsclp)) {
-		local_irq_restore(flags);
-		return NULL;
-	}
-	rhp = rsclp->head;
-	BUG_ON(!rhp);
-	rsclp->head = rhp->next;
-	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
-		if (rsclp->tails[i] != &rhp->next)
-			break;
-		rsclp->tails[i] = &rsclp->head;
-	}
-	smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
-	WRITE_ONCE(rsclp->len, rsclp->len - 1);
-	local_irq_restore(flags);
-	return rhp;
-}
-
-/*
- * Account for the fact that a previously dequeued callback turned out
- * to be marked as lazy.
- */
-static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	rsclp->len_lazy--;
-	local_irq_restore(flags);
-}
-
-/*
- * Return a pointer to the first callback in the specified rcu_segcblist
- * structure.  This is useful for diagnostics.
- */
-static inline struct rcu_head *
-rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
-{
-	if (rcu_segcblist_is_enabled(rsclp))
-		return rsclp->head;
-	return NULL;
-}
-
-/*
- * Return a pointer to the first pending callback in the specified
- * rcu_segcblist structure.  This is useful just after posting a given
- * callback -- if that callback is the first pending callback, then
- * you cannot rely on someone else having already started up the required
- * grace period.
- */
-static inline struct rcu_head *
-rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
-{
-	if (rcu_segcblist_is_enabled(rsclp))
-		return *rsclp->tails[RCU_DONE_TAIL];
-	return NULL;
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * have not yet been processed beyond having been posted, that is,
- * does it contain callbacks in its last segment?
- */
-static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
-{
-	return rcu_segcblist_is_enabled(rsclp) &&
-	       !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
-}
-
-/*
- * Enqueue the specified callback onto the specified rcu_segcblist
- * structure, updating accounting as needed.  Note that the ->len
- * field may be accessed locklessly, hence the WRITE_ONCE().
- * The ->len field is used by rcu_barrier() and friends to determine
- * if it must post a callback on this structure, and it is OK
- * for rcu_barrier() to sometimes post callbacks needlessly, but
- * absolutely not OK for it to ever miss posting a callback.
- */
-static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
-					 struct rcu_head *rhp, bool lazy)
-{
-	WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
-	if (lazy)
-		rsclp->len_lazy++;
-	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
-	rhp->next = NULL;
-	*rsclp->tails[RCU_NEXT_TAIL] = rhp;
-	rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
-}
-
-/*
- * Extract only the counts from the specified rcu_segcblist structure,
- * and place them in the specified rcu_cblist structure.  This function
- * supports both callback orphaning and invocation, hence the separation
- * of counts and callbacks.  (Callbacks ready for invocation must be
- * orphaned and adopted separately from pending callbacks, but counts
- * apply to all callbacks.  Locking must be used to make sure that
- * both orphaned-callbacks lists are consistent.)
- */
-static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
-					       struct rcu_cblist *rclp)
-{
-	rclp->len_lazy += rsclp->len_lazy;
-	rclp->len += rsclp->len;
-	rsclp->len_lazy = 0;
-	WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
-}
-
-/*
- * Extract only those callbacks ready to be invoked from the specified
- * rcu_segcblist structure and place them in the specified rcu_cblist
- * structure.
- */
-static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
-						  struct rcu_cblist *rclp)
-{
-	int i;
-
-	if (!rcu_segcblist_ready_cbs(rsclp))
-		return; /* Nothing to do. */
-	*rclp->tail = rsclp->head;
-	rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
-	*rsclp->tails[RCU_DONE_TAIL] = NULL;
-	rclp->tail = rsclp->tails[RCU_DONE_TAIL];
-	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
-		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
-			rsclp->tails[i] = &rsclp->head;
-}
-
-/*
- * Extract only those callbacks still pending (not yet ready to be
- * invoked) from the specified rcu_segcblist structure and place them in
- * the specified rcu_cblist structure.  Note that this loses information
- * about any callbacks that might have been partway done waiting for
- * their grace period.  Too bad!  They will have to start over.
- */
-static inline void
-rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
-			       struct rcu_cblist *rclp)
-{
-	int i;
-
-	if (!rcu_segcblist_pend_cbs(rsclp))
-		return; /* Nothing to do. */
-	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
-	rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
-	*rsclp->tails[RCU_DONE_TAIL] = NULL;
-	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
-		rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
-}
-
-/*
- * Move the entire contents of the specified rcu_segcblist structure,
- * counts, callbacks, and all, to the specified rcu_cblist structure.
- * @@@ Why do we need this???  Moving early-boot CBs to NOCB lists?
- * @@@ Memory barrier needed?  (Not if only used at boot time...)
- */
-static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp,
-					     struct rcu_cblist *rclp)
-{
-	rcu_segcblist_extract_done_cbs(rsclp, rclp);
-	rcu_segcblist_extract_pend_cbs(rsclp, rclp);
-	rcu_segcblist_extract_count(rsclp, rclp);
-}
-
-/*
- * Insert counts from the specified rcu_cblist structure in the
- * specified rcu_segcblist structure.
- */
-static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
-					      struct rcu_cblist *rclp)
-{
-	rsclp->len_lazy += rclp->len_lazy;
-	/* ->len sampled locklessly. */
-	WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
-	rclp->len_lazy = 0;
-	rclp->len = 0;
-}
-
-/*
- * Move callbacks from the specified rcu_cblist to the beginning of the
- * done-callbacks segment of the specified rcu_segcblist.
- */
-static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
-						 struct rcu_cblist *rclp)
-{
-	int i;
-
-	if (!rclp->head)
-		return; /* No callbacks to move. */
-	*rclp->tail = rsclp->head;
-	rsclp->head = rclp->head;
-	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
-		if (&rsclp->head == rsclp->tails[i])
-			rsclp->tails[i] = rclp->tail;
-		else
-			break;
-	rclp->head = NULL;
-	rclp->tail = &rclp->head;
-}
-
-/*
- * Move callbacks from the specified rcu_cblist to the end of the
- * new-callbacks segment of the specified rcu_segcblist.
- */
-static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
-						 struct rcu_cblist *rclp)
-{
-	if (!rclp->head)
-		return; /* Nothing to do. */
-	*rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
-	rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
-	rclp->head = NULL;
-	rclp->tail = &rclp->head;
-}
-
-/*
- * Advance the callbacks in the specified rcu_segcblist structure based
- * on the current value passed in for the grace-period counter.
- */
-static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
-					 unsigned long seq)
-{
-	int i, j;
-
-	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
-	WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
-
-	/*
-	 * Find all callbacks whose ->gp_seq numbers indicate that they
-	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
-	 */
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
-		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
-			break;
-		rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
-	}
-
-	/* If no callbacks moved, nothing more need be done. */
-	if (i == RCU_WAIT_TAIL)
-		return;
-
-	/* Clean up tail pointers that might have been misordered above. */
-	for (j = RCU_WAIT_TAIL; j < i; j++)
-		rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
-
-	/*
-	 * Callbacks moved, so clean up the misordered ->tails[] pointers
-	 * that now point into the middle of the list of ready-to-invoke
-	 * callbacks.  The overall effect is to copy down the later pointers
-	 * into the gap that was created by the now-ready segments.
-	 */
-	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
-		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
-			break;  /* No more callbacks. */
-		rsclp->tails[j] = rsclp->tails[i];
-		rsclp->gp_seq[j] = rsclp->gp_seq[i];
-	}
-}
-
-/*
- * "Accelerate" callbacks based on more-accurate grace-period information.
- * The reason for this is that RCU does not synchronize the beginnings and
- * ends of grace periods, and that callbacks are posted locally.  This in
- * turn means that the callbacks must be labelled conservatively early
- * on, as getting exact information would degrade both performance and
- * scalability.  When more accurate grace-period information becomes
- * available, previously posted callbacks can be "accelerated", marking
- * them to complete at the end of the earlier grace period.
- *
- * This function operates on an rcu_segcblist structure, and also the
- * grace-period sequence number at which new callbacks would become
- * ready to invoke.
- */
-static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
-					    unsigned long seq)
-{
-	int i;
-
-	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
-	WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
-
-	/*
-	 * Find the segment preceding the oldest segment of callbacks
-	 * whose ->gp_seq[] completion is at or after that passed in via
-	 * "seq", skipping any empty segments.  This oldest segment, along
-	 * with any later segments, can be merged in with any newly arrived
-	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
-	 * as their ->gp_seq[] grace-period completion sequence number.
-	 */
-	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
-		if (rsclp->tails[i] != rsclp->tails[i - 1] &&
-		    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
-			break;
-
-	/*
-	 * If all the segments contain callbacks that correspond to
-	 * earlier grace-period sequence numbers than "seq", leave.
-	 * Assuming that the rcu_segcblist structure has enough
-	 * segments in its arrays, this can only happen if some of
-	 * the non-done segments contain callbacks that really are
-	 * ready to invoke.  This situation will get straightened
-	 * out by the next call to rcu_segcblist_advance().
-	 *
-	 * Also advance to the oldest segment of callbacks whose
-	 * ->gp_seq[] completion is at or after that passed in via "seq",
-	 * skipping any empty segments.
-	 */
-	if (++i >= RCU_NEXT_TAIL)
-		return false;
-
-	/*
-	 * Merge all later callbacks, including newly arrived callbacks,
-	 * into the segment located by the for-loop above.  Assign "seq"
-	 * as the ->gp_seq[] value in order to correctly handle the case
-	 * where there were no pending callbacks in the rcu_segcblist
-	 * structure other than in the RCU_NEXT_TAIL segment.
-	 */
-	for (; i < RCU_NEXT_TAIL; i++) {
-		rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
-		rsclp->gp_seq[i] = seq;
-	}
-	return true;
-}
-
-/*
- * Scan the specified rcu_segcblist structure for callbacks that need
- * a grace period later than the one specified by "seq".  We don't look
- * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
- * have a grace-period sequence number.
- */
-static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
-						  unsigned long seq)
-{
-	int i;
-
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
-		if (rsclp->tails[i - 1] != rsclp->tails[i] &&
-		    ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
-			return true;
-	return false;
-}
-
-/*
- * Interim function to return rcu_segcblist head pointer.  Longer term, the
- * rcu_segcblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp)
-{
-	return rsclp->head;
-}
-
-/*
- * Interim function to return rcu_segcblist head pointer.  Longer term, the
- * rcu_segcblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
-{
-	WARN_ON_ONCE(rcu_segcblist_empty(rsclp));
-	return rsclp->tails[RCU_NEXT_TAIL];
-}
-
-#endif /* __KERNEL_RCU_SEGCBLIST_H */
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index d464986d82b6..56fd30862122 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -22,7 +22,7 @@
  *	   Lai Jiangshan <laijs@cn.fujitsu.com>
  *
  * For detailed explanation of Read-Copy Update mechanism see -
- * 		Documentation/RCU/ *.txt
+ *		Documentation/RCU/ *.txt
  *
  */
 
@@ -38,85 +38,13 @@
 
 #include "rcu.h"
 
-/*
- * Initialize an rcu_batch structure to empty.
- */
-static inline void rcu_batch_init(struct rcu_batch *b)
-{
-	b->head = NULL;
-	b->tail = &b->head;
-}
-
-/*
- * Enqueue a callback onto the tail of the specified rcu_batch structure.
- */
-static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
-{
-	*b->tail = head;
-	b->tail = &head->next;
-}
-
-/*
- * Is the specified rcu_batch structure empty?
- */
-static inline bool rcu_batch_empty(struct rcu_batch *b)
-{
-	return b->tail == &b->head;
-}
-
-/*
- * Are all batches empty for the specified srcu_struct?
- */
-static inline bool rcu_all_batches_empty(struct srcu_struct *sp)
-{
-	return rcu_batch_empty(&sp->batch_done) &&
-	       rcu_batch_empty(&sp->batch_check1) &&
-	       rcu_batch_empty(&sp->batch_check0) &&
-	       rcu_batch_empty(&sp->batch_queue);
-}
-
-/*
- * Remove the callback at the head of the specified rcu_batch structure
- * and return a pointer to it, or return NULL if the structure is empty.
- */
-static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
-{
-	struct rcu_head *head;
-
-	if (rcu_batch_empty(b))
-		return NULL;
-
-	head = b->head;
-	b->head = head->next;
-	if (b->tail == &head->next)
-		rcu_batch_init(b);
-
-	return head;
-}
-
-/*
- * Move all callbacks from the rcu_batch structure specified by "from" to
- * the structure specified by "to".
- */
-static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
-{
-	if (!rcu_batch_empty(from)) {
-		*to->tail = from->head;
-		to->tail = from->tail;
-		rcu_batch_init(from);
-	}
-}
-
 static int init_srcu_struct_fields(struct srcu_struct *sp)
 {
 	sp->completed = 0;
 	sp->srcu_gp_seq = 0;
 	spin_lock_init(&sp->queue_lock);
 	sp->srcu_state = SRCU_STATE_IDLE;
-	rcu_batch_init(&sp->batch_queue);
-	rcu_batch_init(&sp->batch_check0);
-	rcu_batch_init(&sp->batch_check1);
-	rcu_batch_init(&sp->batch_done);
+	rcu_segcblist_init(&sp->srcu_cblist);
 	INIT_DELAYED_WORK(&sp->work, process_srcu);
 	sp->per_cpu_ref = alloc_percpu(struct srcu_array);
 	return sp->per_cpu_ref ? 0 : -ENOMEM;
@@ -268,7 +196,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
 {
 	if (WARN_ON(srcu_readers_active(sp)))
 		return; /* Leakage unless caller handles error. */
-	if (WARN_ON(!rcu_all_batches_empty(sp)))
+	if (WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)))
 		return; /* Leakage unless caller handles error. */
 	flush_delayed_work(&sp->work);
 	if (WARN_ON(READ_ONCE(sp->srcu_state) != SRCU_STATE_IDLE))
@@ -324,6 +252,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  */
 static void srcu_gp_start(struct srcu_struct *sp)
 {
+	rcu_segcblist_accelerate(&sp->srcu_cblist,
+				 rcu_seq_snap(&sp->srcu_gp_seq));
 	WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1);
 	rcu_seq_start(&sp->srcu_gp_seq);
 }
@@ -371,6 +301,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
 {
 	rcu_seq_end(&sp->srcu_gp_seq);
 	WRITE_ONCE(sp->srcu_state, SRCU_STATE_DONE);
+
+	spin_lock_irq(&sp->queue_lock);
+	rcu_segcblist_advance(&sp->srcu_cblist,
+			      rcu_seq_current(&sp->srcu_gp_seq));
+	spin_unlock_irq(&sp->queue_lock);
 }
 
 /*
@@ -409,7 +344,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 	head->func = func;
 	spin_lock_irqsave(&sp->queue_lock, flags);
 	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	rcu_batch_queue(&sp->batch_queue, head);
+	rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
 	if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) {
 		srcu_gp_start(sp);
 		queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
@@ -445,13 +380,13 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
 	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
 	if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) {
 		/* steal the processing owner */
+		rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
 		srcu_gp_start(sp);
-		rcu_batch_queue(&sp->batch_check0, head);
 		spin_unlock_irq(&sp->queue_lock);
 		/* give the processing owner to work_struct */
 		srcu_reschedule(sp, 0);
 	} else {
-		rcu_batch_queue(&sp->batch_queue, head);
+		rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
 		spin_unlock_irq(&sp->queue_lock);
 	}
 
@@ -548,19 +483,6 @@ EXPORT_SYMBOL_GPL(srcu_batches_completed);
 #define SRCU_CALLBACK_BATCH	10
 #define SRCU_INTERVAL		1
 
-/*
- * Move any new SRCU callbacks to the first stage of the SRCU grace
- * period pipeline.
- */
-static void srcu_collect_new(struct srcu_struct *sp)
-{
-	if (!rcu_batch_empty(&sp->batch_queue)) {
-		spin_lock_irq(&sp->queue_lock);
-		rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
-		spin_unlock_irq(&sp->queue_lock);
-	}
-}
-
 /*
  * Core SRCU state machine.  Advance callbacks from ->batch_check0 to
  * ->batch_check1 and then to ->batch_done as readers drain.
@@ -586,26 +508,7 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
 		idx = 1 ^ (sp->completed & 1);
 		if (!try_check_zero(sp, idx, trycount))
 			return; /* readers present, retry after SRCU_INTERVAL */
-
-		/*
-		 * The callbacks in ->batch_check1 have already done
-		 * with their first zero check and flip back when they were
-		 * enqueued on ->batch_check0 in a previous invocation of
-		 * srcu_advance_batches().  (Presumably try_check_zero()
-		 * returned false during that invocation, leaving the
-		 * callbacks stranded on ->batch_check1.) They are therefore
-		 * ready to invoke, so move them to ->batch_done.
-		 */
-		rcu_batch_move(&sp->batch_done, &sp->batch_check1);
 		srcu_flip(sp);
-
-		/*
-		 * The callbacks in ->batch_check0 just finished their
-		 * first check zero and flip, so move them to ->batch_check1
-		 * for future checking on the other idx.
-		 */
-		rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
-
 		WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN2);
 	}
 
@@ -619,14 +522,6 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
 		trycount = trycount < 2 ? 2 : trycount;
 		if (!try_check_zero(sp, idx, trycount))
 			return; /* readers present, retry after SRCU_INTERVAL */
-
-		/*
-		 * The callbacks in ->batch_check1 have now waited for
-		 * all pre-existing readers using both idx values.  They are
-		 * therefore ready to invoke, so move them to ->batch_done.
-		 */
-		rcu_batch_move(&sp->batch_done, &sp->batch_check1);
-
 		srcu_gp_end(sp);
 	}
 }
@@ -639,17 +534,26 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
  */
 static void srcu_invoke_callbacks(struct srcu_struct *sp)
 {
-	int i;
-	struct rcu_head *head;
+	struct rcu_cblist ready_cbs;
+	struct rcu_head *rhp;
 
-	for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
-		head = rcu_batch_dequeue(&sp->batch_done);
-		if (!head)
-			break;
+	spin_lock_irq(&sp->queue_lock);
+	if (!rcu_segcblist_ready_cbs(&sp->srcu_cblist)) {
+		spin_unlock_irq(&sp->queue_lock);
+		return;
+	}
+	rcu_cblist_init(&ready_cbs);
+	rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs);
+	spin_unlock_irq(&sp->queue_lock);
+	rhp = rcu_cblist_dequeue(&ready_cbs);
+	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
 		local_bh_disable();
-		head->func(head);
+		rhp->func(rhp);
 		local_bh_enable();
 	}
+	spin_lock_irq(&sp->queue_lock);
+	rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs);
+	spin_unlock_irq(&sp->queue_lock);
 }
 
 /*
@@ -660,9 +564,9 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
 {
 	bool pending = true;
 
-	if (rcu_all_batches_empty(sp)) {
+	if (rcu_segcblist_empty(&sp->srcu_cblist)) {
 		spin_lock_irq(&sp->queue_lock);
-		if (rcu_all_batches_empty(sp) &&
+		if (rcu_segcblist_empty(&sp->srcu_cblist) &&
 		    READ_ONCE(sp->srcu_state) == SRCU_STATE_DONE) {
 			WRITE_ONCE(sp->srcu_state, SRCU_STATE_IDLE);
 			pending = false;
@@ -683,7 +587,6 @@ void process_srcu(struct work_struct *work)
 
 	sp = container_of(work, struct srcu_struct, work.work);
 
-	srcu_collect_new(sp);
 	srcu_advance_batches(sp, 1);
 	srcu_invoke_callbacks(sp);
 	srcu_reschedule(sp, SRCU_INTERVAL);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 93889ff21dbb..4f62651588ea 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -30,7 +30,7 @@
 #include <linux/seqlock.h>
 #include <linux/swait.h>
 #include <linux/stop_machine.h>
-#include "rcu_segcblist.h"
+#include <linux/rcu_segcblist.h>
 
 /*
  * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
-- 
cgit v1.2.3


From f2425b4efb0c69e77c0b9666b605ae4a1ecaae47 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 14 Mar 2017 12:42:30 -0700
Subject: srcu: Move combining-tree definitions for SRCU's benefit

This commit moves the C preprocessor code that defines the default shape
of the rcu_node combining tree to a new include/linux/rcu_node_tree.h
file as a first step towards enabling SRCU to create its own combining
tree, which in turn enables SRCU to implement per-CPU callback handling,
thus avoiding contention on the lock currently guarding the single list
of callbacks.  Note that users of SRCU still need to know the size of
the srcu_struct structure, hence include/linux rather than kernel/rcu.

This commit is code-movement only.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcu_node_tree.h | 102 ++++++++++++++++++++++++++++++++++++++++++
 kernel/rcu/tree.h             |  71 +----------------------------
 2 files changed, 103 insertions(+), 70 deletions(-)
 create mode 100644 include/linux/rcu_node_tree.h

diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h
new file mode 100644
index 000000000000..b7eb97096b1c
--- /dev/null
+++ b/include/linux/rcu_node_tree.h
@@ -0,0 +1,102 @@
+/*
+ * RCU node combining tree definitions.  These are used to compute
+ * global attributes while avoiding common-case global contention.  A key
+ * property that these computations rely on is a tournament-style approach
+ * where only one of the tasks contending a lower level in the tree need
+ * advance to the next higher level.  If properly configured, this allows
+ * unlimited scalability while maintaining a constant level of contention
+ * on the root node.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#ifndef __LINUX_RCU_NODE_TREE_H
+#define __LINUX_RCU_NODE_TREE_H
+
+/*
+ * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
+ * CONFIG_RCU_FANOUT_LEAF.
+ * In theory, it should be possible to add more levels straightforwardly.
+ * In practice, this did work well going from three levels to four.
+ * Of course, your mileage may vary.
+ */
+
+#ifdef CONFIG_RCU_FANOUT
+#define RCU_FANOUT CONFIG_RCU_FANOUT
+#else /* #ifdef CONFIG_RCU_FANOUT */
+# ifdef CONFIG_64BIT
+# define RCU_FANOUT 64
+# else
+# define RCU_FANOUT 32
+# endif
+#endif /* #else #ifdef CONFIG_RCU_FANOUT */
+
+#ifdef CONFIG_RCU_FANOUT_LEAF
+#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
+#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */
+#define RCU_FANOUT_LEAF 16
+#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */
+
+#define RCU_FANOUT_1	      (RCU_FANOUT_LEAF)
+#define RCU_FANOUT_2	      (RCU_FANOUT_1 * RCU_FANOUT)
+#define RCU_FANOUT_3	      (RCU_FANOUT_2 * RCU_FANOUT)
+#define RCU_FANOUT_4	      (RCU_FANOUT_3 * RCU_FANOUT)
+
+#if NR_CPUS <= RCU_FANOUT_1
+#  define RCU_NUM_LVLS	      1
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_NODES	      NUM_RCU_LVL_0
+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 }
+#  define RCU_NODE_NAME_INIT  { "rcu_node_0" }
+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" }
+#elif NR_CPUS <= RCU_FANOUT_2
+#  define RCU_NUM_LVLS	      2
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
+#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" }
+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" }
+#elif NR_CPUS <= RCU_FANOUT_3
+#  define RCU_NUM_LVLS	      3
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
+#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
+#elif NR_CPUS <= RCU_FANOUT_4
+#  define RCU_NUM_LVLS	      4
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_3	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
+#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
+#else
+# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
+#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
+
+extern int rcu_num_lvls;
+extern int rcu_num_nodes;
+
+#endif /* __LINUX_RCU_NODE_TREE_H */
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 4f62651588ea..1bec3958d44f 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -31,76 +31,7 @@
 #include <linux/swait.h>
 #include <linux/stop_machine.h>
 #include <linux/rcu_segcblist.h>
-
-/*
- * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
- * CONFIG_RCU_FANOUT_LEAF.
- * In theory, it should be possible to add more levels straightforwardly.
- * In practice, this did work well going from three levels to four.
- * Of course, your mileage may vary.
- */
-
-#ifdef CONFIG_RCU_FANOUT
-#define RCU_FANOUT CONFIG_RCU_FANOUT
-#else /* #ifdef CONFIG_RCU_FANOUT */
-# ifdef CONFIG_64BIT
-# define RCU_FANOUT 64
-# else
-# define RCU_FANOUT 32
-# endif
-#endif /* #else #ifdef CONFIG_RCU_FANOUT */
-
-#ifdef CONFIG_RCU_FANOUT_LEAF
-#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
-#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */
-#define RCU_FANOUT_LEAF 16
-#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */
-
-#define RCU_FANOUT_1	      (RCU_FANOUT_LEAF)
-#define RCU_FANOUT_2	      (RCU_FANOUT_1 * RCU_FANOUT)
-#define RCU_FANOUT_3	      (RCU_FANOUT_2 * RCU_FANOUT)
-#define RCU_FANOUT_4	      (RCU_FANOUT_3 * RCU_FANOUT)
-
-#if NR_CPUS <= RCU_FANOUT_1
-#  define RCU_NUM_LVLS	      1
-#  define NUM_RCU_LVL_0	      1
-#  define NUM_RCU_NODES	      NUM_RCU_LVL_0
-#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 }
-#  define RCU_NODE_NAME_INIT  { "rcu_node_0" }
-#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" }
-#elif NR_CPUS <= RCU_FANOUT_2
-#  define RCU_NUM_LVLS	      2
-#  define NUM_RCU_LVL_0	      1
-#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
-#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
-#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" }
-#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" }
-#elif NR_CPUS <= RCU_FANOUT_3
-#  define RCU_NUM_LVLS	      3
-#  define NUM_RCU_LVL_0	      1
-#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
-#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
-#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
-#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
-#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
-#elif NR_CPUS <= RCU_FANOUT_4
-#  define RCU_NUM_LVLS	      4
-#  define NUM_RCU_LVL_0	      1
-#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
-#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
-#  define NUM_RCU_LVL_3	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
-#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
-#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
-#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
-#else
-# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
-#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
-
-extern int rcu_num_lvls;
-extern int rcu_num_nodes;
+#include <linux/rcu_node_tree.h>
 
 /*
  * Dynticks per-CPU state.
-- 
cgit v1.2.3


From 2b34c43cc1671c59bad6dd1682ae3ee4f0919eb7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 14 Mar 2017 14:29:53 -0700
Subject: srcu: Move rcu_init_levelspread() to rcu_tree_node.h

This commit moves the rcu_init_levelspread() function from
kernel/rcu/tree.c to kernel/rcu/rcu.h so that SRCU can access it.  This is
another step towards enabling SRCU to create its own combining tree.
This commit is code-movement only, give or take knock-on adjustments.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcu_node_tree.h |  3 ---
 kernel/rcu/rcu.h              | 36 ++++++++++++++++++++++++++++++++++++
 kernel/rcu/srcu.c             |  1 +
 kernel/rcu/tree.c             | 25 -------------------------
 kernel/rcu/tree_trace.c       |  1 +
 5 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h
index b7eb97096b1c..4b766b61e1a0 100644
--- a/include/linux/rcu_node_tree.h
+++ b/include/linux/rcu_node_tree.h
@@ -96,7 +96,4 @@
 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
 #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
 
-extern int rcu_num_lvls;
-extern int rcu_num_nodes;
-
 #endif /* __LINUX_RCU_NODE_TREE_H */
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index a943b42a9cf7..87326479b39a 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -190,4 +190,40 @@ void rcu_test_sync_prims(void);
  */
 extern void resched_cpu(int cpu);
 
+#if defined(SRCU) || !defined(TINY_RCU)
+
+#include <linux/rcu_node_tree.h>
+
+extern int rcu_num_lvls;
+extern int rcu_num_nodes;
+static bool rcu_fanout_exact;
+static int rcu_fanout_leaf;
+
+/*
+ * Compute the per-level fanout, either using the exact fanout specified
+ * or balancing the tree, depending on the rcu_fanout_exact boot parameter.
+ */
+static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
+{
+	int i;
+
+	if (rcu_fanout_exact) {
+		levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
+		for (i = rcu_num_lvls - 2; i >= 0; i--)
+			levelspread[i] = RCU_FANOUT;
+	} else {
+		int ccur;
+		int cprv;
+
+		cprv = nr_cpu_ids;
+		for (i = rcu_num_lvls - 1; i >= 0; i--) {
+			ccur = levelcnt[i];
+			levelspread[i] = (cprv + ccur - 1) / ccur;
+			cprv = ccur;
+		}
+	}
+}
+
+#endif /* #if defined(SRCU) || !defined(TINY_RCU) */
+
 #endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 56fd30862122..0b511de7ca4d 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -36,6 +36,7 @@
 #include <linux/delay.h>
 #include <linux/srcu.h>
 
+#include <linux/rcu_node_tree.h>
 #include "rcu.h"
 
 static int init_srcu_struct_fields(struct srcu_struct *sp)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 844a030c1960..df3527744af8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3951,31 +3951,6 @@ void rcu_scheduler_starting(void)
 	rcu_test_sync_prims();
 }
 
-/*
- * Compute the per-level fanout, either using the exact fanout specified
- * or balancing the tree, depending on the rcu_fanout_exact boot parameter.
- */
-static void __init rcu_init_levelspread(int *levelspread, const int *levelcnt)
-{
-	int i;
-
-	if (rcu_fanout_exact) {
-		levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
-		for (i = rcu_num_lvls - 2; i >= 0; i--)
-			levelspread[i] = RCU_FANOUT;
-	} else {
-		int ccur;
-		int cprv;
-
-		cprv = nr_cpu_ids;
-		for (i = rcu_num_lvls - 1; i >= 0; i--) {
-			ccur = levelcnt[i];
-			levelspread[i] = (cprv + ccur - 1) / ccur;
-			cprv = ccur;
-		}
-	}
-}
-
 /*
  * Helper function for rcu_init() that initializes one rcu_state structure.
  */
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 066c64071a7b..30c5bf89ee58 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -45,6 +45,7 @@
 
 #define RCU_TREE_NONCORE
 #include "tree.h"
+#include "rcu.h"
 
 static int r_open(struct inode *inode, struct file *file,
 					const struct seq_operations *op)
-- 
cgit v1.2.3


From 41f5c63178b7ca3d775638710b752b81b433fc86 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 15 Mar 2017 12:59:17 -0700
Subject: rcu: Remove redundant levelcnt[] array from rcu_init_one()

The levelcnt[] array is identical to num_rcu_lvl[], so this commit
removes levelcnt[].

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index df3527744af8..9397a6693d70 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3961,7 +3961,6 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 	static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 	static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 
-	int levelcnt[RCU_NUM_LVLS];		/* # nodes in each level. */
 	int levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */
 	int cpustride = 1;
 	int i;
@@ -3976,18 +3975,16 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 
 	/* Initialize the level-tracking arrays. */
 
-	for (i = 0; i < rcu_num_lvls; i++)
-		levelcnt[i] = num_rcu_lvl[i];
 	for (i = 1; i < rcu_num_lvls; i++)
-		rsp->level[i] = rsp->level[i - 1] + levelcnt[i - 1];
-	rcu_init_levelspread(levelspread, levelcnt);
+		rsp->level[i] = rsp->level[i - 1] + num_rcu_lvl[i - 1];
+	rcu_init_levelspread(levelspread, num_rcu_lvl);
 
 	/* Initialize the elements themselves, starting from the leaves. */
 
 	for (i = rcu_num_lvls - 1; i >= 0; i--) {
 		cpustride *= levelspread[i];
 		rnp = rsp->level[i];
-		for (j = 0; j < levelcnt[i]; j++, rnp++) {
+		for (j = 0; j < num_rcu_lvl[i]; j++, rnp++) {
 			raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock));
 			lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock),
 						   &rcu_node_class[i], buf[i]);
-- 
cgit v1.2.3


From efbe451d46af62369226e42b98dbcd95b6940a63 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 15 Mar 2017 13:07:53 -0700
Subject: srcu: Move rcu_node traversal macros to rcu.h

This commit moves rcu_for_each_node_breadth_first(),
rcu_for_each_nonleaf_node_breadth_first(), and
rcu_for_each_leaf_node() from kernel/rcu/tree.h to
kernel/rcu/rcu.h so that SRCU can access them.
This commit is code-movement only.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcu.h  | 35 +++++++++++++++++++++++++++++++++++
 kernel/rcu/tree.h | 35 -----------------------------------
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 87326479b39a..5c1798ec32f7 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -224,6 +224,41 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
 	}
 }
 
+/*
+ * Do a full breadth-first scan of the rcu_node structures for the
+ * specified rcu_state structure.
+ */
+#define rcu_for_each_node_breadth_first(rsp, rnp) \
+	for ((rnp) = &(rsp)->node[0]; \
+	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
+
+/*
+ * Do a breadth-first scan of the non-leaf rcu_node structures for the
+ * specified rcu_state structure.  Note that if there is a singleton
+ * rcu_node tree with but one rcu_node structure, this loop is a no-op.
+ */
+#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
+	for ((rnp) = &(rsp)->node[0]; \
+	     (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
+
+/*
+ * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
+ * structure.  Note that if there is a singleton rcu_node tree with but
+ * one rcu_node structure, this loop -will- visit the rcu_node structure.
+ * It is still a leaf node, even if it is also the root node.
+ */
+#define rcu_for_each_leaf_node(rsp, rnp) \
+	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
+	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
+
+/*
+ * Iterate over all possible CPUs in a leaf RCU node.
+ */
+#define for_each_leaf_node_possible_cpu(rnp, cpu) \
+	for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \
+	     cpu <= rnp->grphi; \
+	     cpu = cpumask_next((cpu), cpu_possible_mask))
+
 #endif /* #if defined(SRCU) || !defined(TINY_RCU) */
 
 #endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 1bec3958d44f..a2a45cb629d6 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -192,41 +192,6 @@ struct rcu_node {
  */
 #define leaf_node_cpu_bit(rnp, cpu) (1UL << ((cpu) - (rnp)->grplo))
 
-/*
- * Do a full breadth-first scan of the rcu_node structures for the
- * specified rcu_state structure.
- */
-#define rcu_for_each_node_breadth_first(rsp, rnp) \
-	for ((rnp) = &(rsp)->node[0]; \
-	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
-
-/*
- * Do a breadth-first scan of the non-leaf rcu_node structures for the
- * specified rcu_state structure.  Note that if there is a singleton
- * rcu_node tree with but one rcu_node structure, this loop is a no-op.
- */
-#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
-	for ((rnp) = &(rsp)->node[0]; \
-	     (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
-
-/*
- * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
- * structure.  Note that if there is a singleton rcu_node tree with but
- * one rcu_node structure, this loop -will- visit the rcu_node structure.
- * It is still a leaf node, even if it is also the root node.
- */
-#define rcu_for_each_leaf_node(rsp, rnp) \
-	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
-	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
-
-/*
- * Iterate over all possible CPUs in a leaf RCU node.
- */
-#define for_each_leaf_node_possible_cpu(rnp, cpu) \
-	for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \
-	     cpu <= rnp->grphi; \
-	     cpu = cpumask_next((cpu), cpu_possible_mask))
-
 /*
  * Union to allow "aggregate OR" operation on the need for a quiescent
  * state by the normal and expedited grace periods.
-- 
cgit v1.2.3


From e95d68d2127716c7d6fb144bb19ef48ce9f37393 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 15 Mar 2017 13:11:11 -0700
Subject: srcu: Make num_rcu_lvl[] array be external

This commit makes the num_rcu_lvl[] array external so that SRCU can
make use of it for initializing its upcoming srcu_node tree.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcu.h  | 1 +
 kernel/rcu/tree.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 5c1798ec32f7..4303b880ac99 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -195,6 +195,7 @@ extern void resched_cpu(int cpu);
 #include <linux/rcu_node_tree.h>
 
 extern int rcu_num_lvls;
+extern int num_rcu_lvl[];
 extern int rcu_num_nodes;
 static bool rcu_fanout_exact;
 static int rcu_fanout_leaf;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9397a6693d70..346948b51b0b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -123,7 +123,7 @@ static int rcu_fanout_leaf = RCU_FANOUT_LEAF;
 module_param(rcu_fanout_leaf, int, 0444);
 int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
 /* Number of rcu_nodes at specified level. */
-static int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
+int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
 int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
 /* panic() on RCU Stall sysctl. */
 int sysctl_panic_on_rcu_stall __read_mostly;
-- 
cgit v1.2.3


From 91e27c356c96d760f5c283f05627ed976377debb Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 15 Mar 2017 16:34:11 -0700
Subject: srcu: Fix bogus try_check_zero() comment

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/srcu.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 0b511de7ca4d..1a2dc74bb625 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -260,10 +260,9 @@ static void srcu_gp_start(struct srcu_struct *sp)
 }
 
 /*
- * @@@ Wait until all pre-existing readers complete.  Such readers
- * will have used the index specified by "idx".
- * the caller should ensures the ->completed is not changed while checking
- * and idx = (->completed & 1) ^ 1
+ * Wait until all readers counted by array index idx complete, but loop
+ * a maximum of trycount times.  The caller must ensure that ->completed
+ * is not changed while checking.
  */
 static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
 {
-- 
cgit v1.2.3


From 031aeee000e895247eafa4233c2d193dd39d4ea2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 21 Mar 2017 07:28:14 -0700
Subject: srcu: Improve rcu_seq grace-period-counter abstraction

The expedited grace-period code contains several open-coded shifts
know the format of an rcu_seq grace-period counter, which is not
particularly good style.  This commit therefore creates a new
rcu_seq_ctr() function that extracts the counter portion of the
counter, and an rcu_seq_state() function that extracts the low-order
state bit.  This commit prepares for SRCU callback parallelization,
which will require two state bits.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcu.h      | 29 +++++++++++++++++++++++++----
 kernel/rcu/tree_exp.h |  9 ++++-----
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 4303b880ac99..c62df93bfc1b 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -61,20 +61,41 @@
  * Grace-period counter management.
  */
 
+#define RCU_SEQ_CTR_SHIFT	1
+#define RCU_SEQ_STATE_MASK	((1 << RCU_SEQ_CTR_SHIFT) - 1)
+
+/*
+ * Return the counter portion of a sequence number previously returned
+ * by rcu_seq_snap() or rcu_seq_current().
+ */
+static inline unsigned long rcu_seq_ctr(unsigned long s)
+{
+	return s >> RCU_SEQ_CTR_SHIFT;
+}
+
+/*
+ * Return the state portion of a sequence number previously returned
+ * by rcu_seq_snap() or rcu_seq_current().
+ */
+static inline int rcu_seq_state(unsigned long s)
+{
+	return s & RCU_SEQ_STATE_MASK;
+}
+
 /* Adjust sequence number for start of update-side operation. */
 static inline void rcu_seq_start(unsigned long *sp)
 {
 	WRITE_ONCE(*sp, *sp + 1);
 	smp_mb(); /* Ensure update-side operation after counter increment. */
-	WARN_ON_ONCE(!(*sp & 0x1));
+	WARN_ON_ONCE(rcu_seq_state(*sp) != 1);
 }
 
 /* Adjust sequence number for end of update-side operation. */
 static inline void rcu_seq_end(unsigned long *sp)
 {
 	smp_mb(); /* Ensure update-side operation before counter increment. */
-	WARN_ON_ONCE(!(*sp & 0x1));
-	WRITE_ONCE(*sp, *sp + 1);
+	WARN_ON_ONCE(!rcu_seq_state(*sp));
+	WRITE_ONCE(*sp, (*sp | RCU_SEQ_STATE_MASK) + 1);
 }
 
 /* Take a snapshot of the update side's sequence number. */
@@ -82,7 +103,7 @@ static inline unsigned long rcu_seq_snap(unsigned long *sp)
 {
 	unsigned long s;
 
-	s = (READ_ONCE(*sp) + 3) & ~0x1;
+	s = (READ_ONCE(*sp) + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK;
 	smp_mb(); /* Above access must not bleed into critical section. */
 	return s;
 }
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 027e123d93c7..e513b4ab1197 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -292,7 +292,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 			trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
 						  rnp->grplo, rnp->grphi,
 						  TPS("wait"));
-			wait_event(rnp->exp_wq[(s >> 1) & 0x3],
+			wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
 				   sync_exp_work_done(rsp,
 						      &rdp->exp_workdone2, s));
 			return true;
@@ -534,7 +534,7 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
 			spin_unlock(&rnp->exp_lock);
 		}
 		smp_mb(); /* All above changes before wakeup. */
-		wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]);
+		wake_up_all(&rnp->exp_wq[rcu_seq_ctr(rsp->expedited_sequence) & 0x3]);
 	}
 	trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
 	mutex_unlock(&rsp->exp_wake_mutex);
@@ -612,9 +612,8 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
 	/* Wait for expedited grace period to complete. */
 	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
 	rnp = rcu_get_root(rsp);
-	wait_event(rnp->exp_wq[(s >> 1) & 0x3],
-		   sync_exp_work_done(rsp,
-				      &rdp->exp_workdone0, s));
+	wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
+		   sync_exp_work_done(rsp, &rdp->exp_workdone0, s));
 	smp_mb(); /* Workqueue actions happen before return. */
 
 	/* Let the next expedited grace period start. */
-- 
cgit v1.2.3


From f1ec57a462314f3d1cd0c9c8a04979228aa9d38c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 21 Mar 2017 10:35:57 -0700
Subject: srcu: Allow a second bit in rcu_seq for SRCU state

This commit increases the number of reserved bits at the bottom of an
rcu_seq grace-period counter from one to two, as will be needed to
accommodate SRCU's three-state grace periods.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index c62df93bfc1b..87a0ac95b551 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -61,7 +61,7 @@
  * Grace-period counter management.
  */
 
-#define RCU_SEQ_CTR_SHIFT	1
+#define RCU_SEQ_CTR_SHIFT	2
 #define RCU_SEQ_STATE_MASK	((1 << RCU_SEQ_CTR_SHIFT) - 1)
 
 /*
-- 
cgit v1.2.3


From 80a7956fe36c2ee40c6ff12c77926d267802b7c8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 22 Mar 2017 15:26:18 -0700
Subject: srcu: Merge ->srcu_state into ->srcu_gp_seq

Updating ->srcu_state and ->srcu_gp_seq will lead to extremely complex
race conditions given multiple callback queues, so this commit takes
advantage of the two-bit state now available in rcu_seq counters to
store the state in the bottom two bits of ->srcu_gp_seq.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h |  5 +----
 kernel/rcu/rcu.h     | 10 ++++++++++
 kernel/rcu/srcu.c    | 55 +++++++++++++++++++++++++++++++++-------------------
 3 files changed, 46 insertions(+), 24 deletions(-)

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index ad154a7bc114..e7dbc01b61a1 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -43,8 +43,7 @@ struct srcu_struct {
 	unsigned long completed;
 	unsigned long srcu_gp_seq;
 	struct srcu_array __percpu *per_cpu_ref;
-	spinlock_t queue_lock; /* protect ->srcu_cblist, ->srcu_state */
-	int srcu_state;
+	spinlock_t queue_lock; /* protect ->srcu_cblist */
 	struct rcu_segcblist srcu_cblist;
 	struct delayed_work work;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -56,7 +55,6 @@ struct srcu_struct {
 #define SRCU_STATE_IDLE		0
 #define SRCU_STATE_SCAN1	1
 #define SRCU_STATE_SCAN2	2
-#define SRCU_STATE_DONE		3
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
@@ -85,7 +83,6 @@ void process_srcu(struct work_struct *work);
 		.completed = -300,					\
 		.per_cpu_ref = &name##_srcu_array,			\
 		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
-		.srcu_state = SRCU_STATE_IDLE,				\
 		.srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\
 		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
 		__SRCU_DEP_MAP_INIT(name)				\
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 87a0ac95b551..73e16ec4054b 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -82,6 +82,16 @@ static inline int rcu_seq_state(unsigned long s)
 	return s & RCU_SEQ_STATE_MASK;
 }
 
+/*
+ * Set the state portion of the pointed-to sequence number.
+ * The caller is responsible for preventing conflicting updates.
+ */
+static inline void rcu_seq_set_state(unsigned long *sp, int newstate)
+{
+	WARN_ON_ONCE(newstate & ~RCU_SEQ_STATE_MASK);
+	WRITE_ONCE(*sp, (*sp & ~RCU_SEQ_STATE_MASK) + newstate);
+}
+
 /* Adjust sequence number for start of update-side operation. */
 static inline void rcu_seq_start(unsigned long *sp)
 {
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 1a2dc74bb625..90ffea31b188 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -44,7 +44,6 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
 	sp->completed = 0;
 	sp->srcu_gp_seq = 0;
 	spin_lock_init(&sp->queue_lock);
-	sp->srcu_state = SRCU_STATE_IDLE;
 	rcu_segcblist_init(&sp->srcu_cblist);
 	INIT_DELAYED_WORK(&sp->work, process_srcu);
 	sp->per_cpu_ref = alloc_percpu(struct srcu_array);
@@ -180,6 +179,9 @@ static bool srcu_readers_active(struct srcu_struct *sp)
 	return sum;
 }
 
+#define SRCU_CALLBACK_BATCH	10
+#define SRCU_INTERVAL		1
+
 /**
  * cleanup_srcu_struct - deconstruct a sleep-RCU structure
  * @sp: structure to clean up.
@@ -200,8 +202,10 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
 	if (WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)))
 		return; /* Leakage unless caller handles error. */
 	flush_delayed_work(&sp->work);
-	if (WARN_ON(READ_ONCE(sp->srcu_state) != SRCU_STATE_IDLE))
+	if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE)) {
+		pr_info("cleanup_srcu_struct: Active srcu_struct %lu CBs %c state: %d\n", rcu_segcblist_n_cbs(&sp->srcu_cblist), ".E"[rcu_segcblist_empty(&sp->srcu_cblist)], rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
 		return; /* Caller forgot to stop doing call_srcu()? */
+	}
 	free_percpu(sp->per_cpu_ref);
 	sp->per_cpu_ref = NULL;
 }
@@ -253,10 +257,13 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  */
 static void srcu_gp_start(struct srcu_struct *sp)
 {
+	int state;
+
 	rcu_segcblist_accelerate(&sp->srcu_cblist,
 				 rcu_seq_snap(&sp->srcu_gp_seq));
-	WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1);
 	rcu_seq_start(&sp->srcu_gp_seq);
+	state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
+	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
 }
 
 /*
@@ -300,7 +307,6 @@ static void srcu_flip(struct srcu_struct *sp)
 static void srcu_gp_end(struct srcu_struct *sp)
 {
 	rcu_seq_end(&sp->srcu_gp_seq);
-	WRITE_ONCE(sp->srcu_state, SRCU_STATE_DONE);
 
 	spin_lock_irq(&sp->queue_lock);
 	rcu_segcblist_advance(&sp->srcu_cblist,
@@ -345,7 +351,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 	spin_lock_irqsave(&sp->queue_lock, flags);
 	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
 	rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
-	if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) {
+	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_IDLE) {
 		srcu_gp_start(sp);
 		queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
 	}
@@ -378,7 +384,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
 	head->func = wakeme_after_rcu;
 	spin_lock_irq(&sp->queue_lock);
 	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) {
+	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_IDLE) {
 		/* steal the processing owner */
 		rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
 		srcu_gp_start(sp);
@@ -480,9 +486,6 @@ unsigned long srcu_batches_completed(struct srcu_struct *sp)
 }
 EXPORT_SYMBOL_GPL(srcu_batches_completed);
 
-#define SRCU_CALLBACK_BATCH	10
-#define SRCU_INTERVAL		1
-
 /*
  * Core SRCU state machine.  Advance callbacks from ->batch_check0 to
  * ->batch_check1 and then to ->batch_done as readers drain.
@@ -491,28 +494,40 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
 {
 	int idx;
 
-	WARN_ON_ONCE(sp->srcu_state == SRCU_STATE_IDLE);
-
 	/*
 	 * Because readers might be delayed for an extended period after
 	 * fetching ->completed for their index, at any point in time there
 	 * might well be readers using both idx=0 and idx=1.  We therefore
 	 * need to wait for readers to clear from both index values before
 	 * invoking a callback.
+	 *
+	 * The load-acquire ensures that we see the accesses performed
+	 * by the prior grace period.
 	 */
+	idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
+	if (idx == SRCU_STATE_IDLE) {
+		spin_lock_irq(&sp->queue_lock);
+		if (rcu_segcblist_empty(&sp->srcu_cblist)) {
+			spin_unlock_irq(&sp->queue_lock);
+			return;
+		}
+		idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
+		if (idx == SRCU_STATE_IDLE)
+			srcu_gp_start(sp);
+		spin_unlock_irq(&sp->queue_lock);
+		if (idx != SRCU_STATE_IDLE)
+			return; /* Someone else started the grace period. */
+	}
 
-	if (sp->srcu_state == SRCU_STATE_DONE)
-		srcu_gp_start(sp);
-
-	if (sp->srcu_state == SRCU_STATE_SCAN1) {
+	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
 		idx = 1 ^ (sp->completed & 1);
 		if (!try_check_zero(sp, idx, trycount))
 			return; /* readers present, retry after SRCU_INTERVAL */
 		srcu_flip(sp);
-		WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN2);
+		rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2);
 	}
 
-	if (sp->srcu_state == SRCU_STATE_SCAN2) {
+	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
 
 		/*
 		 * SRCU read-side critical sections are normally short,
@@ -563,14 +578,14 @@ static void srcu_invoke_callbacks(struct srcu_struct *sp)
 static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
 {
 	bool pending = true;
+	int state;
 
 	if (rcu_segcblist_empty(&sp->srcu_cblist)) {
 		spin_lock_irq(&sp->queue_lock);
+		state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
 		if (rcu_segcblist_empty(&sp->srcu_cblist) &&
-		    READ_ONCE(sp->srcu_state) == SRCU_STATE_DONE) {
-			WRITE_ONCE(sp->srcu_state, SRCU_STATE_IDLE);
+		    state == SRCU_STATE_IDLE)
 			pending = false;
-		}
 		spin_unlock_irq(&sp->queue_lock);
 	}
 
-- 
cgit v1.2.3


From f60d231a87c5c9f23f10e69996f396d46f5bf901 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 24 Mar 2017 13:46:33 -0700
Subject: srcu: Crude control of expedited grace periods

SRCU's implementation of expedited grace periods has always assumed
that the SRCU instance is idle when the expedited request arrives.
This commit improves this a bit by maintaining a count of the number
of outstanding expedited requests, thus allowing prior non-expedited
grace periods accommodate these requests by shifting to expedited mode.
However, any non-expedited wait already in progress will still wait for
the full duration.

Improved control of expedited grace periods is planned, but one step
at a time.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h |  1 +
 kernel/rcu/srcu.c    | 84 ++++++++++++++++++++++++++++------------------------
 2 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index e7dbc01b61a1..73a1b6296224 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -42,6 +42,7 @@ struct srcu_array {
 struct srcu_struct {
 	unsigned long completed;
 	unsigned long srcu_gp_seq;
+	atomic_t srcu_exp_cnt;
 	struct srcu_array __percpu *per_cpu_ref;
 	spinlock_t queue_lock; /* protect ->srcu_cblist */
 	struct rcu_segcblist srcu_cblist;
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 90ffea31b188..3cfcc59bddf3 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -43,6 +43,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
 {
 	sp->completed = 0;
 	sp->srcu_gp_seq = 0;
+	atomic_set(&sp->srcu_exp_cnt, 0);
 	spin_lock_init(&sp->queue_lock);
 	rcu_segcblist_init(&sp->srcu_cblist);
 	INIT_DELAYED_WORK(&sp->work, process_srcu);
@@ -179,7 +180,6 @@ static bool srcu_readers_active(struct srcu_struct *sp)
 	return sum;
 }
 
-#define SRCU_CALLBACK_BATCH	10
 #define SRCU_INTERVAL		1
 
 /**
@@ -197,6 +197,7 @@ static bool srcu_readers_active(struct srcu_struct *sp)
  */
 void cleanup_srcu_struct(struct srcu_struct *sp)
 {
+	WARN_ON_ONCE(atomic_read(&sp->srcu_exp_cnt));
 	if (WARN_ON(srcu_readers_active(sp)))
 		return; /* Leakage unless caller handles error. */
 	if (WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)))
@@ -244,13 +245,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  * We use an adaptive strategy for synchronize_srcu() and especially for
  * synchronize_srcu_expedited().  We spin for a fixed time period
  * (defined below) to allow SRCU readers to exit their read-side critical
- * sections.  If there are still some readers after 10 microseconds,
- * we repeatedly block for 1-millisecond time periods.  This approach
- * has done well in testing, so there is no need for a config parameter.
+ * sections.  If there are still some readers after a few microseconds,
+ * we repeatedly block for 1-millisecond time periods.
  */
 #define SRCU_RETRY_CHECK_DELAY		5
-#define SYNCHRONIZE_SRCU_TRYCOUNT	2
-#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT	12
 
 /*
  * Start an SRCU grace period.
@@ -267,16 +265,16 @@ static void srcu_gp_start(struct srcu_struct *sp)
 }
 
 /*
- * Wait until all readers counted by array index idx complete, but loop
- * a maximum of trycount times.  The caller must ensure that ->completed
- * is not changed while checking.
+ * Wait until all readers counted by array index idx complete, but
+ * loop an additional time if there is an expedited grace period pending.
+ * The caller must ensure that ->completed is not changed while checking.
  */
 static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
 {
 	for (;;) {
 		if (srcu_readers_active_idx_check(sp, idx))
 			return true;
-		if (--trycount <= 0)
+		if (--trycount + !!atomic_read(&sp->srcu_exp_cnt) <= 0)
 			return false;
 		udelay(SRCU_RETRY_CHECK_DELAY);
 	}
@@ -364,7 +362,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
 /*
  * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
  */
-static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
+static void __synchronize_srcu(struct srcu_struct *sp)
 {
 	struct rcu_synchronize rcu;
 	struct rcu_head *head = &rcu.head;
@@ -400,6 +398,32 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
 	smp_mb(); /* Caller's later accesses after GP. */
 }
 
+/**
+ * synchronize_srcu_expedited - Brute-force SRCU grace period
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Wait for an SRCU grace period to elapse, but be more aggressive about
+ * spinning rather than blocking when waiting.
+ *
+ * Note that synchronize_srcu_expedited() has the same deadlock and
+ * memory-ordering properties as does synchronize_srcu().
+ */
+void synchronize_srcu_expedited(struct srcu_struct *sp)
+{
+	bool do_norm = rcu_gp_is_normal();
+
+	if (!do_norm) {
+		atomic_inc(&sp->srcu_exp_cnt);
+		smp_mb__after_atomic(); /* increment before GP. */
+	}
+	__synchronize_srcu(sp);
+	if (!do_norm) {
+		smp_mb__before_atomic(); /* GP before decrement. */
+		atomic_dec(&sp->srcu_exp_cnt);
+	}
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
+
 /**
  * synchronize_srcu - wait for prior SRCU read-side critical-section completion
  * @sp: srcu_struct with which to synchronize.
@@ -441,28 +465,13 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
  */
 void synchronize_srcu(struct srcu_struct *sp)
 {
-	__synchronize_srcu(sp, (rcu_gp_is_expedited() && !rcu_gp_is_normal())
-			   ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
-			   : SYNCHRONIZE_SRCU_TRYCOUNT);
+	if (rcu_gp_is_expedited())
+		synchronize_srcu_expedited(sp);
+	else
+		__synchronize_srcu(sp);
 }
 EXPORT_SYMBOL_GPL(synchronize_srcu);
 
-/**
- * synchronize_srcu_expedited - Brute-force SRCU grace period
- * @sp: srcu_struct with which to synchronize.
- *
- * Wait for an SRCU grace period to elapse, but be more aggressive about
- * spinning rather than blocking when waiting.
- *
- * Note that synchronize_srcu_expedited() has the same deadlock and
- * memory-ordering properties as does synchronize_srcu().
- */
-void synchronize_srcu_expedited(struct srcu_struct *sp)
-{
-	__synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT);
-}
-EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
-
 /**
  * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
  * @sp: srcu_struct on which to wait for in-flight callbacks.
@@ -490,7 +499,7 @@ EXPORT_SYMBOL_GPL(srcu_batches_completed);
  * Core SRCU state machine.  Advance callbacks from ->batch_check0 to
  * ->batch_check1 and then to ->batch_done as readers drain.
  */
-static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
+static void srcu_advance_batches(struct srcu_struct *sp)
 {
 	int idx;
 
@@ -521,8 +530,8 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
 
 	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
 		idx = 1 ^ (sp->completed & 1);
-		if (!try_check_zero(sp, idx, trycount))
-			return; /* readers present, retry after SRCU_INTERVAL */
+		if (!try_check_zero(sp, idx, 1))
+			return; /* readers present, retry later. */
 		srcu_flip(sp);
 		rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2);
 	}
@@ -534,9 +543,8 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
 		 * so check at least twice in quick succession after a flip.
 		 */
 		idx = 1 ^ (sp->completed & 1);
-		trycount = trycount < 2 ? 2 : trycount;
-		if (!try_check_zero(sp, idx, trycount))
-			return; /* readers present, retry after SRCU_INTERVAL */
+		if (!try_check_zero(sp, idx, 2))
+			return; /* readers present, retry after later. */
 		srcu_gp_end(sp);
 	}
 }
@@ -602,8 +610,8 @@ void process_srcu(struct work_struct *work)
 
 	sp = container_of(work, struct srcu_struct, work.work);
 
-	srcu_advance_batches(sp, 1);
+	srcu_advance_batches(sp);
 	srcu_invoke_callbacks(sp);
-	srcu_reschedule(sp, SRCU_INTERVAL);
+	srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL);
 }
 EXPORT_SYMBOL_GPL(process_srcu);
-- 
cgit v1.2.3


From dde8da6cffe73dab81aca3855e717e40db35178c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 25 Mar 2017 10:42:07 -0700
Subject: mm: Use static initialization for "srcu"

The MM-notifier code currently dynamically initializes the srcu_struct
named "srcu" at subsys_initcall() time, and includes a BUG_ON() to check
this initialization in do_mmu_notifier_register().  Unfortunately, there
is no foolproof way to verify that an srcu_struct has been initialized,
given the possibility of an srcu_struct being allocated on the stack or
on the heap.  This means that creating an srcu_struct_is_initialized()
function is not a reasonable course of action.  Nor is peppering
do_mmu_notifier_register() with SRCU-specific #ifdefs an attractive
alternative.

This commit therefore uses DEFINE_STATIC_SRCU() to initialize
this srcu_struct at compile time, thus eliminating both the
subsys_initcall()-time initialization and the runtime BUG_ON().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: <linux-mm@kvack.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
---
 mm/mmu_notifier.c | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index a7652acd2ab9..54ca54562928 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -21,7 +21,7 @@
 #include <linux/slab.h>
 
 /* global SRCU for all MMs */
-static struct srcu_struct srcu;
+DEFINE_STATIC_SRCU(srcu);
 
 /*
  * This function allows mmu_notifier::release callback to delay a call to
@@ -252,12 +252,6 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
 
 	BUG_ON(atomic_read(&mm->mm_users) <= 0);
 
-	/*
-	 * Verify that mmu_notifier_init() already run and the global srcu is
-	 * initialized.
-	 */
-	BUG_ON(!srcu.per_cpu_ref);
-
 	ret = -ENOMEM;
 	mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
 	if (unlikely(!mmu_notifier_mm))
@@ -406,9 +400,3 @@ void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
 	mmdrop(mm);
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
-
-static int __init mmu_notifier_init(void)
-{
-	return init_srcu_struct(&srcu);
-}
-subsys_initcall(mmu_notifier_init);
-- 
cgit v1.2.3


From d8be81735aa89413b333de488251f0e64e2be591 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 25 Mar 2017 09:59:38 -0700
Subject: srcu: Create a tiny SRCU

In response to automated complaints about modifications to SRCU
increasing its size, this commit creates a tiny SRCU that is
used in SMP=n && PREEMPT=n builds.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h     |  69 ++-------------
 include/linux/srcutiny.h |  81 ++++++++++++++++++
 include/linux/srcutree.h |  91 ++++++++++++++++++++
 init/Kconfig             |  12 +++
 kernel/rcu/Makefile      |   3 +-
 kernel/rcu/rcutorture.c  |   2 +
 kernel/rcu/srcutiny.c    | 215 +++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 411 insertions(+), 62 deletions(-)
 create mode 100644 include/linux/srcutiny.h
 create mode 100644 include/linux/srcutree.h
 create mode 100644 kernel/rcu/srcutiny.c

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 73a1b6296224..907f09b14eda 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -34,28 +34,7 @@
 #include <linux/workqueue.h>
 #include <linux/rcu_segcblist.h>
 
-struct srcu_array {
-	unsigned long lock_count[2];
-	unsigned long unlock_count[2];
-};
-
-struct srcu_struct {
-	unsigned long completed;
-	unsigned long srcu_gp_seq;
-	atomic_t srcu_exp_cnt;
-	struct srcu_array __percpu *per_cpu_ref;
-	spinlock_t queue_lock; /* protect ->srcu_cblist */
-	struct rcu_segcblist srcu_cblist;
-	struct delayed_work work;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-};
-
-/* Values for -> state variable. */
-#define SRCU_STATE_IDLE		0
-#define SRCU_STATE_SCAN1	1
-#define SRCU_STATE_SCAN2	2
+struct srcu_struct;
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
@@ -77,42 +56,13 @@ int init_srcu_struct(struct srcu_struct *sp);
 #define __SRCU_DEP_MAP_INIT(srcu_name)
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-void process_srcu(struct work_struct *work);
-
-#define __SRCU_STRUCT_INIT(name)					\
-	{								\
-		.completed = -300,					\
-		.per_cpu_ref = &name##_srcu_array,			\
-		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
-		.srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\
-		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
-		__SRCU_DEP_MAP_INIT(name)				\
-	}
-
-/*
- * Define and initialize a srcu struct at build time.
- * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
- *
- * Note that although DEFINE_STATIC_SRCU() hides the name from other
- * files, the per-CPU variable rules nevertheless require that the
- * chosen name be globally unique.  These rules also prohibit use of
- * DEFINE_STATIC_SRCU() within a function.  If these rules are too
- * restrictive, declare the srcu_struct manually.  For example, in
- * each file:
- *
- *	static struct srcu_struct my_srcu;
- *
- * Then, before the first use of each my_srcu, manually initialize it:
- *
- *	init_srcu_struct(&my_srcu);
- *
- * See include/linux/percpu-defs.h for the rules on per-CPU variables.
- */
-#define __DEFINE_SRCU(name, is_static)					\
-	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
-	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
-#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
-#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
+#ifdef CONFIG_TINY_SRCU
+#include <linux/srcutiny.h>
+#elif defined(CONFIG_TREE_SRCU)
+#include <linux/srcutree.h>
+#else
+#error "Unknown SRCU implementation specified to kernel configuration"
+#endif
 
 /**
  * call_srcu() - Queue a callback for invocation after an SRCU grace period
@@ -138,9 +88,6 @@ void cleanup_srcu_struct(struct srcu_struct *sp);
 int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
 void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
 void synchronize_srcu(struct srcu_struct *sp);
-void synchronize_srcu_expedited(struct srcu_struct *sp);
-unsigned long srcu_batches_completed(struct srcu_struct *sp);
-void srcu_barrier(struct srcu_struct *sp);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
new file mode 100644
index 000000000000..4f284e4f4d8c
--- /dev/null
+++ b/include/linux/srcutiny.h
@@ -0,0 +1,81 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion,
+ *	tiny variant.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2017
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ */
+
+#ifndef _LINUX_SRCU_TINY_H
+#define _LINUX_SRCU_TINY_H
+
+#include <linux/swait.h>
+
+struct srcu_struct {
+	int srcu_lock_nesting[2];	/* srcu_read_lock() nesting depth. */
+	struct swait_queue_head srcu_wq;
+					/* Last srcu_read_unlock() wakes GP. */
+	unsigned long srcu_gp_seq;	/* GP seq # for callback tagging. */
+	struct rcu_segcblist srcu_cblist;
+					/* Pending SRCU callbacks. */
+	int srcu_idx;			/* Current reader array element. */
+	bool srcu_gp_running;		/* GP workqueue running? */
+	bool srcu_gp_waiting;		/* GP waiting for readers? */
+	struct work_struct srcu_work;	/* For driving grace periods. */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+};
+
+void srcu_drive_gp(struct work_struct *wp);
+
+#define __SRCU_STRUCT_INIT(name)					\
+{									\
+	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\
+	.srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),	\
+	.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp),	\
+	__SRCU_DEP_MAP_INIT(name)					\
+}
+
+/*
+ * This odd _STATIC_ arrangement is needed for API compatibility with
+ * Tree SRCU, which needs some per-CPU data.
+ */
+#define DEFINE_SRCU(name) \
+	struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+#define DEFINE_STATIC_SRCU(name) \
+	static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+
+void synchronize_srcu(struct srcu_struct *sp);
+
+static inline void synchronize_srcu_expedited(struct srcu_struct *sp)
+{
+	synchronize_srcu(sp);
+}
+
+static inline void srcu_barrier(struct srcu_struct *sp)
+{
+	synchronize_srcu(sp);
+}
+
+static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
+{
+	return 0;
+}
+
+#endif
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
new file mode 100644
index 000000000000..f2b3bd6c6bc2
--- /dev/null
+++ b/include/linux/srcutree.h
@@ -0,0 +1,91 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion,
+ *	tree variant.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2017
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ */
+
+#ifndef _LINUX_SRCU_TREE_H
+#define _LINUX_SRCU_TREE_H
+
+struct srcu_array {
+	unsigned long lock_count[2];
+	unsigned long unlock_count[2];
+};
+
+struct srcu_struct {
+	unsigned long completed;
+	unsigned long srcu_gp_seq;
+	atomic_t srcu_exp_cnt;
+	struct srcu_array __percpu *per_cpu_ref;
+	spinlock_t queue_lock; /* protect ->srcu_cblist */
+	struct rcu_segcblist srcu_cblist;
+	struct delayed_work work;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+};
+
+/* Values for -> state variable. */
+#define SRCU_STATE_IDLE		0
+#define SRCU_STATE_SCAN1	1
+#define SRCU_STATE_SCAN2	2
+
+void process_srcu(struct work_struct *work);
+
+#define __SRCU_STRUCT_INIT(name)					\
+	{								\
+		.completed = -300,					\
+		.per_cpu_ref = &name##_srcu_array,			\
+		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
+		.srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\
+		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
+		__SRCU_DEP_MAP_INIT(name)				\
+	}
+
+/*
+ * Define and initialize a srcu struct at build time.
+ * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ *
+ * Note that although DEFINE_STATIC_SRCU() hides the name from other
+ * files, the per-CPU variable rules nevertheless require that the
+ * chosen name be globally unique.  These rules also prohibit use of
+ * DEFINE_STATIC_SRCU() within a function.  If these rules are too
+ * restrictive, declare the srcu_struct manually.  For example, in
+ * each file:
+ *
+ *	static struct srcu_struct my_srcu;
+ *
+ * Then, before the first use of each my_srcu, manually initialize it:
+ *
+ *	init_srcu_struct(&my_srcu);
+ *
+ * See include/linux/percpu-defs.h for the rules on per-CPU variables.
+ */
+#define __DEFINE_SRCU(name, is_static)					\
+	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
+	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
+#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
+
+void synchronize_srcu_expedited(struct srcu_struct *sp);
+void srcu_barrier(struct srcu_struct *sp);
+unsigned long srcu_batches_completed(struct srcu_struct *sp);
+
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index a92f27da4a27..d269f2ca17b8 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -526,6 +526,18 @@ config SRCU
 	  permits arbitrary sleeping or blocking within RCU read-side critical
 	  sections.
 
+config TINY_SRCU
+	bool
+	default y if TINY_RCU
+	help
+	  This option selects the single-CPU non-preemptible version of SRCU.
+
+config TREE_SRCU
+	bool
+	default y if !TINY_RCU
+	help
+	  This option selects the full-fledged version of SRCU.
+
 config TASKS_RCU
 	bool
 	default n
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 18dfc485225c..b853214a2b99 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -3,7 +3,8 @@
 KCOV_INSTRUMENT := n
 
 obj-y += update.o sync.o
-obj-$(CONFIG_SRCU) += srcu.o
+obj-$(CONFIG_TREE_SRCU) += srcu.o
+obj-$(CONFIG_TINY_SRCU) += srcutiny.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o
 obj-$(CONFIG_TREE_RCU) += tree.o
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index cccc417a8135..98591e16db1a 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -559,6 +559,7 @@ static void srcu_torture_barrier(void)
 
 static void srcu_torture_stats(void)
 {
+#ifdef CONFIG_TREE_SRCU
 	int cpu;
 	int idx = srcu_ctlp->completed & 0x1;
 
@@ -587,6 +588,7 @@ static void srcu_torture_stats(void)
 		pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
 	}
 	pr_cont("\n");
+#endif
 }
 
 static void srcu_torture_synchronize_expedited(void)
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
new file mode 100644
index 000000000000..b8293527ee18
--- /dev/null
+++ b/kernel/rcu/srcutiny.c
@@ -0,0 +1,215 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion,
+ *	tiny version for non-preemptible single-CPU use.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2017
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <linux/mutex.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate_wait.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/srcu.h>
+
+#include <linux/rcu_node_tree.h>
+#include "rcu.h"
+
+static int init_srcu_struct_fields(struct srcu_struct *sp)
+{
+	sp->srcu_lock_nesting[0] = 0;
+	sp->srcu_lock_nesting[1] = 0;
+	init_swait_queue_head(&sp->srcu_wq);
+	sp->srcu_gp_seq = 0;
+	rcu_segcblist_init(&sp->srcu_cblist);
+	sp->srcu_gp_running = false;
+	sp->srcu_gp_waiting = false;
+	sp->srcu_idx = 0;
+	INIT_WORK(&sp->srcu_work, srcu_drive_gp);
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+int __init_srcu_struct(struct srcu_struct *sp, const char *name,
+		       struct lock_class_key *key)
+{
+	/* Don't re-initialize a lock while it is held. */
+	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
+	lockdep_init_map(&sp->dep_map, name, key, 0);
+	return init_srcu_struct_fields(sp);
+}
+EXPORT_SYMBOL_GPL(__init_srcu_struct);
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+/*
+ * init_srcu_struct - initialize a sleep-RCU structure
+ * @sp: structure to initialize.
+ *
+ * Must invoke this on a given srcu_struct before passing that srcu_struct
+ * to any other function.  Each srcu_struct represents a separate domain
+ * of SRCU protection.
+ */
+int init_srcu_struct(struct srcu_struct *sp)
+{
+	return init_srcu_struct_fields(sp);
+}
+EXPORT_SYMBOL_GPL(init_srcu_struct);
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+/*
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+void cleanup_srcu_struct(struct srcu_struct *sp)
+{
+	WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]);
+	flush_work(&sp->srcu_work);
+	WARN_ON(rcu_seq_state(sp->srcu_gp_seq));
+	WARN_ON(sp->srcu_gp_running);
+	WARN_ON(sp->srcu_gp_waiting);
+	WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist));
+}
+EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct.  Must be called from process context.
+ * Returns an index that must be passed to the matching srcu_read_unlock().
+ */
+int __srcu_read_lock(struct srcu_struct *sp)
+{
+	int idx;
+
+	idx = READ_ONCE(sp->srcu_idx);
+	WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1);
+	return idx;
+}
+EXPORT_SYMBOL_GPL(__srcu_read_lock);
+
+/*
+ * Removes the count for the old reader from the appropriate element of
+ * the srcu_struct.  Must be called from process context.
+ */
+void __srcu_read_unlock(struct srcu_struct *sp, int idx)
+{
+	int newval = sp->srcu_lock_nesting[idx] - 1;
+
+	WRITE_ONCE(sp->srcu_lock_nesting[idx], newval);
+	if (!newval && READ_ONCE(sp->srcu_gp_waiting))
+		swake_up(&sp->srcu_wq);
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock);
+
+/*
+ * Workqueue handler to drive one grace period and invoke any callbacks
+ * that become ready as a result.  Single-CPU and !PREEMPT operation
+ * means that we get away with murder on synchronization.  ;-)
+ */
+void srcu_drive_gp(struct work_struct *wp)
+{
+	int idx;
+	struct rcu_cblist ready_cbs;
+	struct srcu_struct *sp;
+	struct rcu_head *rhp;
+
+	sp = container_of(wp, struct srcu_struct, srcu_work);
+	if (sp->srcu_gp_running || rcu_segcblist_empty(&sp->srcu_cblist))
+		return; /* Already running or nothing to do. */
+
+	/* Tag recently arrived callbacks and wait for readers. */
+	WRITE_ONCE(sp->srcu_gp_running, true);
+	rcu_segcblist_accelerate(&sp->srcu_cblist,
+				 rcu_seq_snap(&sp->srcu_gp_seq));
+	rcu_seq_start(&sp->srcu_gp_seq);
+	idx = sp->srcu_idx;
+	WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx);
+	WRITE_ONCE(sp->srcu_gp_waiting, true);  /* srcu_read_unlock() wakes! */
+	swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx]));
+	WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
+	rcu_seq_end(&sp->srcu_gp_seq);
+
+	/* Update callback list based on GP, and invoke ready callbacks. */
+	rcu_segcblist_advance(&sp->srcu_cblist,
+			      rcu_seq_current(&sp->srcu_gp_seq));
+	if (rcu_segcblist_ready_cbs(&sp->srcu_cblist)) {
+		rcu_cblist_init(&ready_cbs);
+		local_irq_disable();
+		rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs);
+		local_irq_enable();
+		rhp = rcu_cblist_dequeue(&ready_cbs);
+		for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
+			local_bh_disable();
+			rhp->func(rhp);
+			local_bh_enable();
+		}
+		local_irq_disable();
+		rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs);
+		local_irq_enable();
+	}
+	WRITE_ONCE(sp->srcu_gp_running, false);
+
+	/*
+	 * If more callbacks, reschedule ourselves.  This can race with
+	 * a call_srcu() at interrupt level, but the ->srcu_gp_running
+	 * checks will straighten that out.
+	 */
+	if (!rcu_segcblist_empty(&sp->srcu_cblist))
+		schedule_work(&sp->srcu_work);
+}
+EXPORT_SYMBOL_GPL(srcu_drive_gp);
+
+/*
+ * Enqueue an SRCU callback on the specified srcu_struct structure,
+ * initiating grace-period processing if it is not already running.
+ */
+void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
+	       rcu_callback_t func)
+{
+	unsigned long flags;
+
+	head->func = func;
+	local_irq_save(flags);
+	rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
+	local_irq_restore(flags);
+	if (!READ_ONCE(sp->srcu_gp_running))
+		schedule_work(&sp->srcu_work);
+}
+EXPORT_SYMBOL_GPL(call_srcu);
+
+/*
+ * synchronize_srcu - wait for prior SRCU read-side critical-section completion
+ */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+	struct rcu_synchronize rs;
+
+	init_rcu_head_on_stack(&rs.head);
+	init_completion(&rs.completion);
+	call_srcu(sp, &rs.head, wakeme_after_rcu);
+	wait_for_completion(&rs.completion);
+	destroy_rcu_head_on_stack(&rs.head);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu);
-- 
cgit v1.2.3


From 32071141b2448458479932fe726ce892cbe1b4e4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 25 Mar 2017 11:34:42 -0700
Subject: srcutorture: Print Tiny SRCU reader statistics

The srcu_torture_stats() function is adapted to the specific srcu_struct
layout traditionally used by SRCU.  This commit therefore adds support
for Tiny SRCU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcutorture.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 98591e16db1a..9cbb8a7b909d 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -559,11 +559,12 @@ static void srcu_torture_barrier(void)
 
 static void srcu_torture_stats(void)
 {
-#ifdef CONFIG_TREE_SRCU
-	int cpu;
-	int idx = srcu_ctlp->completed & 0x1;
+	int __maybe_unused cpu;
+	int idx;
 
-	pr_alert("%s%s per-CPU(idx=%d):",
+#ifdef CONFIG_TREE_SRCU
+	idx = srcu_ctlp->completed & 0x1;
+	pr_alert("%s%s Tree SRCU per-CPU(idx=%d):",
 		 torture_type, TORTURE_FLAG, idx);
 	for_each_possible_cpu(cpu) {
 		unsigned long l0, l1;
@@ -588,6 +589,12 @@ static void srcu_torture_stats(void)
 		pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
 	}
 	pr_cont("\n");
+#elif defined(CONFIG_TINY_SRCU)
+	idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1;
+	pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%d,%d)\n",
+		 torture_type, TORTURE_FLAG, idx,
+		 READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]),
+		 READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx]));
 #endif
 }
 
-- 
cgit v1.2.3


From dad81a2026841b5e2651aab58a7398c13cc05847 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 25 Mar 2017 17:23:44 -0700
Subject: srcu: Introduce CLASSIC_SRCU Kconfig option

The TREE_SRCU rewrite is large and a bit on the non-simple side, so
this commit helps reduce risk by allowing the old v4.11 SRCU algorithm
to be selected using a new CLASSIC_SRCU Kconfig option that depends
on RCU_EXPERT.  The default is to use the new TREE_SRCU and TINY_SRCU
algorithms, in order to help get these the testing that they need.
However, if your users do not require the update-side scalability that
is to be provided by TREE_SRCU, select RCU_EXPERT and then CLASSIC_SRCU
to revert back to the old classic SRCU algorithm.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h        |   2 +
 include/linux/srcuclassic.h | 101 ++++++++
 init/Kconfig                |  21 +-
 kernel/rcu/Makefile         |   3 +-
 kernel/rcu/rcutorture.c     |   2 +-
 kernel/rcu/srcu.c           | 347 ++++++++++++++-----------
 kernel/rcu/srcutree.c       | 613 ++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 934 insertions(+), 155 deletions(-)
 create mode 100644 include/linux/srcuclassic.h
 create mode 100644 kernel/rcu/srcutree.c

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 907f09b14eda..167ad8831aaf 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -60,6 +60,8 @@ int init_srcu_struct(struct srcu_struct *sp);
 #include <linux/srcutiny.h>
 #elif defined(CONFIG_TREE_SRCU)
 #include <linux/srcutree.h>
+#elif defined(CONFIG_CLASSIC_SRCU)
+#include <linux/srcuclassic.h>
 #else
 #error "Unknown SRCU implementation specified to kernel configuration"
 #endif
diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h
new file mode 100644
index 000000000000..41cf99930f34
--- /dev/null
+++ b/include/linux/srcuclassic.h
@@ -0,0 +1,101 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion,
+ *	classic v4.11 variant.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2017
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ */
+
+#ifndef _LINUX_SRCU_CLASSIC_H
+#define _LINUX_SRCU_CLASSIC_H
+
+struct srcu_array {
+	unsigned long lock_count[2];
+	unsigned long unlock_count[2];
+};
+
+struct rcu_batch {
+	struct rcu_head *head, **tail;
+};
+
+#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
+
+struct srcu_struct {
+	unsigned long completed;
+	struct srcu_array __percpu *per_cpu_ref;
+	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
+	bool running;
+	/* callbacks just queued */
+	struct rcu_batch batch_queue;
+	/* callbacks try to do the first check_zero */
+	struct rcu_batch batch_check0;
+	/* callbacks done with the first check_zero and the flip */
+	struct rcu_batch batch_check1;
+	struct rcu_batch batch_done;
+	struct delayed_work work;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+};
+
+void process_srcu(struct work_struct *work);
+
+#define __SRCU_STRUCT_INIT(name)					\
+	{								\
+		.completed = -300,					\
+		.per_cpu_ref = &name##_srcu_array,			\
+		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
+		.running = false,					\
+		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\
+		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\
+		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\
+		.batch_done = RCU_BATCH_INIT(name.batch_done),		\
+		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
+		__SRCU_DEP_MAP_INIT(name)				\
+	}
+
+/*
+ * Define and initialize a srcu struct at build time.
+ * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ *
+ * Note that although DEFINE_STATIC_SRCU() hides the name from other
+ * files, the per-CPU variable rules nevertheless require that the
+ * chosen name be globally unique.  These rules also prohibit use of
+ * DEFINE_STATIC_SRCU() within a function.  If these rules are too
+ * restrictive, declare the srcu_struct manually.  For example, in
+ * each file:
+ *
+ *	static struct srcu_struct my_srcu;
+ *
+ * Then, before the first use of each my_srcu, manually initialize it:
+ *
+ *	init_srcu_struct(&my_srcu);
+ *
+ * See include/linux/percpu-defs.h for the rules on per-CPU variables.
+ */
+#define __DEFINE_SRCU(name, is_static)					\
+	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
+	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
+#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
+
+void synchronize_srcu_expedited(struct srcu_struct *sp);
+void srcu_barrier(struct srcu_struct *sp);
+unsigned long srcu_batches_completed(struct srcu_struct *sp);
+
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index d269f2ca17b8..558cc3638ab9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -526,15 +526,32 @@ config SRCU
 	  permits arbitrary sleeping or blocking within RCU read-side critical
 	  sections.
 
+config CLASSIC_SRCU
+	bool "Use v4.11 classic SRCU implementation"
+	default n
+	depends on RCU_EXPERT && SRCU
+	help
+	  This option selects the traditional well-tested classic SRCU
+	  implementation from v4.11, as might be desired for enterprise
+	  Linux distributions.  Without this option, the shiny new
+	  Tiny SRCU and Tree SRCU implementations are used instead.
+	  At some point, it is hoped that Tiny SRCU and Tree SRCU
+	  will accumulate enough test time and confidence to allow
+	  Classic SRCU to be dropped entirely.
+
+	  Say Y if you need a rock-solid SRCU.
+
+	  Say N if you would like help test Tree SRCU.
+
 config TINY_SRCU
 	bool
-	default y if TINY_RCU
+	default y if TINY_RCU && !CLASSIC_SRCU
 	help
 	  This option selects the single-CPU non-preemptible version of SRCU.
 
 config TREE_SRCU
 	bool
-	default y if !TINY_RCU
+	default y if !TINY_RCU && !CLASSIC_SRCU
 	help
 	  This option selects the full-fledged version of SRCU.
 
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index b853214a2b99..158e6593d58c 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -3,7 +3,8 @@
 KCOV_INSTRUMENT := n
 
 obj-y += update.o sync.o
-obj-$(CONFIG_TREE_SRCU) += srcu.o
+obj-$(CONFIG_CLASSIC_SRCU) += srcu.o
+obj-$(CONFIG_TREE_SRCU) += srcutree.o
 obj-$(CONFIG_TINY_SRCU) += srcutiny.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 9cbb8a7b909d..6f344b6748a8 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -562,7 +562,7 @@ static void srcu_torture_stats(void)
 	int __maybe_unused cpu;
 	int idx;
 
-#ifdef CONFIG_TREE_SRCU
+#if defined(CONFIG_TREE_SRCU) || defined(CONFIG_CLASSIC_SRCU)
 	idx = srcu_ctlp->completed & 0x1;
 	pr_alert("%s%s Tree SRCU per-CPU(idx=%d):",
 		 torture_type, TORTURE_FLAG, idx);
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 3cfcc59bddf3..584d8a983883 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -36,16 +36,75 @@
 #include <linux/delay.h>
 #include <linux/srcu.h>
 
-#include <linux/rcu_node_tree.h>
 #include "rcu.h"
 
+/*
+ * Initialize an rcu_batch structure to empty.
+ */
+static inline void rcu_batch_init(struct rcu_batch *b)
+{
+	b->head = NULL;
+	b->tail = &b->head;
+}
+
+/*
+ * Enqueue a callback onto the tail of the specified rcu_batch structure.
+ */
+static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
+{
+	*b->tail = head;
+	b->tail = &head->next;
+}
+
+/*
+ * Is the specified rcu_batch structure empty?
+ */
+static inline bool rcu_batch_empty(struct rcu_batch *b)
+{
+	return b->tail == &b->head;
+}
+
+/*
+ * Remove the callback at the head of the specified rcu_batch structure
+ * and return a pointer to it, or return NULL if the structure is empty.
+ */
+static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
+{
+	struct rcu_head *head;
+
+	if (rcu_batch_empty(b))
+		return NULL;
+
+	head = b->head;
+	b->head = head->next;
+	if (b->tail == &head->next)
+		rcu_batch_init(b);
+
+	return head;
+}
+
+/*
+ * Move all callbacks from the rcu_batch structure specified by "from" to
+ * the structure specified by "to".
+ */
+static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
+{
+	if (!rcu_batch_empty(from)) {
+		*to->tail = from->head;
+		to->tail = from->tail;
+		rcu_batch_init(from);
+	}
+}
+
 static int init_srcu_struct_fields(struct srcu_struct *sp)
 {
 	sp->completed = 0;
-	sp->srcu_gp_seq = 0;
-	atomic_set(&sp->srcu_exp_cnt, 0);
 	spin_lock_init(&sp->queue_lock);
-	rcu_segcblist_init(&sp->srcu_cblist);
+	sp->running = false;
+	rcu_batch_init(&sp->batch_queue);
+	rcu_batch_init(&sp->batch_check0);
+	rcu_batch_init(&sp->batch_check1);
+	rcu_batch_init(&sp->batch_done);
 	INIT_DELAYED_WORK(&sp->work, process_srcu);
 	sp->per_cpu_ref = alloc_percpu(struct srcu_array);
 	return sp->per_cpu_ref ? 0 : -ENOMEM;
@@ -180,8 +239,6 @@ static bool srcu_readers_active(struct srcu_struct *sp)
 	return sum;
 }
 
-#define SRCU_INTERVAL		1
-
 /**
  * cleanup_srcu_struct - deconstruct a sleep-RCU structure
  * @sp: structure to clean up.
@@ -197,16 +254,8 @@ static bool srcu_readers_active(struct srcu_struct *sp)
  */
 void cleanup_srcu_struct(struct srcu_struct *sp)
 {
-	WARN_ON_ONCE(atomic_read(&sp->srcu_exp_cnt));
 	if (WARN_ON(srcu_readers_active(sp)))
 		return; /* Leakage unless caller handles error. */
-	if (WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)))
-		return; /* Leakage unless caller handles error. */
-	flush_delayed_work(&sp->work);
-	if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE)) {
-		pr_info("cleanup_srcu_struct: Active srcu_struct %lu CBs %c state: %d\n", rcu_segcblist_n_cbs(&sp->srcu_cblist), ".E"[rcu_segcblist_empty(&sp->srcu_cblist)], rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
-		return; /* Caller forgot to stop doing call_srcu()? */
-	}
 	free_percpu(sp->per_cpu_ref);
 	sp->per_cpu_ref = NULL;
 }
@@ -245,36 +294,26 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  * We use an adaptive strategy for synchronize_srcu() and especially for
  * synchronize_srcu_expedited().  We spin for a fixed time period
  * (defined below) to allow SRCU readers to exit their read-side critical
- * sections.  If there are still some readers after a few microseconds,
- * we repeatedly block for 1-millisecond time periods.
+ * sections.  If there are still some readers after 10 microseconds,
+ * we repeatedly block for 1-millisecond time periods.  This approach
+ * has done well in testing, so there is no need for a config parameter.
  */
 #define SRCU_RETRY_CHECK_DELAY		5
+#define SYNCHRONIZE_SRCU_TRYCOUNT	2
+#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT	12
 
 /*
- * Start an SRCU grace period.
- */
-static void srcu_gp_start(struct srcu_struct *sp)
-{
-	int state;
-
-	rcu_segcblist_accelerate(&sp->srcu_cblist,
-				 rcu_seq_snap(&sp->srcu_gp_seq));
-	rcu_seq_start(&sp->srcu_gp_seq);
-	state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
-	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
-}
-
-/*
- * Wait until all readers counted by array index idx complete, but
- * loop an additional time if there is an expedited grace period pending.
- * The caller must ensure that ->completed is not changed while checking.
+ * @@@ Wait until all pre-existing readers complete.  Such readers
+ * will have used the index specified by "idx".
+ * the caller should ensures the ->completed is not changed while checking
+ * and idx = (->completed & 1) ^ 1
  */
 static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
 {
 	for (;;) {
 		if (srcu_readers_active_idx_check(sp, idx))
 			return true;
-		if (--trycount + !!atomic_read(&sp->srcu_exp_cnt) <= 0)
+		if (--trycount <= 0)
 			return false;
 		udelay(SRCU_RETRY_CHECK_DELAY);
 	}
@@ -299,19 +338,6 @@ static void srcu_flip(struct srcu_struct *sp)
 	smp_mb(); /* D */  /* Pairs with C. */
 }
 
-/*
- * End an SRCU grace period.
- */
-static void srcu_gp_end(struct srcu_struct *sp)
-{
-	rcu_seq_end(&sp->srcu_gp_seq);
-
-	spin_lock_irq(&sp->queue_lock);
-	rcu_segcblist_advance(&sp->srcu_cblist,
-			      rcu_seq_current(&sp->srcu_gp_seq));
-	spin_unlock_irq(&sp->queue_lock);
-}
-
 /*
  * Enqueue an SRCU callback on the specified srcu_struct structure,
  * initiating grace-period processing if it is not already running.
@@ -348,24 +374,26 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 	head->func = func;
 	spin_lock_irqsave(&sp->queue_lock, flags);
 	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
-	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_IDLE) {
-		srcu_gp_start(sp);
+	rcu_batch_queue(&sp->batch_queue, head);
+	if (!sp->running) {
+		sp->running = true;
 		queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
 	}
 	spin_unlock_irqrestore(&sp->queue_lock, flags);
 }
 EXPORT_SYMBOL_GPL(call_srcu);
 
-static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
+static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
+static void srcu_reschedule(struct srcu_struct *sp);
 
 /*
  * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
  */
-static void __synchronize_srcu(struct srcu_struct *sp)
+static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
 {
 	struct rcu_synchronize rcu;
 	struct rcu_head *head = &rcu.head;
+	bool done = false;
 
 	RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
 			 lock_is_held(&rcu_bh_lock_map) ||
@@ -373,8 +401,6 @@ static void __synchronize_srcu(struct srcu_struct *sp)
 			 lock_is_held(&rcu_sched_lock_map),
 			 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
 
-	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
-		return;
 	might_sleep();
 	init_completion(&rcu.completion);
 
@@ -382,47 +408,31 @@ static void __synchronize_srcu(struct srcu_struct *sp)
 	head->func = wakeme_after_rcu;
 	spin_lock_irq(&sp->queue_lock);
 	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_IDLE) {
+	if (!sp->running) {
 		/* steal the processing owner */
-		rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
-		srcu_gp_start(sp);
+		sp->running = true;
+		rcu_batch_queue(&sp->batch_check0, head);
 		spin_unlock_irq(&sp->queue_lock);
+
+		srcu_advance_batches(sp, trycount);
+		if (!rcu_batch_empty(&sp->batch_done)) {
+			BUG_ON(sp->batch_done.head != head);
+			rcu_batch_dequeue(&sp->batch_done);
+			done = true;
+		}
 		/* give the processing owner to work_struct */
-		srcu_reschedule(sp, 0);
+		srcu_reschedule(sp);
 	} else {
-		rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
+		rcu_batch_queue(&sp->batch_queue, head);
 		spin_unlock_irq(&sp->queue_lock);
 	}
 
-	wait_for_completion(&rcu.completion);
-	smp_mb(); /* Caller's later accesses after GP. */
-}
-
-/**
- * synchronize_srcu_expedited - Brute-force SRCU grace period
- * @sp: srcu_struct with which to synchronize.
- *
- * Wait for an SRCU grace period to elapse, but be more aggressive about
- * spinning rather than blocking when waiting.
- *
- * Note that synchronize_srcu_expedited() has the same deadlock and
- * memory-ordering properties as does synchronize_srcu().
- */
-void synchronize_srcu_expedited(struct srcu_struct *sp)
-{
-	bool do_norm = rcu_gp_is_normal();
-
-	if (!do_norm) {
-		atomic_inc(&sp->srcu_exp_cnt);
-		smp_mb__after_atomic(); /* increment before GP. */
-	}
-	__synchronize_srcu(sp);
-	if (!do_norm) {
-		smp_mb__before_atomic(); /* GP before decrement. */
-		atomic_dec(&sp->srcu_exp_cnt);
+	if (!done) {
+		wait_for_completion(&rcu.completion);
+		smp_mb(); /* Caller's later accesses after GP. */
 	}
+
 }
-EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
 
 /**
  * synchronize_srcu - wait for prior SRCU read-side critical-section completion
@@ -465,13 +475,28 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
  */
 void synchronize_srcu(struct srcu_struct *sp)
 {
-	if (rcu_gp_is_expedited())
-		synchronize_srcu_expedited(sp);
-	else
-		__synchronize_srcu(sp);
+	__synchronize_srcu(sp, (rcu_gp_is_expedited() && !rcu_gp_is_normal())
+			   ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
+			   : SYNCHRONIZE_SRCU_TRYCOUNT);
 }
 EXPORT_SYMBOL_GPL(synchronize_srcu);
 
+/**
+ * synchronize_srcu_expedited - Brute-force SRCU grace period
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Wait for an SRCU grace period to elapse, but be more aggressive about
+ * spinning rather than blocking when waiting.
+ *
+ * Note that synchronize_srcu_expedited() has the same deadlock and
+ * memory-ordering properties as does synchronize_srcu().
+ */
+void synchronize_srcu_expedited(struct srcu_struct *sp)
+{
+	__synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
+
 /**
  * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
  * @sp: srcu_struct on which to wait for in-flight callbacks.
@@ -495,13 +520,29 @@ unsigned long srcu_batches_completed(struct srcu_struct *sp)
 }
 EXPORT_SYMBOL_GPL(srcu_batches_completed);
 
+#define SRCU_CALLBACK_BATCH	10
+#define SRCU_INTERVAL		1
+
+/*
+ * Move any new SRCU callbacks to the first stage of the SRCU grace
+ * period pipeline.
+ */
+static void srcu_collect_new(struct srcu_struct *sp)
+{
+	if (!rcu_batch_empty(&sp->batch_queue)) {
+		spin_lock_irq(&sp->queue_lock);
+		rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
+		spin_unlock_irq(&sp->queue_lock);
+	}
+}
+
 /*
  * Core SRCU state machine.  Advance callbacks from ->batch_check0 to
  * ->batch_check1 and then to ->batch_done as readers drain.
  */
-static void srcu_advance_batches(struct srcu_struct *sp)
+static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
 {
-	int idx;
+	int idx = 1 ^ (sp->completed & 1);
 
 	/*
 	 * Because readers might be delayed for an extended period after
@@ -509,44 +550,50 @@ static void srcu_advance_batches(struct srcu_struct *sp)
 	 * might well be readers using both idx=0 and idx=1.  We therefore
 	 * need to wait for readers to clear from both index values before
 	 * invoking a callback.
-	 *
-	 * The load-acquire ensures that we see the accesses performed
-	 * by the prior grace period.
 	 */
-	idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
-	if (idx == SRCU_STATE_IDLE) {
-		spin_lock_irq(&sp->queue_lock);
-		if (rcu_segcblist_empty(&sp->srcu_cblist)) {
-			spin_unlock_irq(&sp->queue_lock);
-			return;
-		}
-		idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
-		if (idx == SRCU_STATE_IDLE)
-			srcu_gp_start(sp);
-		spin_unlock_irq(&sp->queue_lock);
-		if (idx != SRCU_STATE_IDLE)
-			return; /* Someone else started the grace period. */
-	}
 
-	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
-		idx = 1 ^ (sp->completed & 1);
-		if (!try_check_zero(sp, idx, 1))
-			return; /* readers present, retry later. */
-		srcu_flip(sp);
-		rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2);
-	}
+	if (rcu_batch_empty(&sp->batch_check0) &&
+	    rcu_batch_empty(&sp->batch_check1))
+		return; /* no callbacks need to be advanced */
 
-	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
+	if (!try_check_zero(sp, idx, trycount))
+		return; /* failed to advance, will try after SRCU_INTERVAL */
 
-		/*
-		 * SRCU read-side critical sections are normally short,
-		 * so check at least twice in quick succession after a flip.
-		 */
-		idx = 1 ^ (sp->completed & 1);
-		if (!try_check_zero(sp, idx, 2))
-			return; /* readers present, retry after later. */
-		srcu_gp_end(sp);
-	}
+	/*
+	 * The callbacks in ->batch_check1 have already done with their
+	 * first zero check and flip back when they were enqueued on
+	 * ->batch_check0 in a previous invocation of srcu_advance_batches().
+	 * (Presumably try_check_zero() returned false during that
+	 * invocation, leaving the callbacks stranded on ->batch_check1.)
+	 * They are therefore ready to invoke, so move them to ->batch_done.
+	 */
+	rcu_batch_move(&sp->batch_done, &sp->batch_check1);
+
+	if (rcu_batch_empty(&sp->batch_check0))
+		return; /* no callbacks need to be advanced */
+	srcu_flip(sp);
+
+	/*
+	 * The callbacks in ->batch_check0 just finished their
+	 * first check zero and flip, so move them to ->batch_check1
+	 * for future checking on the other idx.
+	 */
+	rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
+
+	/*
+	 * SRCU read-side critical sections are normally short, so check
+	 * at least twice in quick succession after a flip.
+	 */
+	trycount = trycount < 2 ? 2 : trycount;
+	if (!try_check_zero(sp, idx^1, trycount))
+		return; /* failed to advance, will try after SRCU_INTERVAL */
+
+	/*
+	 * The callbacks in ->batch_check1 have now waited for all
+	 * pre-existing readers using both idx values.  They are therefore
+	 * ready to invoke, so move them to ->batch_done.
+	 */
+	rcu_batch_move(&sp->batch_done, &sp->batch_check1);
 }
 
 /*
@@ -557,48 +604,45 @@ static void srcu_advance_batches(struct srcu_struct *sp)
  */
 static void srcu_invoke_callbacks(struct srcu_struct *sp)
 {
-	struct rcu_cblist ready_cbs;
-	struct rcu_head *rhp;
+	int i;
+	struct rcu_head *head;
 
-	spin_lock_irq(&sp->queue_lock);
-	if (!rcu_segcblist_ready_cbs(&sp->srcu_cblist)) {
-		spin_unlock_irq(&sp->queue_lock);
-		return;
-	}
-	rcu_cblist_init(&ready_cbs);
-	rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs);
-	spin_unlock_irq(&sp->queue_lock);
-	rhp = rcu_cblist_dequeue(&ready_cbs);
-	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
+	for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
+		head = rcu_batch_dequeue(&sp->batch_done);
+		if (!head)
+			break;
 		local_bh_disable();
-		rhp->func(rhp);
+		head->func(head);
 		local_bh_enable();
 	}
-	spin_lock_irq(&sp->queue_lock);
-	rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs);
-	spin_unlock_irq(&sp->queue_lock);
 }
 
 /*
  * Finished one round of SRCU grace period.  Start another if there are
  * more SRCU callbacks queued, otherwise put SRCU into not-running state.
  */
-static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
+static void srcu_reschedule(struct srcu_struct *sp)
 {
 	bool pending = true;
-	int state;
 
-	if (rcu_segcblist_empty(&sp->srcu_cblist)) {
+	if (rcu_batch_empty(&sp->batch_done) &&
+	    rcu_batch_empty(&sp->batch_check1) &&
+	    rcu_batch_empty(&sp->batch_check0) &&
+	    rcu_batch_empty(&sp->batch_queue)) {
 		spin_lock_irq(&sp->queue_lock);
-		state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
-		if (rcu_segcblist_empty(&sp->srcu_cblist) &&
-		    state == SRCU_STATE_IDLE)
+		if (rcu_batch_empty(&sp->batch_done) &&
+		    rcu_batch_empty(&sp->batch_check1) &&
+		    rcu_batch_empty(&sp->batch_check0) &&
+		    rcu_batch_empty(&sp->batch_queue)) {
+			sp->running = false;
 			pending = false;
+		}
 		spin_unlock_irq(&sp->queue_lock);
 	}
 
 	if (pending)
-		queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
+		queue_delayed_work(system_power_efficient_wq,
+				   &sp->work, SRCU_INTERVAL);
 }
 
 /*
@@ -610,8 +654,9 @@ void process_srcu(struct work_struct *work)
 
 	sp = container_of(work, struct srcu_struct, work.work);
 
-	srcu_advance_batches(sp);
+	srcu_collect_new(sp);
+	srcu_advance_batches(sp, 1);
 	srcu_invoke_callbacks(sp);
-	srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL);
+	srcu_reschedule(sp);
 }
 EXPORT_SYMBOL_GPL(process_srcu);
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
new file mode 100644
index 000000000000..da676b0d016b
--- /dev/null
+++ b/kernel/rcu/srcutree.c
@@ -0,0 +1,613 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ * Copyright (C) Fujitsu, 2012
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ *	   Lai Jiangshan <laijs@cn.fujitsu.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ *		Documentation/RCU/ *.txt
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate_wait.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/srcu.h>
+
+#include <linux/rcu_node_tree.h>
+#include "rcu.h"
+
+static int init_srcu_struct_fields(struct srcu_struct *sp)
+{
+	sp->completed = 0;
+	sp->srcu_gp_seq = 0;
+	atomic_set(&sp->srcu_exp_cnt, 0);
+	spin_lock_init(&sp->queue_lock);
+	rcu_segcblist_init(&sp->srcu_cblist);
+	INIT_DELAYED_WORK(&sp->work, process_srcu);
+	sp->per_cpu_ref = alloc_percpu(struct srcu_array);
+	return sp->per_cpu_ref ? 0 : -ENOMEM;
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+int __init_srcu_struct(struct srcu_struct *sp, const char *name,
+		       struct lock_class_key *key)
+{
+	/* Don't re-initialize a lock while it is held. */
+	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
+	lockdep_init_map(&sp->dep_map, name, key, 0);
+	return init_srcu_struct_fields(sp);
+}
+EXPORT_SYMBOL_GPL(__init_srcu_struct);
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+/**
+ * init_srcu_struct - initialize a sleep-RCU structure
+ * @sp: structure to initialize.
+ *
+ * Must invoke this on a given srcu_struct before passing that srcu_struct
+ * to any other function.  Each srcu_struct represents a separate domain
+ * of SRCU protection.
+ */
+int init_srcu_struct(struct srcu_struct *sp)
+{
+	return init_srcu_struct_fields(sp);
+}
+EXPORT_SYMBOL_GPL(init_srcu_struct);
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+/*
+ * Returns approximate total of the readers' ->lock_count[] values for the
+ * rank of per-CPU counters specified by idx.
+ */
+static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
+{
+	int cpu;
+	unsigned long sum = 0;
+
+	for_each_possible_cpu(cpu) {
+		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
+
+		sum += READ_ONCE(cpuc->lock_count[idx]);
+	}
+	return sum;
+}
+
+/*
+ * Returns approximate total of the readers' ->unlock_count[] values for the
+ * rank of per-CPU counters specified by idx.
+ */
+static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
+{
+	int cpu;
+	unsigned long sum = 0;
+
+	for_each_possible_cpu(cpu) {
+		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
+
+		sum += READ_ONCE(cpuc->unlock_count[idx]);
+	}
+	return sum;
+}
+
+/*
+ * Return true if the number of pre-existing readers is determined to
+ * be zero.
+ */
+static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
+{
+	unsigned long unlocks;
+
+	unlocks = srcu_readers_unlock_idx(sp, idx);
+
+	/*
+	 * Make sure that a lock is always counted if the corresponding
+	 * unlock is counted. Needs to be a smp_mb() as the read side may
+	 * contain a read from a variable that is written to before the
+	 * synchronize_srcu() in the write side. In this case smp_mb()s
+	 * A and B act like the store buffering pattern.
+	 *
+	 * This smp_mb() also pairs with smp_mb() C to prevent accesses
+	 * after the synchronize_srcu() from being executed before the
+	 * grace period ends.
+	 */
+	smp_mb(); /* A */
+
+	/*
+	 * If the locks are the same as the unlocks, then there must have
+	 * been no readers on this index at some time in between. This does
+	 * not mean that there are no more readers, as one could have read
+	 * the current index but not have incremented the lock counter yet.
+	 *
+	 * Possible bug: There is no guarantee that there haven't been
+	 * ULONG_MAX increments of ->lock_count[] since the unlocks were
+	 * counted, meaning that this could return true even if there are
+	 * still active readers.  Since there are no memory barriers around
+	 * srcu_flip(), the CPU is not required to increment ->completed
+	 * before running srcu_readers_unlock_idx(), which means that there
+	 * could be an arbitrarily large number of critical sections that
+	 * execute after srcu_readers_unlock_idx() but use the old value
+	 * of ->completed.
+	 */
+	return srcu_readers_lock_idx(sp, idx) == unlocks;
+}
+
+/**
+ * srcu_readers_active - returns true if there are readers. and false
+ *                       otherwise
+ * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
+ *
+ * Note that this is not an atomic primitive, and can therefore suffer
+ * severe errors when invoked on an active srcu_struct.  That said, it
+ * can be useful as an error check at cleanup time.
+ */
+static bool srcu_readers_active(struct srcu_struct *sp)
+{
+	int cpu;
+	unsigned long sum = 0;
+
+	for_each_possible_cpu(cpu) {
+		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
+
+		sum += READ_ONCE(cpuc->lock_count[0]);
+		sum += READ_ONCE(cpuc->lock_count[1]);
+		sum -= READ_ONCE(cpuc->unlock_count[0]);
+		sum -= READ_ONCE(cpuc->unlock_count[1]);
+	}
+	return sum;
+}
+
+#define SRCU_INTERVAL		1
+
+/**
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+void cleanup_srcu_struct(struct srcu_struct *sp)
+{
+	WARN_ON_ONCE(atomic_read(&sp->srcu_exp_cnt));
+	if (WARN_ON(srcu_readers_active(sp)))
+		return; /* Leakage unless caller handles error. */
+	if (WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)))
+		return; /* Leakage unless caller handles error. */
+	flush_delayed_work(&sp->work);
+	if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE)) {
+		pr_info("cleanup_srcu_struct: Active srcu_struct %lu CBs %c state: %d\n", rcu_segcblist_n_cbs(&sp->srcu_cblist), ".E"[rcu_segcblist_empty(&sp->srcu_cblist)], rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
+		return; /* Caller forgot to stop doing call_srcu()? */
+	}
+	free_percpu(sp->per_cpu_ref);
+	sp->per_cpu_ref = NULL;
+}
+EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct.  Must be called from process context.
+ * Returns an index that must be passed to the matching srcu_read_unlock().
+ */
+int __srcu_read_lock(struct srcu_struct *sp)
+{
+	int idx;
+
+	idx = READ_ONCE(sp->completed) & 0x1;
+	__this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
+	smp_mb(); /* B */  /* Avoid leaking the critical section. */
+	return idx;
+}
+EXPORT_SYMBOL_GPL(__srcu_read_lock);
+
+/*
+ * Removes the count for the old reader from the appropriate per-CPU
+ * element of the srcu_struct.  Note that this may well be a different
+ * CPU than that which was incremented by the corresponding srcu_read_lock().
+ * Must be called from process context.
+ */
+void __srcu_read_unlock(struct srcu_struct *sp, int idx)
+{
+	smp_mb(); /* C */  /* Avoid leaking the critical section. */
+	this_cpu_inc(sp->per_cpu_ref->unlock_count[idx]);
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock);
+
+/*
+ * We use an adaptive strategy for synchronize_srcu() and especially for
+ * synchronize_srcu_expedited().  We spin for a fixed time period
+ * (defined below) to allow SRCU readers to exit their read-side critical
+ * sections.  If there are still some readers after a few microseconds,
+ * we repeatedly block for 1-millisecond time periods.
+ */
+#define SRCU_RETRY_CHECK_DELAY		5
+
+/*
+ * Start an SRCU grace period.
+ */
+static void srcu_gp_start(struct srcu_struct *sp)
+{
+	int state;
+
+	rcu_segcblist_accelerate(&sp->srcu_cblist,
+				 rcu_seq_snap(&sp->srcu_gp_seq));
+	rcu_seq_start(&sp->srcu_gp_seq);
+	state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
+	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
+}
+
+/*
+ * Wait until all readers counted by array index idx complete, but
+ * loop an additional time if there is an expedited grace period pending.
+ * The caller must ensure that ->completed is not changed while checking.
+ */
+static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
+{
+	for (;;) {
+		if (srcu_readers_active_idx_check(sp, idx))
+			return true;
+		if (--trycount + !!atomic_read(&sp->srcu_exp_cnt) <= 0)
+			return false;
+		udelay(SRCU_RETRY_CHECK_DELAY);
+	}
+}
+
+/*
+ * Increment the ->completed counter so that future SRCU readers will
+ * use the other rank of the ->(un)lock_count[] arrays.  This allows
+ * us to wait for pre-existing readers in a starvation-free manner.
+ */
+static void srcu_flip(struct srcu_struct *sp)
+{
+	WRITE_ONCE(sp->completed, sp->completed + 1);
+
+	/*
+	 * Ensure that if the updater misses an __srcu_read_unlock()
+	 * increment, that task's next __srcu_read_lock() will see the
+	 * above counter update.  Note that both this memory barrier
+	 * and the one in srcu_readers_active_idx_check() provide the
+	 * guarantee for __srcu_read_lock().
+	 */
+	smp_mb(); /* D */  /* Pairs with C. */
+}
+
+/*
+ * End an SRCU grace period.
+ */
+static void srcu_gp_end(struct srcu_struct *sp)
+{
+	rcu_seq_end(&sp->srcu_gp_seq);
+
+	spin_lock_irq(&sp->queue_lock);
+	rcu_segcblist_advance(&sp->srcu_cblist,
+			      rcu_seq_current(&sp->srcu_gp_seq));
+	spin_unlock_irq(&sp->queue_lock);
+}
+
+/*
+ * Enqueue an SRCU callback on the specified srcu_struct structure,
+ * initiating grace-period processing if it is not already running.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing SRCU read-side critical section.  On systems with
+ * more than one CPU, this means that when "func()" is invoked, each CPU
+ * is guaranteed to have executed a full memory barrier since the end of
+ * its last corresponding SRCU read-side critical section whose beginning
+ * preceded the call to call_rcu().  It also means that each CPU executing
+ * an SRCU read-side critical section that continues beyond the start of
+ * "func()" must have executed a memory barrier after the call_rcu()
+ * but before the beginning of that SRCU read-side critical section.
+ * Note that these guarantees include CPUs that are offline, idle, or
+ * executing in user mode, as well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting SRCU callback function "func()", then both CPU A and CPU
+ * B are guaranteed to execute a full memory barrier during the time
+ * interval between the call to call_rcu() and the invocation of "func()".
+ * This guarantee applies even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
+ *
+ * Of course, these guarantees apply only for invocations of call_srcu(),
+ * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
+ * srcu_struct structure.
+ */
+void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
+	       rcu_callback_t func)
+{
+	unsigned long flags;
+
+	head->next = NULL;
+	head->func = func;
+	spin_lock_irqsave(&sp->queue_lock, flags);
+	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
+	rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
+	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_IDLE) {
+		srcu_gp_start(sp);
+		queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
+	}
+	spin_unlock_irqrestore(&sp->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(call_srcu);
+
+static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
+
+/*
+ * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
+ */
+static void __synchronize_srcu(struct srcu_struct *sp)
+{
+	struct rcu_synchronize rcu;
+	struct rcu_head *head = &rcu.head;
+
+	RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
+			 lock_is_held(&rcu_bh_lock_map) ||
+			 lock_is_held(&rcu_lock_map) ||
+			 lock_is_held(&rcu_sched_lock_map),
+			 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
+
+	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
+		return;
+	might_sleep();
+	init_completion(&rcu.completion);
+
+	head->next = NULL;
+	head->func = wakeme_after_rcu;
+	spin_lock_irq(&sp->queue_lock);
+	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
+	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_IDLE) {
+		/* steal the processing owner */
+		rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
+		srcu_gp_start(sp);
+		spin_unlock_irq(&sp->queue_lock);
+		/* give the processing owner to work_struct */
+		srcu_reschedule(sp, 0);
+	} else {
+		rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
+		spin_unlock_irq(&sp->queue_lock);
+	}
+
+	wait_for_completion(&rcu.completion);
+	smp_mb(); /* Caller's later accesses after GP. */
+}
+
+/**
+ * synchronize_srcu_expedited - Brute-force SRCU grace period
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Wait for an SRCU grace period to elapse, but be more aggressive about
+ * spinning rather than blocking when waiting.
+ *
+ * Note that synchronize_srcu_expedited() has the same deadlock and
+ * memory-ordering properties as does synchronize_srcu().
+ */
+void synchronize_srcu_expedited(struct srcu_struct *sp)
+{
+	bool do_norm = rcu_gp_is_normal();
+
+	if (!do_norm) {
+		atomic_inc(&sp->srcu_exp_cnt);
+		smp_mb__after_atomic(); /* increment before GP. */
+	}
+	__synchronize_srcu(sp);
+	if (!do_norm) {
+		smp_mb__before_atomic(); /* GP before decrement. */
+		atomic_dec(&sp->srcu_exp_cnt);
+	}
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
+
+/**
+ * synchronize_srcu - wait for prior SRCU read-side critical-section completion
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Wait for the count to drain to zero of both indexes. To avoid the
+ * possible starvation of synchronize_srcu(), it waits for the count of
+ * the index=((->completed & 1) ^ 1) to drain to zero at first,
+ * and then flip the completed and wait for the count of the other index.
+ *
+ * Can block; must be called from process context.
+ *
+ * Note that it is illegal to call synchronize_srcu() from the corresponding
+ * SRCU read-side critical section; doing so will result in deadlock.
+ * However, it is perfectly legal to call synchronize_srcu() on one
+ * srcu_struct from some other srcu_struct's read-side critical section,
+ * as long as the resulting graph of srcu_structs is acyclic.
+ *
+ * There are memory-ordering constraints implied by synchronize_srcu().
+ * On systems with more than one CPU, when synchronize_srcu() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since
+ * the end of its last corresponding SRCU-sched read-side critical section
+ * whose beginning preceded the call to synchronize_srcu().  In addition,
+ * each CPU having an SRCU read-side critical section that extends beyond
+ * the return from synchronize_srcu() is guaranteed to have executed a
+ * full memory barrier after the beginning of synchronize_srcu() and before
+ * the beginning of that SRCU read-side critical section.  Note that these
+ * guarantees include CPUs that are offline, idle, or executing in user mode,
+ * as well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_srcu(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_srcu().  This guarantee applies even if CPU A and CPU B
+ * are the same CPU, but again only if the system has more than one CPU.
+ *
+ * Of course, these memory-ordering guarantees apply only when
+ * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
+ * passed the same srcu_struct structure.
+ */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+	if (rcu_gp_is_expedited())
+		synchronize_srcu_expedited(sp);
+	else
+		__synchronize_srcu(sp);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu);
+
+/**
+ * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
+ * @sp: srcu_struct on which to wait for in-flight callbacks.
+ */
+void srcu_barrier(struct srcu_struct *sp)
+{
+	synchronize_srcu(sp);
+}
+EXPORT_SYMBOL_GPL(srcu_barrier);
+
+/**
+ * srcu_batches_completed - return batches completed.
+ * @sp: srcu_struct on which to report batch completion.
+ *
+ * Report the number of batches, correlated with, but not necessarily
+ * precisely the same as, the number of grace periods that have elapsed.
+ */
+unsigned long srcu_batches_completed(struct srcu_struct *sp)
+{
+	return sp->completed;
+}
+EXPORT_SYMBOL_GPL(srcu_batches_completed);
+
+/*
+ * Core SRCU state machine.  Advance callbacks from ->batch_check0 to
+ * ->batch_check1 and then to ->batch_done as readers drain.
+ */
+static void srcu_advance_batches(struct srcu_struct *sp)
+{
+	int idx;
+
+	/*
+	 * Because readers might be delayed for an extended period after
+	 * fetching ->completed for their index, at any point in time there
+	 * might well be readers using both idx=0 and idx=1.  We therefore
+	 * need to wait for readers to clear from both index values before
+	 * invoking a callback.
+	 *
+	 * The load-acquire ensures that we see the accesses performed
+	 * by the prior grace period.
+	 */
+	idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
+	if (idx == SRCU_STATE_IDLE) {
+		spin_lock_irq(&sp->queue_lock);
+		if (rcu_segcblist_empty(&sp->srcu_cblist)) {
+			spin_unlock_irq(&sp->queue_lock);
+			return;
+		}
+		idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
+		if (idx == SRCU_STATE_IDLE)
+			srcu_gp_start(sp);
+		spin_unlock_irq(&sp->queue_lock);
+		if (idx != SRCU_STATE_IDLE)
+			return; /* Someone else started the grace period. */
+	}
+
+	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
+		idx = 1 ^ (sp->completed & 1);
+		if (!try_check_zero(sp, idx, 1))
+			return; /* readers present, retry later. */
+		srcu_flip(sp);
+		rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2);
+	}
+
+	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
+
+		/*
+		 * SRCU read-side critical sections are normally short,
+		 * so check at least twice in quick succession after a flip.
+		 */
+		idx = 1 ^ (sp->completed & 1);
+		if (!try_check_zero(sp, idx, 2))
+			return; /* readers present, retry after later. */
+		srcu_gp_end(sp);
+	}
+}
+
+/*
+ * Invoke a limited number of SRCU callbacks that have passed through
+ * their grace period.  If there are more to do, SRCU will reschedule
+ * the workqueue.  Note that needed memory barriers have been executed
+ * in this task's context by srcu_readers_active_idx_check().
+ */
+static void srcu_invoke_callbacks(struct srcu_struct *sp)
+{
+	struct rcu_cblist ready_cbs;
+	struct rcu_head *rhp;
+
+	spin_lock_irq(&sp->queue_lock);
+	if (!rcu_segcblist_ready_cbs(&sp->srcu_cblist)) {
+		spin_unlock_irq(&sp->queue_lock);
+		return;
+	}
+	rcu_cblist_init(&ready_cbs);
+	rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs);
+	spin_unlock_irq(&sp->queue_lock);
+	rhp = rcu_cblist_dequeue(&ready_cbs);
+	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
+		local_bh_disable();
+		rhp->func(rhp);
+		local_bh_enable();
+	}
+	spin_lock_irq(&sp->queue_lock);
+	rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs);
+	spin_unlock_irq(&sp->queue_lock);
+}
+
+/*
+ * Finished one round of SRCU grace period.  Start another if there are
+ * more SRCU callbacks queued, otherwise put SRCU into not-running state.
+ */
+static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
+{
+	bool pending = true;
+	int state;
+
+	if (rcu_segcblist_empty(&sp->srcu_cblist)) {
+		spin_lock_irq(&sp->queue_lock);
+		state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
+		if (rcu_segcblist_empty(&sp->srcu_cblist) &&
+		    state == SRCU_STATE_IDLE)
+			pending = false;
+		spin_unlock_irq(&sp->queue_lock);
+	}
+
+	if (pending)
+		queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
+}
+
+/*
+ * This is the work-queue function that handles SRCU grace periods.
+ */
+void process_srcu(struct work_struct *work)
+{
+	struct srcu_struct *sp;
+
+	sp = container_of(work, struct srcu_struct, work.work);
+
+	srcu_advance_batches(sp);
+	srcu_invoke_callbacks(sp);
+	srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL);
+}
+EXPORT_SYMBOL_GPL(process_srcu);
-- 
cgit v1.2.3


From 5f0d5a3ae7cff0d7fa943c199c3a2e44f23e1fac Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 18 Jan 2017 02:53:44 -0800
Subject: mm: Rename SLAB_DESTROY_BY_RCU to SLAB_TYPESAFE_BY_RCU

A group of Linux kernel hackers reported chasing a bug that resulted
from their assumption that SLAB_DESTROY_BY_RCU provided an existence
guarantee, that is, that no block from such a slab would be reallocated
during an RCU read-side critical section.  Of course, that is not the
case.  Instead, SLAB_DESTROY_BY_RCU only prevents freeing of an entire
slab of blocks.

However, there is a phrase for this, namely "type safety".  This commit
therefore renames SLAB_DESTROY_BY_RCU to SLAB_TYPESAFE_BY_RCU in order
to avoid future instances of this sort of confusion.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: <linux-mm@kvack.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
[ paulmck: Add comments mentioning the old name, as requested by Eric
  Dumazet, in order to help people familiar with the old name find
  the new one. ]
Acked-by: David Rientjes <rientjes@google.com>
---
 Documentation/RCU/00-INDEX                      |  2 +-
 Documentation/RCU/rculist_nulls.txt             |  6 +++---
 Documentation/RCU/whatisRCU.txt                 |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c                 |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.h         |  2 +-
 drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c |  2 +-
 fs/jbd2/journal.c                               |  2 +-
 fs/signalfd.c                                   |  2 +-
 include/linux/dma-fence.h                       |  4 ++--
 include/linux/slab.h                            |  6 ++++--
 include/net/sock.h                              |  2 +-
 kernel/fork.c                                   |  4 ++--
 kernel/signal.c                                 |  2 +-
 mm/kasan/kasan.c                                |  6 +++---
 mm/kmemcheck.c                                  |  2 +-
 mm/rmap.c                                       |  4 ++--
 mm/slab.c                                       |  6 +++---
 mm/slab.h                                       |  4 ++--
 mm/slab_common.c                                |  6 +++---
 mm/slob.c                                       |  6 +++---
 mm/slub.c                                       | 12 ++++++------
 net/dccp/ipv4.c                                 |  2 +-
 net/dccp/ipv6.c                                 |  2 +-
 net/ipv4/tcp_ipv4.c                             |  2 +-
 net/ipv6/tcp_ipv6.c                             |  2 +-
 net/llc/af_llc.c                                |  2 +-
 net/llc/llc_conn.c                              |  4 ++--
 net/llc/llc_sap.c                               |  2 +-
 net/netfilter/nf_conntrack_core.c               |  8 ++++----
 net/smc/af_smc.c                                |  2 +-
 30 files changed, 57 insertions(+), 54 deletions(-)

diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
index f773a264ae02..1672573b037a 100644
--- a/Documentation/RCU/00-INDEX
+++ b/Documentation/RCU/00-INDEX
@@ -17,7 +17,7 @@ rcu_dereference.txt
 rcubarrier.txt
 	- RCU and Unloadable Modules
 rculist_nulls.txt
-	- RCU list primitives for use with SLAB_DESTROY_BY_RCU
+	- RCU list primitives for use with SLAB_TYPESAFE_BY_RCU
 rcuref.txt
 	- Reference-count design for elements of lists/arrays protected by RCU
 rcu.txt
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
index 18f9651ff23d..8151f0195f76 100644
--- a/Documentation/RCU/rculist_nulls.txt
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -1,5 +1,5 @@
 Using hlist_nulls to protect read-mostly linked lists and
-objects using SLAB_DESTROY_BY_RCU allocations.
+objects using SLAB_TYPESAFE_BY_RCU allocations.
 
 Please read the basics in Documentation/RCU/listRCU.txt
 
@@ -7,7 +7,7 @@ Using special makers (called 'nulls') is a convenient way
 to solve following problem :
 
 A typical RCU linked list managing objects which are
-allocated with SLAB_DESTROY_BY_RCU kmem_cache can
+allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can
 use following algos :
 
 1) Lookup algo
@@ -96,7 +96,7 @@ unlock_chain(); // typically a spin_unlock()
 3) Remove algo
 --------------
 Nothing special here, we can use a standard RCU hlist deletion.
-But thanks to SLAB_DESTROY_BY_RCU, beware a deleted object can be reused
+But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
 very very fast (before the end of RCU grace period)
 
 if (put_last_reference_on(obj) {
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 5cbd8b2395b8..91c912e86915 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -925,7 +925,8 @@ d.	Do you need RCU grace periods to complete even in the face
 
 e.	Is your workload too update-intensive for normal use of
 	RCU, but inappropriate for other synchronization mechanisms?
-	If so, consider SLAB_DESTROY_BY_RCU.  But please be careful!
+	If so, consider SLAB_TYPESAFE_BY_RCU (which was originally
+	named SLAB_DESTROY_BY_RCU).  But please be careful!
 
 f.	Do you need read-side critical sections that are respected
 	even though they are in the middle of the idle loop, during
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6908123162d1..3b668895ac24 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4552,7 +4552,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
 					SLAB_HWCACHE_ALIGN |
 					SLAB_RECLAIM_ACCOUNT |
-					SLAB_DESTROY_BY_RCU);
+					SLAB_TYPESAFE_BY_RCU);
 	if (!dev_priv->requests)
 		goto err_vmas;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index ea511f06efaf..9ee2750e1dde 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -493,7 +493,7 @@ static inline struct drm_i915_gem_request *
 __i915_gem_active_get_rcu(const struct i915_gem_active *active)
 {
 	/* Performing a lockless retrieval of the active request is super
-	 * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing
+	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
 	 * slab of request objects will not be freed whilst we hold the
 	 * RCU read lock. It does not guarantee that the request itself
 	 * will not be freed and then *reused*. Viz,
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
index 12647af5a336..e7fb47e84a93 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
@@ -1071,7 +1071,7 @@ int ldlm_init(void)
 	ldlm_lock_slab = kmem_cache_create("ldlm_locks",
 					   sizeof(struct ldlm_lock), 0,
 					   SLAB_HWCACHE_ALIGN |
-					   SLAB_DESTROY_BY_RCU, NULL);
+					   SLAB_TYPESAFE_BY_RCU, NULL);
 	if (!ldlm_lock_slab) {
 		kmem_cache_destroy(ldlm_resource_slab);
 		return -ENOMEM;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a1a359bfcc9c..7f8f962454e5 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2340,7 +2340,7 @@ static int jbd2_journal_init_journal_head_cache(void)
 	jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
 				sizeof(struct journal_head),
 				0,		/* offset */
-				SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU,
+				SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU,
 				NULL);		/* ctor */
 	retval = 0;
 	if (!jbd2_journal_head_cache) {
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 270221fcef42..7e3d71109f51 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -38,7 +38,7 @@ void signalfd_cleanup(struct sighand_struct *sighand)
 	/*
 	 * The lockless check can race with remove_wait_queue() in progress,
 	 * but in this case its caller should run under rcu_read_lock() and
-	 * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
+	 * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return.
 	 */
 	if (likely(!waitqueue_active(wqh)))
 		return;
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 6048fa404e57..a5195a7d6f77 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -229,7 +229,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence)
  *
  * Function returns NULL if no refcount could be obtained, or the fence.
  * This function handles acquiring a reference to a fence that may be
- * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU),
+ * reallocated within the RCU grace period (such as with SLAB_TYPESAFE_BY_RCU),
  * so long as the caller is using RCU on the pointer to the fence.
  *
  * An alternative mechanism is to employ a seqlock to protect a bunch of
@@ -257,7 +257,7 @@ dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep)
 		 * have successfully acquire a reference to it. If it no
 		 * longer matches, we are holding a reference to some other
 		 * reallocated pointer. This is possible if the allocator
-		 * is using a freelist like SLAB_DESTROY_BY_RCU where the
+		 * is using a freelist like SLAB_TYPESAFE_BY_RCU where the
 		 * fence remains valid for the RCU grace period, but it
 		 * may be reallocated. When using such allocators, we are
 		 * responsible for ensuring the reference we get is to
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 3c37a8c51921..04a7f7993e67 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -28,7 +28,7 @@
 #define SLAB_STORE_USER		0x00010000UL	/* DEBUG: Store the last owner for bug hunting */
 #define SLAB_PANIC		0x00040000UL	/* Panic if kmem_cache_create() fails */
 /*
- * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS!
+ * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
  *
  * This delays freeing the SLAB page by a grace period, it does _NOT_
  * delay object freeing. This means that if you do kmem_cache_free()
@@ -61,8 +61,10 @@
  *
  * rcu_read_lock before reading the address, then rcu_read_unlock after
  * taking the spinlock within the structure expected at that address.
+ *
+ * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
  */
-#define SLAB_DESTROY_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
+#define SLAB_TYPESAFE_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
 #define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */
 #define SLAB_TRACE		0x00200000UL	/* Trace allocations and frees */
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 5e5997654db6..59cdccaa30e7 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -993,7 +993,7 @@ struct smc_hashinfo;
 struct module;
 
 /*
- * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
+ * caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes
  * un-modified. Special care is taken when initializing object to zero.
  */
 static inline void sk_prot_clear_nulls(struct sock *sk, int size)
diff --git a/kernel/fork.c b/kernel/fork.c
index 6c463c80e93d..9330ce24f1bb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1313,7 +1313,7 @@ void __cleanup_sighand(struct sighand_struct *sighand)
 	if (atomic_dec_and_test(&sighand->count)) {
 		signalfd_cleanup(sighand);
 		/*
-		 * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it
+		 * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it
 		 * without an RCU grace period, see __lock_task_sighand().
 		 */
 		kmem_cache_free(sighand_cachep, sighand);
@@ -2144,7 +2144,7 @@ void __init proc_caches_init(void)
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
 			SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor);
 	signal_cachep = kmem_cache_create("signal_cache",
 			sizeof(struct signal_struct), 0,
diff --git a/kernel/signal.c b/kernel/signal.c
index 7e59ebc2c25e..6df5f72158e4 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1237,7 +1237,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 		}
 		/*
 		 * This sighand can be already freed and even reused, but
-		 * we rely on SLAB_DESTROY_BY_RCU and sighand_ctor() which
+		 * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which
 		 * initializes ->siglock: this slab can't go away, it has
 		 * the same object type, ->siglock can't be reinitialized.
 		 *
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 98b27195e38b..4b20061102f6 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -413,7 +413,7 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size,
 	*size += sizeof(struct kasan_alloc_meta);
 
 	/* Add free meta. */
-	if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor ||
+	if (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor ||
 	    cache->object_size < sizeof(struct kasan_free_meta)) {
 		cache->kasan_info.free_meta_offset = *size;
 		*size += sizeof(struct kasan_free_meta);
@@ -561,7 +561,7 @@ static void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
 	unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
 
 	/* RCU slabs could be legally used after free within the RCU period */
-	if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+	if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
 		return;
 
 	kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
@@ -572,7 +572,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object)
 	s8 shadow_byte;
 
 	/* RCU slabs could be legally used after free within the RCU period */
-	if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+	if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
 		return false;
 
 	shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object));
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
index 5bf191756a4a..2d5959c5f7c5 100644
--- a/mm/kmemcheck.c
+++ b/mm/kmemcheck.c
@@ -95,7 +95,7 @@ void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
 void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size)
 {
 	/* TODO: RCU freeing is unsupported for now; hide false positives. */
-	if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU))
+	if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU))
 		kmemcheck_mark_freed(object, size);
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 49ed681ccc7b..8ffd59df8a3f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -430,7 +430,7 @@ static void anon_vma_ctor(void *data)
 void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
-			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
+			0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
 			anon_vma_ctor);
 	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
 			SLAB_PANIC|SLAB_ACCOUNT);
@@ -481,7 +481,7 @@ struct anon_vma *page_get_anon_vma(struct page *page)
 	 * If this page is still mapped, then its anon_vma cannot have been
 	 * freed.  But if it has been unmapped, we have no security against the
 	 * anon_vma structure being freed and reused (for another anon_vma:
-	 * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
+	 * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
 	 * above cannot corrupt).
 	 */
 	if (!page_mapped(page)) {
diff --git a/mm/slab.c b/mm/slab.c
index 807d86c76908..93c827864862 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1728,7 +1728,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
 
 	freelist = page->freelist;
 	slab_destroy_debugcheck(cachep, page);
-	if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
+	if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
 		call_rcu(&page->rcu_head, kmem_rcu_free);
 	else
 		kmem_freepages(cachep, page);
@@ -1924,7 +1924,7 @@ static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
 
 	cachep->num = 0;
 
-	if (cachep->ctor || flags & SLAB_DESTROY_BY_RCU)
+	if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
 		return false;
 
 	left = calculate_slab_order(cachep, size,
@@ -2030,7 +2030,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
 						2 * sizeof(unsigned long long)))
 		flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
-	if (!(flags & SLAB_DESTROY_BY_RCU))
+	if (!(flags & SLAB_TYPESAFE_BY_RCU))
 		flags |= SLAB_POISON;
 #endif
 #endif
diff --git a/mm/slab.h b/mm/slab.h
index 65e7c3fcac72..9cfcf099709c 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -126,7 +126,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size,
 
 /* Legal flag mask for kmem_cache_create(), for various configurations */
 #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS )
+			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
 
 #if defined(CONFIG_DEBUG_SLAB)
 #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
@@ -415,7 +415,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
 	 * back there or track user information then we can
 	 * only use the space before that information.
 	 */
-	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
+	if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
 		return s->inuse;
 	/*
 	 * Else we can use all the padding etc for the allocation
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 09d0e849b07f..01a0fe2eb332 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -39,7 +39,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
  * Set of flags that will prevent slab merging
  */
 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
+		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
 		SLAB_FAILSLAB | SLAB_KASAN)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
@@ -500,7 +500,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
 	struct kmem_cache *s, *s2;
 
 	/*
-	 * On destruction, SLAB_DESTROY_BY_RCU kmem_caches are put on the
+	 * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
 	 * @slab_caches_to_rcu_destroy list.  The slab pages are freed
 	 * through RCU and and the associated kmem_cache are dereferenced
 	 * while freeing the pages, so the kmem_caches should be freed only
@@ -537,7 +537,7 @@ static int shutdown_cache(struct kmem_cache *s)
 	memcg_unlink_cache(s);
 	list_del(&s->list);
 
-	if (s->flags & SLAB_DESTROY_BY_RCU) {
+	if (s->flags & SLAB_TYPESAFE_BY_RCU) {
 		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
 		schedule_work(&slab_caches_to_rcu_destroy_work);
 	} else {
diff --git a/mm/slob.c b/mm/slob.c
index eac04d4357ec..1bae78d71096 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -126,7 +126,7 @@ static inline void clear_slob_page_free(struct page *sp)
 
 /*
  * struct slob_rcu is inserted at the tail of allocated slob blocks, which
- * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
+ * were created with a SLAB_TYPESAFE_BY_RCU slab. slob_rcu is used to free
  * the block using call_rcu.
  */
 struct slob_rcu {
@@ -524,7 +524,7 @@ EXPORT_SYMBOL(ksize);
 
 int __kmem_cache_create(struct kmem_cache *c, unsigned long flags)
 {
-	if (flags & SLAB_DESTROY_BY_RCU) {
+	if (flags & SLAB_TYPESAFE_BY_RCU) {
 		/* leave room for rcu footer at the end of object */
 		c->size += sizeof(struct slob_rcu);
 	}
@@ -598,7 +598,7 @@ static void kmem_rcu_free(struct rcu_head *head)
 void kmem_cache_free(struct kmem_cache *c, void *b)
 {
 	kmemleak_free_recursive(b, c->flags);
-	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
+	if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) {
 		struct slob_rcu *slob_rcu;
 		slob_rcu = b + (c->size - sizeof(struct slob_rcu));
 		slob_rcu->size = c->size;
diff --git a/mm/slub.c b/mm/slub.c
index 7f4bc7027ed5..57e5156f02be 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1687,7 +1687,7 @@ static void rcu_free_slab(struct rcu_head *h)
 
 static void free_slab(struct kmem_cache *s, struct page *page)
 {
-	if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
+	if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
 		struct rcu_head *head;
 
 		if (need_reserve_slab_rcu) {
@@ -2963,7 +2963,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
 	 * slab_free_freelist_hook() could have put the items into quarantine.
 	 * If so, no need to free them.
 	 */
-	if (s->flags & SLAB_KASAN && !(s->flags & SLAB_DESTROY_BY_RCU))
+	if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU))
 		return;
 	do_slab_free(s, page, head, tail, cnt, addr);
 }
@@ -3433,7 +3433,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	 * the slab may touch the object after free or before allocation
 	 * then we should never poison the object itself.
 	 */
-	if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
+	if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
 			!s->ctor)
 		s->flags |= __OBJECT_POISON;
 	else
@@ -3455,7 +3455,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	 */
 	s->inuse = size;
 
-	if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
+	if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
 		s->ctor)) {
 		/*
 		 * Relocate free pointer after the object if it is not
@@ -3537,7 +3537,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
 	s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
 	s->reserved = 0;
 
-	if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
+	if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU))
 		s->reserved = sizeof(struct rcu_head);
 
 	if (!calculate_sizes(s, -1))
@@ -5042,7 +5042,7 @@ SLAB_ATTR_RO(cache_dma);
 
 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
 {
-	return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
+	return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
 }
 SLAB_ATTR_RO(destroy_by_rcu);
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 409d0cfd3447..90210a0e3888 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -950,7 +950,7 @@ static struct proto dccp_v4_prot = {
 	.orphan_count		= &dccp_orphan_count,
 	.max_header		= MAX_DCCP_HEADER,
 	.obj_size		= sizeof(struct dccp_sock),
-	.slab_flags		= SLAB_DESTROY_BY_RCU,
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
 	.rsk_prot		= &dccp_request_sock_ops,
 	.twsk_prot		= &dccp_timewait_sock_ops,
 	.h.hashinfo		= &dccp_hashinfo,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 233b57367758..b4019a5e4551 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1012,7 +1012,7 @@ static struct proto dccp_v6_prot = {
 	.orphan_count	   = &dccp_orphan_count,
 	.max_header	   = MAX_DCCP_HEADER,
 	.obj_size	   = sizeof(struct dccp6_sock),
-	.slab_flags	   = SLAB_DESTROY_BY_RCU,
+	.slab_flags	   = SLAB_TYPESAFE_BY_RCU,
 	.rsk_prot	   = &dccp6_request_sock_ops,
 	.twsk_prot	   = &dccp6_timewait_sock_ops,
 	.h.hashinfo	   = &dccp_hashinfo,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9a89b8deafae..82c89abeb989 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2398,7 +2398,7 @@ struct proto tcp_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp_sock),
-	.slab_flags		= SLAB_DESTROY_BY_RCU,
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
 	.twsk_prot		= &tcp_timewait_sock_ops,
 	.rsk_prot		= &tcp_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 60a5295a7de6..bdbc4327ebee 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1919,7 +1919,7 @@ struct proto tcpv6_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp6_sock),
-	.slab_flags		= SLAB_DESTROY_BY_RCU,
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
 	.twsk_prot		= &tcp6_timewait_sock_ops,
 	.rsk_prot		= &tcp6_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 06186d608a27..d096ca563054 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -142,7 +142,7 @@ static struct proto llc_proto = {
 	.name	  = "LLC",
 	.owner	  = THIS_MODULE,
 	.obj_size = sizeof(struct llc_sock),
-	.slab_flags = SLAB_DESTROY_BY_RCU,
+	.slab_flags = SLAB_TYPESAFE_BY_RCU,
 };
 
 /**
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 8bc5a1bd2d45..9b02c13d258b 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -506,7 +506,7 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap,
 again:
 	sk_nulls_for_each_rcu(rc, node, laddr_hb) {
 		if (llc_estab_match(sap, daddr, laddr, rc)) {
-			/* Extra checks required by SLAB_DESTROY_BY_RCU */
+			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
 			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
 				goto again;
 			if (unlikely(llc_sk(rc)->sap != sap ||
@@ -565,7 +565,7 @@ static struct sock *__llc_lookup_listener(struct llc_sap *sap,
 again:
 	sk_nulls_for_each_rcu(rc, node, laddr_hb) {
 		if (llc_listener_match(sap, laddr, rc)) {
-			/* Extra checks required by SLAB_DESTROY_BY_RCU */
+			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
 			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
 				goto again;
 			if (unlikely(llc_sk(rc)->sap != sap ||
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 5404d0d195cc..63b6ab056370 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -328,7 +328,7 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap,
 again:
 	sk_nulls_for_each_rcu(rc, node, laddr_hb) {
 		if (llc_dgram_match(sap, laddr, rc)) {
-			/* Extra checks required by SLAB_DESTROY_BY_RCU */
+			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
 			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
 				goto again;
 			if (unlikely(llc_sk(rc)->sap != sap ||
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 071b97fcbefb..fdcdac7916b2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -914,7 +914,7 @@ static unsigned int early_drop_list(struct net *net,
 			continue;
 
 		/* kill only if still in same netns -- might have moved due to
-		 * SLAB_DESTROY_BY_RCU rules.
+		 * SLAB_TYPESAFE_BY_RCU rules.
 		 *
 		 * We steal the timer reference.  If that fails timer has
 		 * already fired or someone else deleted it. Just drop ref
@@ -1069,7 +1069,7 @@ __nf_conntrack_alloc(struct net *net,
 
 	/*
 	 * Do not use kmem_cache_zalloc(), as this cache uses
-	 * SLAB_DESTROY_BY_RCU.
+	 * SLAB_TYPESAFE_BY_RCU.
 	 */
 	ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
 	if (ct == NULL)
@@ -1114,7 +1114,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 	struct net *net = nf_ct_net(ct);
 
 	/* A freed object has refcnt == 0, that's
-	 * the golden rule for SLAB_DESTROY_BY_RCU
+	 * the golden rule for SLAB_TYPESAFE_BY_RCU
 	 */
 	NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
 
@@ -1878,7 +1878,7 @@ int nf_conntrack_init_start(void)
 	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
 						sizeof(struct nf_conn),
 						NFCT_INFOMASK + 1,
-						SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
+						SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
 	if (!nf_conntrack_cachep)
 		goto err_cachep;
 
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 85837ab90e89..d34bbd6d8f38 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -101,7 +101,7 @@ struct proto smc_proto = {
 	.unhash		= smc_unhash_sk,
 	.obj_size	= sizeof(struct smc_sock),
 	.h.smc_hash	= &smc_v4_hashinfo,
-	.slab_flags	= SLAB_DESTROY_BY_RCU,
+	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
 };
 EXPORT_SYMBOL_GPL(smc_proto);
 
-- 
cgit v1.2.3


From 9adddb01ce5f71ae5c26b8aff01335d40be55133 Mon Sep 17 00:00:00 2001
From: Alexey Firago <alexey_firago@mentor.com>
Date: Fri, 7 Apr 2017 12:12:22 +0300
Subject: clk: vc5: Add structure to describe particular chip features

Introduce vc5_chip_info structure to describe features of a particular
VC5 chip (id, number of FODs, number of outputs, flags).
For now flags are only used to indicate if chip has internal XTAL.
vc5_chip_info is set on probe from the matched of_device_id->data.

Also add defines to specify maximum number of FODs and clock outputs
supported by the driver.

With these changes it should be easier to extend driver to support
more VC5 models.

Signed-off-by: Alexey Firago <alexey_firago@mentor.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk-versaclock5.c | 65 +++++++++++++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 18 deletions(-)

diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c
index 56741f3cf0a3..2b1cc699347c 100644
--- a/drivers/clk/clk-versaclock5.c
+++ b/drivers/clk/clk-versaclock5.c
@@ -113,12 +113,30 @@
 #define VC5_MUX_IN_XIN		BIT(0)
 #define VC5_MUX_IN_CLKIN	BIT(1)
 
+/* Maximum number of clk_out supported by this driver */
+#define VC5_MAX_CLK_OUT_NUM	3
+
+/* Maximum number of FODs supported by this driver */
+#define VC5_MAX_FOD_NUM	2
+
+/* flags to describe chip features */
+/* chip has built-in oscilator */
+#define VC5_HAS_INTERNAL_XTAL	BIT(0)
+
 /* Supported IDT VC5 models. */
 enum vc5_model {
 	IDT_VC5_5P49V5923,
 	IDT_VC5_5P49V5933,
 };
 
+/* Structure to describe features of a particular VC5 model */
+struct vc5_chip_info {
+	const enum vc5_model	model;
+	const unsigned int	clk_fod_cnt;
+	const unsigned int	clk_out_cnt;
+	const u32		flags;
+};
+
 struct vc5_driver_data;
 
 struct vc5_hw_data {
@@ -132,15 +150,15 @@ struct vc5_hw_data {
 struct vc5_driver_data {
 	struct i2c_client	*client;
 	struct regmap		*regmap;
-	enum vc5_model		model;
+	const struct vc5_chip_info	*chip_info;
 
 	struct clk		*pin_xin;
 	struct clk		*pin_clkin;
 	unsigned char		clk_mux_ins;
 	struct clk_hw		clk_mux;
 	struct vc5_hw_data	clk_pll;
-	struct vc5_hw_data	clk_fod[2];
-	struct vc5_hw_data	clk_out[3];
+	struct vc5_hw_data	clk_fod[VC5_MAX_FOD_NUM];
+	struct vc5_hw_data	clk_out[VC5_MAX_CLK_OUT_NUM];
 };
 
 static const char * const vc5_mux_names[] = {
@@ -563,7 +581,7 @@ static struct clk_hw *vc5_of_clk_get(struct of_phandle_args *clkspec,
 	struct vc5_driver_data *vc5 = data;
 	unsigned int idx = clkspec->args[0];
 
-	if (idx > 2)
+	if (idx >= vc5->chip_info->clk_out_cnt)
 		return ERR_PTR(-EINVAL);
 
 	return &vc5->clk_out[idx].hw;
@@ -586,12 +604,10 @@ static const struct of_device_id clk_vc5_of_match[];
 static int vc5_probe(struct i2c_client *client,
 		     const struct i2c_device_id *id)
 {
-	const struct of_device_id *of_id =
-		of_match_device(clk_vc5_of_match, &client->dev);
 	struct vc5_driver_data *vc5;
 	struct clk_init_data init;
 	const char *parent_names[2];
-	unsigned int n, idx;
+	unsigned int n, idx = 0;
 	int ret;
 
 	vc5 = devm_kzalloc(&client->dev, sizeof(*vc5), GFP_KERNEL);
@@ -600,7 +616,7 @@ static int vc5_probe(struct i2c_client *client,
 
 	i2c_set_clientdata(client, vc5);
 	vc5->client = client;
-	vc5->model = (enum vc5_model)of_id->data;
+	vc5->chip_info = of_device_get_match_data(&client->dev);
 
 	vc5->pin_xin = devm_clk_get(&client->dev, "xin");
 	if (PTR_ERR(vc5->pin_xin) == -EPROBE_DEFER)
@@ -622,8 +638,7 @@ static int vc5_probe(struct i2c_client *client,
 	if (!IS_ERR(vc5->pin_xin)) {
 		vc5->clk_mux_ins |= VC5_MUX_IN_XIN;
 		parent_names[init.num_parents++] = __clk_get_name(vc5->pin_xin);
-	} else if (vc5->model == IDT_VC5_5P49V5933) {
-		/* IDT VC5 5P49V5933 has built-in oscilator. */
+	} else if (vc5->chip_info->flags & VC5_HAS_INTERNAL_XTAL) {
 		vc5->pin_xin = clk_register_fixed_rate(&client->dev,
 						       "internal-xtal", NULL,
 						       0, 25000000);
@@ -672,8 +687,8 @@ static int vc5_probe(struct i2c_client *client,
 	}
 
 	/* Register FODs */
-	for (n = 0; n < 2; n++) {
-		idx = vc5_map_index_to_output(vc5->model, n);
+	for (n = 0; n < vc5->chip_info->clk_fod_cnt; n++) {
+		idx = vc5_map_index_to_output(vc5->chip_info->model, n);
 		memset(&init, 0, sizeof(init));
 		init.name = vc5_fod_names[idx];
 		init.ops = &vc5_fod_ops;
@@ -709,8 +724,8 @@ static int vc5_probe(struct i2c_client *client,
 	}
 
 	/* Register FOD-connected OUTx outputs */
-	for (n = 1; n < 3; n++) {
-		idx = vc5_map_index_to_output(vc5->model, n - 1);
+	for (n = 1; n < vc5->chip_info->clk_out_cnt; n++) {
+		idx = vc5_map_index_to_output(vc5->chip_info->model, n - 1);
 		parent_names[0] = vc5_fod_names[idx];
 		if (n == 1)
 			parent_names[1] = vc5_mux_names[0];
@@ -744,7 +759,7 @@ static int vc5_probe(struct i2c_client *client,
 	return 0;
 
 err_clk:
-	if (vc5->model == IDT_VC5_5P49V5933)
+	if (vc5->chip_info->flags & VC5_HAS_INTERNAL_XTAL)
 		clk_unregister_fixed_rate(vc5->pin_xin);
 	return ret;
 }
@@ -755,12 +770,26 @@ static int vc5_remove(struct i2c_client *client)
 
 	of_clk_del_provider(client->dev.of_node);
 
-	if (vc5->model == IDT_VC5_5P49V5933)
+	if (vc5->chip_info->flags & VC5_HAS_INTERNAL_XTAL)
 		clk_unregister_fixed_rate(vc5->pin_xin);
 
 	return 0;
 }
 
+static const struct vc5_chip_info idt_5p49v5923_info = {
+	.model = IDT_VC5_5P49V5923,
+	.clk_fod_cnt = 2,
+	.clk_out_cnt = 3,
+	.flags = 0,
+};
+
+static const struct vc5_chip_info idt_5p49v5933_info = {
+	.model = IDT_VC5_5P49V5933,
+	.clk_fod_cnt = 2,
+	.clk_out_cnt = 3,
+	.flags = VC5_HAS_INTERNAL_XTAL,
+};
+
 static const struct i2c_device_id vc5_id[] = {
 	{ "5p49v5923", .driver_data = IDT_VC5_5P49V5923 },
 	{ "5p49v5933", .driver_data = IDT_VC5_5P49V5933 },
@@ -769,8 +798,8 @@ static const struct i2c_device_id vc5_id[] = {
 MODULE_DEVICE_TABLE(i2c, vc5_id);
 
 static const struct of_device_id clk_vc5_of_match[] = {
-	{ .compatible = "idt,5p49v5923", .data = (void *)IDT_VC5_5P49V5923 },
-	{ .compatible = "idt,5p49v5933", .data = (void *)IDT_VC5_5P49V5933 },
+	{ .compatible = "idt,5p49v5923", .data = &idt_5p49v5923_info },
+	{ .compatible = "idt,5p49v5933", .data = &idt_5p49v5933_info },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, clk_vc5_of_match);
-- 
cgit v1.2.3


From a330b6f5d1aee83fb2554d00ad33877b91a74c0c Mon Sep 17 00:00:00 2001
From: Alexey Firago <alexey_firago@mentor.com>
Date: Fri, 7 Apr 2017 12:12:23 +0300
Subject: clk: vc5: Add bindings for IDT VersaClock 5P49V5935

IDT VersaClock 5 5P49V5935 has 4 clock outputs, 4 fractional dividers.
Input clock source can be taken from either integrated crystal or from
external reference clock.

Signed-off-by: Alexey Firago <alexey_firago@mentor.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 .../devicetree/bindings/clock/idt,versaclock5.txt        | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt b/Documentation/devicetree/bindings/clock/idt,versaclock5.txt
index 87e9c47a89a3..53d7e50ed875 100644
--- a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt
+++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.txt
@@ -6,18 +6,21 @@ from 3 to 12 output clocks.
 ==I2C device node==
 
 Required properties:
-- compatible:	shall be one of "idt,5p49v5923" , "idt,5p49v5933".
+- compatible:	shall be one of "idt,5p49v5923" , "idt,5p49v5933" ,
+		"idt,5p49v5935".
 - reg:		i2c device address, shall be 0x68 or 0x6a.
 - #clock-cells:	from common clock binding; shall be set to 1.
 - clocks:	from common clock binding; list of parent clock handles,
 		- 5p49v5923: (required) either or both of XTAL or CLKIN
 					reference clock.
-		- 5p49v5933: (optional) property not present (internal
+		- 5p49v5933 and
+		- 5p49v5935: (optional) property not present (internal
 					Xtal used) or CLKIN reference
 					clock.
 - clock-names:	from common clock binding; clock input names, can be
 		- 5p49v5923: (required) either or both of "xin", "clkin".
-		- 5p49v5933: (optional) property not present or "clkin".
+		- 5p49v5933 and
+		- 5p49v5935: (optional) property not present or "clkin".
 
 ==Mapping between clock specifier and physical pins==
 
@@ -34,6 +37,13 @@ clock specifier, the following mapping applies:
 	1 -- OUT1
 	2 -- OUT4
 
+5P49V5935:
+	0 -- OUT0_SEL_I2CB
+	1 -- OUT1
+	2 -- OUT2
+	3 -- OUT3
+	4 -- OUT4
+
 ==Example==
 
 /* 25MHz reference crystal */
-- 
cgit v1.2.3


From 1193e14fddbb5abe4fed49874ab3aab5d4f33b2c Mon Sep 17 00:00:00 2001
From: Alexey Firago <alexey_firago@mentor.com>
Date: Fri, 7 Apr 2017 12:12:24 +0300
Subject: clk: vc5: Add support for IDT VersaClock 5P49V5935

Update IDT VersaClock 5 driver to support 5P49V5935. This chip has
two clock inputs (internal XTAL or external CLKIN), four fractional
dividers (FODs) and five clock outputs (four universal clock outputs
and one reference clock output at OUT0_SELB_I2C).

Current driver supports up to 2 FODs and up to 3 clock outputs. This
patch sets max number of supported FODs to 4 and max number of supported
clock outputs to 5.

Signed-off-by: Alexey Firago <alexey_firago@mentor.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk-versaclock5.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c
index 2b1cc699347c..ea7d552a2f2b 100644
--- a/drivers/clk/clk-versaclock5.c
+++ b/drivers/clk/clk-versaclock5.c
@@ -114,10 +114,10 @@
 #define VC5_MUX_IN_CLKIN	BIT(1)
 
 /* Maximum number of clk_out supported by this driver */
-#define VC5_MAX_CLK_OUT_NUM	3
+#define VC5_MAX_CLK_OUT_NUM	5
 
 /* Maximum number of FODs supported by this driver */
-#define VC5_MAX_FOD_NUM	2
+#define VC5_MAX_FOD_NUM	4
 
 /* flags to describe chip features */
 /* chip has built-in oscilator */
@@ -127,6 +127,7 @@
 enum vc5_model {
 	IDT_VC5_5P49V5923,
 	IDT_VC5_5P49V5933,
+	IDT_VC5_5P49V5935,
 };
 
 /* Structure to describe features of a particular VC5 model */
@@ -594,6 +595,7 @@ static int vc5_map_index_to_output(const enum vc5_model model,
 	case IDT_VC5_5P49V5933:
 		return (n == 0) ? 0 : 3;
 	case IDT_VC5_5P49V5923:
+	case IDT_VC5_5P49V5935:
 	default:
 		return n;
 	}
@@ -790,9 +792,17 @@ static const struct vc5_chip_info idt_5p49v5933_info = {
 	.flags = VC5_HAS_INTERNAL_XTAL,
 };
 
+static const struct vc5_chip_info idt_5p49v5935_info = {
+	.model = IDT_VC5_5P49V5935,
+	.clk_fod_cnt = 4,
+	.clk_out_cnt = 5,
+	.flags = VC5_HAS_INTERNAL_XTAL,
+};
+
 static const struct i2c_device_id vc5_id[] = {
 	{ "5p49v5923", .driver_data = IDT_VC5_5P49V5923 },
 	{ "5p49v5933", .driver_data = IDT_VC5_5P49V5933 },
+	{ "5p49v5935", .driver_data = IDT_VC5_5P49V5935 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, vc5_id);
@@ -800,6 +810,7 @@ MODULE_DEVICE_TABLE(i2c, vc5_id);
 static const struct of_device_id clk_vc5_of_match[] = {
 	{ .compatible = "idt,5p49v5923", .data = &idt_5p49v5923_info },
 	{ .compatible = "idt,5p49v5933", .data = &idt_5p49v5933_info },
+	{ .compatible = "idt,5p49v5935", .data = &idt_5p49v5935_info },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, clk_vc5_of_match);
-- 
cgit v1.2.3


From 9a6e904226b26716424bf4ad5b69d591b68119fb Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Tue, 11 Apr 2017 10:37:48 +0800
Subject: clk: clk-imx7d: fix ahb clk definition

MX7D ahb clk actually has no LPCG gate, current LPCG offset 0x4200
used actually is for adc, not ahb. After fix, correct ocram_s_clk
parent accordingly as well.

Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Fabio Estevam <fabio.estevam@nxp.com>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Tested-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/imx/clk-imx7d.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c
index ae1d31be906e..ad044836d469 100644
--- a/drivers/clk/imx/clk-imx7d.c
+++ b/drivers/clk/imx/clk-imx7d.c
@@ -725,7 +725,7 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	clks[IMX7D_DISP_AXI_ROOT_DIV] = imx_clk_divider2("disp_axi_post_div", "disp_axi_pre_div", base + 0x8880, 0, 6);
 	clks[IMX7D_ENET_AXI_ROOT_DIV] = imx_clk_divider2("enet_axi_post_div", "enet_axi_pre_div", base + 0x8900, 0, 6);
 	clks[IMX7D_NAND_USDHC_BUS_ROOT_DIV] = imx_clk_divider2("nand_usdhc_post_div", "nand_usdhc_pre_div", base + 0x8980, 0, 6);
-	clks[IMX7D_AHB_CHANNEL_ROOT_DIV] = imx_clk_divider2("ahb_post_div", "ahb_pre_div", base + 0x9000, 0, 6);
+	clks[IMX7D_AHB_CHANNEL_ROOT_DIV] = imx_clk_divider2("ahb_root_clk", "ahb_pre_div", base + 0x9000, 0, 6);
 	clks[IMX7D_DRAM_ROOT_DIV] = imx_clk_divider2("dram_post_div", "dram_cg", base + 0x9880, 0, 3);
 	clks[IMX7D_DRAM_PHYM_ALT_ROOT_DIV] = imx_clk_divider2("dram_phym_alt_post_div", "dram_phym_alt_pre_div", base + 0xa000, 0, 3);
 	clks[IMX7D_DRAM_ALT_ROOT_DIV] = imx_clk_divider2("dram_alt_post_div", "dram_alt_pre_div", base + 0xa080, 0, 3);
@@ -796,9 +796,8 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	clks[IMX7D_DISP_AXI_ROOT_CLK] = imx_clk_gate4("disp_axi_root_clk", "disp_axi_post_div", base + 0x4050, 0);
 	clks[IMX7D_ENET_AXI_ROOT_CLK] = imx_clk_gate4("enet_axi_root_clk", "enet_axi_post_div", base + 0x4060, 0);
 	clks[IMX7D_OCRAM_CLK] = imx_clk_gate4("ocram_clk", "axi_post_div", base + 0x4110, 0);
-	clks[IMX7D_OCRAM_S_CLK] = imx_clk_gate4("ocram_s_clk", "ahb_post_div", base + 0x4120, 0);
+	clks[IMX7D_OCRAM_S_CLK] = imx_clk_gate4("ocram_s_clk", "ahb_root_clk", base + 0x4120, 0);
 	clks[IMX7D_NAND_USDHC_BUS_ROOT_CLK] = imx_clk_gate4("nand_usdhc_root_clk", "nand_usdhc_post_div", base + 0x4130, 0);
-	clks[IMX7D_AHB_CHANNEL_ROOT_CLK] = imx_clk_gate4("ahb_root_clk", "ahb_post_div", base + 0x4200, 0);
 	clks[IMX7D_DRAM_ROOT_CLK] = imx_clk_gate4("dram_root_clk", "dram_post_div", base + 0x4130, 0);
 	clks[IMX7D_DRAM_PHYM_ROOT_CLK] = imx_clk_gate4("dram_phym_root_clk", "dram_phym_cg", base + 0x4130, 0);
 	clks[IMX7D_DRAM_PHYM_ALT_ROOT_CLK] = imx_clk_gate4("dram_phym_alt_root_clk", "dram_phym_alt_post_div", base + 0x4130, 0);
-- 
cgit v1.2.3


From 40e00eff796d997de8041893829b5ffa785508e8 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Tue, 11 Apr 2017 10:37:49 +0800
Subject: clk: imx7d: add the missing ipg_root_clk

Add the missing ipg_root_clk which actually is already used by many
orphan clks in current tree.

Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Fabio Estevam <fabio.estevam@nxp.com>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Tested-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/imx/clk-imx7d.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c
index ad044836d469..562055129ed8 100644
--- a/drivers/clk/imx/clk-imx7d.c
+++ b/drivers/clk/imx/clk-imx7d.c
@@ -386,7 +386,7 @@ static int const clks_init_on[] __initconst = {
 	IMX7D_PLL_SYS_MAIN_480M_CLK, IMX7D_NAND_USDHC_BUS_ROOT_CLK,
 	IMX7D_DRAM_PHYM_ROOT_CLK, IMX7D_DRAM_ROOT_CLK,
 	IMX7D_DRAM_PHYM_ALT_ROOT_CLK, IMX7D_DRAM_ALT_ROOT_CLK,
-	IMX7D_AHB_CHANNEL_ROOT_CLK,
+	IMX7D_AHB_CHANNEL_ROOT_CLK, IMX7D_IPG_ROOT_CLK,
 };
 
 static struct clk_onecell_data clk_data;
@@ -726,6 +726,7 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	clks[IMX7D_ENET_AXI_ROOT_DIV] = imx_clk_divider2("enet_axi_post_div", "enet_axi_pre_div", base + 0x8900, 0, 6);
 	clks[IMX7D_NAND_USDHC_BUS_ROOT_DIV] = imx_clk_divider2("nand_usdhc_post_div", "nand_usdhc_pre_div", base + 0x8980, 0, 6);
 	clks[IMX7D_AHB_CHANNEL_ROOT_DIV] = imx_clk_divider2("ahb_root_clk", "ahb_pre_div", base + 0x9000, 0, 6);
+	clks[IMX7D_IPG_ROOT_CLK] = imx_clk_divider2("ipg_root_clk", "ahb_root_clk", base + 0x9080, 0, 2);
 	clks[IMX7D_DRAM_ROOT_DIV] = imx_clk_divider2("dram_post_div", "dram_cg", base + 0x9880, 0, 3);
 	clks[IMX7D_DRAM_PHYM_ALT_ROOT_DIV] = imx_clk_divider2("dram_phym_alt_post_div", "dram_phym_alt_pre_div", base + 0xa000, 0, 3);
 	clks[IMX7D_DRAM_ALT_ROOT_DIV] = imx_clk_divider2("dram_alt_post_div", "dram_alt_pre_div", base + 0xa080, 0, 3);
-- 
cgit v1.2.3


From df0225a45a8818396d01a3bbd6d1310c25401de2 Mon Sep 17 00:00:00 2001
From: Mars Cheng <mars.cheng@mediatek.com>
Date: Sat, 8 Apr 2017 09:20:29 +0800
Subject: clk: mediatek: add mt6797 clock IDs

Signed-off-by: Mars Cheng <mars.cheng@mediatek.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 include/dt-bindings/clock/mt6797-clk.h | 281 +++++++++++++++++++++++++++++++++
 1 file changed, 281 insertions(+)
 create mode 100644 include/dt-bindings/clock/mt6797-clk.h

diff --git a/include/dt-bindings/clock/mt6797-clk.h b/include/dt-bindings/clock/mt6797-clk.h
new file mode 100644
index 000000000000..2f25a5aca019
--- /dev/null
+++ b/include/dt-bindings/clock/mt6797-clk.h
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2017 MediaTek Inc.
+ * Author: Kevin Chen <kevin-cw.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_CLK_MT6797_H
+#define _DT_BINDINGS_CLK_MT6797_H
+
+/* TOPCKGEN */
+#define	CLK_TOP_MUX_ULPOSC_AXI_CK_MUX_PRE	1
+#define	CLK_TOP_MUX_ULPOSC_AXI_CK_MUX		2
+#define	CLK_TOP_MUX_AXI				3
+#define	CLK_TOP_MUX_MEM				4
+#define	CLK_TOP_MUX_DDRPHYCFG			5
+#define	CLK_TOP_MUX_MM				6
+#define	CLK_TOP_MUX_PWM				7
+#define	CLK_TOP_MUX_VDEC			8
+#define	CLK_TOP_MUX_VENC			9
+#define	CLK_TOP_MUX_MFG				10
+#define	CLK_TOP_MUX_CAMTG			11
+#define	CLK_TOP_MUX_UART			12
+#define	CLK_TOP_MUX_SPI				13
+#define	CLK_TOP_MUX_ULPOSC_SPI_CK_MUX		14
+#define	CLK_TOP_MUX_USB20			15
+#define	CLK_TOP_MUX_MSDC50_0_HCLK		16
+#define	CLK_TOP_MUX_MSDC50_0			17
+#define	CLK_TOP_MUX_MSDC30_1			18
+#define	CLK_TOP_MUX_MSDC30_2			19
+#define	CLK_TOP_MUX_AUDIO			20
+#define	CLK_TOP_MUX_AUD_INTBUS			21
+#define	CLK_TOP_MUX_PMICSPI			22
+#define	CLK_TOP_MUX_SCP				23
+#define	CLK_TOP_MUX_ATB				24
+#define	CLK_TOP_MUX_MJC				25
+#define	CLK_TOP_MUX_DPI0			26
+#define	CLK_TOP_MUX_AUD_1			27
+#define	CLK_TOP_MUX_AUD_2			28
+#define	CLK_TOP_MUX_SSUSB_TOP_SYS		29
+#define	CLK_TOP_MUX_SPM				30
+#define	CLK_TOP_MUX_BSI_SPI			31
+#define	CLK_TOP_MUX_AUDIO_H			32
+#define	CLK_TOP_MUX_ANC_MD32			33
+#define	CLK_TOP_MUX_MFG_52M			34
+#define	CLK_TOP_SYSPLL_CK			35
+#define	CLK_TOP_SYSPLL_D2			36
+#define	CLK_TOP_SYSPLL1_D2			37
+#define	CLK_TOP_SYSPLL1_D4			38
+#define	CLK_TOP_SYSPLL1_D8			39
+#define	CLK_TOP_SYSPLL1_D16			40
+#define	CLK_TOP_SYSPLL_D3			41
+#define	CLK_TOP_SYSPLL_D3_D3			42
+#define	CLK_TOP_SYSPLL2_D2			43
+#define	CLK_TOP_SYSPLL2_D4			44
+#define	CLK_TOP_SYSPLL2_D8			45
+#define	CLK_TOP_SYSPLL_D5			46
+#define	CLK_TOP_SYSPLL3_D2			47
+#define	CLK_TOP_SYSPLL3_D4			48
+#define	CLK_TOP_SYSPLL_D7			49
+#define	CLK_TOP_SYSPLL4_D2			50
+#define	CLK_TOP_SYSPLL4_D4			51
+#define	CLK_TOP_UNIVPLL_CK			52
+#define	CLK_TOP_UNIVPLL_D7			53
+#define	CLK_TOP_UNIVPLL_D26			54
+#define	CLK_TOP_SSUSB_PHY_48M_CK		55
+#define	CLK_TOP_USB_PHY48M_CK			56
+#define	CLK_TOP_UNIVPLL_D2			57
+#define	CLK_TOP_UNIVPLL1_D2			58
+#define	CLK_TOP_UNIVPLL1_D4			59
+#define	CLK_TOP_UNIVPLL1_D8			60
+#define	CLK_TOP_UNIVPLL_D3			61
+#define	CLK_TOP_UNIVPLL2_D2			62
+#define	CLK_TOP_UNIVPLL2_D4			63
+#define	CLK_TOP_UNIVPLL2_D8			64
+#define	CLK_TOP_UNIVPLL_D5			65
+#define	CLK_TOP_UNIVPLL3_D2			66
+#define	CLK_TOP_UNIVPLL3_D4			67
+#define	CLK_TOP_UNIVPLL3_D8			68
+#define	CLK_TOP_ULPOSC_CK_ORG			69
+#define	CLK_TOP_ULPOSC_CK			70
+#define	CLK_TOP_ULPOSC_D2			71
+#define	CLK_TOP_ULPOSC_D3			72
+#define	CLK_TOP_ULPOSC_D4			73
+#define	CLK_TOP_ULPOSC_D8			74
+#define	CLK_TOP_ULPOSC_D10			75
+#define	CLK_TOP_APLL1_CK			76
+#define	CLK_TOP_APLL2_CK			77
+#define	CLK_TOP_MFGPLL_CK			78
+#define	CLK_TOP_MFGPLL_D2			79
+#define	CLK_TOP_IMGPLL_CK			80
+#define	CLK_TOP_IMGPLL_D2			81
+#define	CLK_TOP_IMGPLL_D4			82
+#define	CLK_TOP_CODECPLL_CK			83
+#define	CLK_TOP_CODECPLL_D2			84
+#define	CLK_TOP_VDECPLL_CK			85
+#define	CLK_TOP_TVDPLL_CK			86
+#define	CLK_TOP_TVDPLL_D2			87
+#define	CLK_TOP_TVDPLL_D4			88
+#define	CLK_TOP_TVDPLL_D8			89
+#define	CLK_TOP_TVDPLL_D16			90
+#define	CLK_TOP_MSDCPLL_CK			91
+#define	CLK_TOP_MSDCPLL_D2			92
+#define	CLK_TOP_MSDCPLL_D4			93
+#define	CLK_TOP_MSDCPLL_D8			94
+#define	CLK_TOP_NR				95
+
+/* APMIXED_SYS */
+#define CLK_APMIXED_MAINPLL			1
+#define CLK_APMIXED_UNIVPLL			2
+#define CLK_APMIXED_MFGPLL			3
+#define CLK_APMIXED_MSDCPLL			4
+#define CLK_APMIXED_IMGPLL			5
+#define CLK_APMIXED_TVDPLL			6
+#define CLK_APMIXED_CODECPLL			7
+#define CLK_APMIXED_VDECPLL			8
+#define CLK_APMIXED_APLL1			9
+#define CLK_APMIXED_APLL2			10
+#define CLK_APMIXED_NR				11
+
+/* INFRA_SYS */
+#define	CLK_INFRA_PMIC_TMR			1
+#define	CLK_INFRA_PMIC_AP			2
+#define	CLK_INFRA_PMIC_MD			3
+#define	CLK_INFRA_PMIC_CONN			4
+#define	CLK_INFRA_SCP				5
+#define	CLK_INFRA_SEJ				6
+#define	CLK_INFRA_APXGPT			7
+#define	CLK_INFRA_SEJ_13M			8
+#define	CLK_INFRA_ICUSB				9
+#define	CLK_INFRA_GCE				10
+#define	CLK_INFRA_THERM				11
+#define	CLK_INFRA_I2C0				12
+#define	CLK_INFRA_I2C1				13
+#define	CLK_INFRA_I2C2				14
+#define	CLK_INFRA_I2C3				15
+#define	CLK_INFRA_PWM_HCLK			16
+#define	CLK_INFRA_PWM1				17
+#define	CLK_INFRA_PWM2				18
+#define	CLK_INFRA_PWM3				19
+#define	CLK_INFRA_PWM4				20
+#define	CLK_INFRA_PWM				21
+#define	CLK_INFRA_UART0				22
+#define	CLK_INFRA_UART1				23
+#define	CLK_INFRA_UART2				24
+#define	CLK_INFRA_UART3				25
+#define	CLK_INFRA_MD2MD_CCIF_0			26
+#define	CLK_INFRA_MD2MD_CCIF_1			27
+#define	CLK_INFRA_MD2MD_CCIF_2			28
+#define	CLK_INFRA_FHCTL				29
+#define	CLK_INFRA_BTIF				30
+#define	CLK_INFRA_MD2MD_CCIF_3			31
+#define	CLK_INFRA_SPI				32
+#define	CLK_INFRA_MSDC0				33
+#define	CLK_INFRA_MD2MD_CCIF_4			34
+#define	CLK_INFRA_MSDC1				35
+#define	CLK_INFRA_MSDC2				36
+#define	CLK_INFRA_MD2MD_CCIF_5			37
+#define	CLK_INFRA_GCPU				38
+#define	CLK_INFRA_TRNG				39
+#define	CLK_INFRA_AUXADC			40
+#define	CLK_INFRA_CPUM				41
+#define	CLK_INFRA_AP_C2K_CCIF_0			42
+#define	CLK_INFRA_AP_C2K_CCIF_1			43
+#define	CLK_INFRA_CLDMA				44
+#define	CLK_INFRA_DISP_PWM			45
+#define	CLK_INFRA_AP_DMA			46
+#define	CLK_INFRA_DEVICE_APC			47
+#define	CLK_INFRA_L2C_SRAM			48
+#define	CLK_INFRA_CCIF_AP			49
+#define	CLK_INFRA_AUDIO				50
+#define	CLK_INFRA_CCIF_MD			51
+#define	CLK_INFRA_DRAMC_F26M			52
+#define	CLK_INFRA_I2C4				53
+#define	CLK_INFRA_I2C_APPM			54
+#define	CLK_INFRA_I2C_GPUPM			55
+#define	CLK_INFRA_I2C2_IMM			56
+#define	CLK_INFRA_I2C2_ARB			57
+#define	CLK_INFRA_I2C3_IMM			58
+#define	CLK_INFRA_I2C3_ARB			59
+#define	CLK_INFRA_I2C5				60
+#define	CLK_INFRA_SYS_CIRQ			61
+#define	CLK_INFRA_SPI1				62
+#define	CLK_INFRA_DRAMC_B_F26M			63
+#define	CLK_INFRA_ANC_MD32			64
+#define	CLK_INFRA_ANC_MD32_32K			65
+#define	CLK_INFRA_DVFS_SPM1			66
+#define	CLK_INFRA_AES_TOP0			67
+#define	CLK_INFRA_AES_TOP1			68
+#define	CLK_INFRA_SSUSB_BUS			69
+#define	CLK_INFRA_SPI2				70
+#define	CLK_INFRA_SPI3				71
+#define	CLK_INFRA_SPI4				72
+#define	CLK_INFRA_SPI5				73
+#define	CLK_INFRA_IRTX				74
+#define	CLK_INFRA_SSUSB_SYS			75
+#define	CLK_INFRA_SSUSB_REF			76
+#define	CLK_INFRA_AUDIO_26M			77
+#define	CLK_INFRA_AUDIO_26M_PAD_TOP		78
+#define	CLK_INFRA_MODEM_TEMP_SHARE		79
+#define	CLK_INFRA_VAD_WRAP_SOC			80
+#define	CLK_INFRA_DRAMC_CONF			81
+#define	CLK_INFRA_DRAMC_B_CONF			82
+#define	CLK_INFRA_MFG_VCG			83
+#define	CLK_INFRA_13M				84
+#define	CLK_INFRA_NR				85
+
+/* IMG_SYS */
+#define	CLK_IMG_FDVT				1
+#define	CLK_IMG_DPE				2
+#define	CLK_IMG_DIP				3
+#define	CLK_IMG_LARB6				4
+#define	CLK_IMG_NR				5
+
+/* MM_SYS */
+#define	CLK_MM_SMI_COMMON			1
+#define	CLK_MM_SMI_LARB0			2
+#define	CLK_MM_SMI_LARB5			3
+#define	CLK_MM_CAM_MDP				4
+#define	CLK_MM_MDP_RDMA0			5
+#define	CLK_MM_MDP_RDMA1			6
+#define	CLK_MM_MDP_RSZ0				7
+#define	CLK_MM_MDP_RSZ1				8
+#define	CLK_MM_MDP_RSZ2				9
+#define	CLK_MM_MDP_TDSHP			10
+#define	CLK_MM_MDP_COLOR			11
+#define	CLK_MM_MDP_WDMA				12
+#define	CLK_MM_MDP_WROT0			13
+#define	CLK_MM_MDP_WROT1			14
+#define	CLK_MM_FAKE_ENG				15
+#define	CLK_MM_DISP_OVL0			16
+#define	CLK_MM_DISP_OVL1			17
+#define	CLK_MM_DISP_OVL0_2L			18
+#define	CLK_MM_DISP_OVL1_2L			19
+#define	CLK_MM_DISP_RDMA0			20
+#define	CLK_MM_DISP_RDMA1			21
+#define	CLK_MM_DISP_WDMA0			22
+#define	CLK_MM_DISP_WDMA1			23
+#define	CLK_MM_DISP_COLOR			24
+#define	CLK_MM_DISP_CCORR			25
+#define	CLK_MM_DISP_AAL				26
+#define	CLK_MM_DISP_GAMMA			27
+#define	CLK_MM_DISP_OD				28
+#define	CLK_MM_DISP_DITHER			29
+#define	CLK_MM_DISP_UFOE			30
+#define	CLK_MM_DISP_DSC				31
+#define	CLK_MM_DISP_SPLIT			32
+#define	CLK_MM_DSI0_MM_CLOCK			33
+#define	CLK_MM_DSI1_MM_CLOCK			34
+#define	CLK_MM_DPI_MM_CLOCK			35
+#define	CLK_MM_DPI_INTERFACE_CLOCK		36
+#define	CLK_MM_LARB4_AXI_ASIF_MM_CLOCK		37
+#define	CLK_MM_LARB4_AXI_ASIF_MJC_CLOCK		38
+#define	CLK_MM_DISP_OVL0_MOUT_CLOCK		39
+#define	CLK_MM_FAKE_ENG2			40
+#define	CLK_MM_DSI0_INTERFACE_CLOCK		41
+#define	CLK_MM_DSI1_INTERFACE_CLOCK		42
+#define	CLK_MM_NR				43
+
+/* VDEC_SYS */
+#define	CLK_VDEC_CKEN_ENG			1
+#define	CLK_VDEC_ACTIVE				2
+#define	CLK_VDEC_CKEN				3
+#define	CLK_VDEC_LARB1_CKEN			4
+#define	CLK_VDEC_NR				5
+
+/* VENC_SYS */
+#define	CLK_VENC_0				1
+#define	CLK_VENC_1				2
+#define	CLK_VENC_2				3
+#define	CLK_VENC_3				4
+#define	CLK_VENC_NR				5
+
+#endif /* _DT_BINDINGS_CLK_MT6797_H */
-- 
cgit v1.2.3


From 2b51f514a23563b036d5ab16e9212285c14d21e8 Mon Sep 17 00:00:00 2001
From: Kevin-CW Chen <kevin-cw.chen@mediatek.com>
Date: Sat, 8 Apr 2017 09:20:28 +0800
Subject: dt-bindings: arm: mediatek: document clk bindings for MT6797

This patch adds the binding documentation for apmixedsys, imgsys,
infracfg, mmsys, topckgen, vdecsys and vencsys for MT6797.

Signed-off-by: Kevin-CW Chen <kevin-cw.chen@mediatek.com>
Signed-off-by: Mars Cheng <mars.cheng@mediatek.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt | 1 +
 Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt     | 1 +
 Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt   | 1 +
 Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt      | 1 +
 Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt   | 1 +
 Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt    | 1 +
 Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt    | 3 ++-
 7 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
index cb0054ac7121..cd977db7630c 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
 	- "mediatek,mt2701-apmixedsys"
+	- "mediatek,mt6797-apmixedsys"
 	- "mediatek,mt8135-apmixedsys"
 	- "mediatek,mt8173-apmixedsys"
 - #clock-cells: Must be 1
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
index f6a916686f4c..047b11ae5f45 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
 	- "mediatek,mt2701-imgsys", "syscon"
+	- "mediatek,mt6797-imgsys", "syscon"
 	- "mediatek,mt8173-imgsys", "syscon"
 - #clock-cells: Must be 1
 
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
index 1620ec2a5a3f..58d58e2006b8 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
@@ -8,6 +8,7 @@ Required Properties:
 
 - compatible: Should be one of:
 	- "mediatek,mt2701-infracfg", "syscon"
+	- "mediatek,mt6797-infracfg", "syscon"
 	- "mediatek,mt8135-infracfg", "syscon"
 	- "mediatek,mt8173-infracfg", "syscon"
 - #clock-cells: Must be 1
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
index 67dd2e473d25..70529e0b58e9 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
 	- "mediatek,mt2701-mmsys", "syscon"
+	- "mediatek,mt6797-mmsys", "syscon"
 	- "mediatek,mt8173-mmsys", "syscon"
 - #clock-cells: Must be 1
 
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
index 9f2fe7860114..ec93ecbb9f3c 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
 	- "mediatek,mt2701-topckgen"
+	- "mediatek,mt6797-topckgen"
 	- "mediatek,mt8135-topckgen"
 	- "mediatek,mt8173-topckgen"
 - #clock-cells: Must be 1
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
index 2440f73450c3..d150104f928a 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
@@ -7,6 +7,7 @@ Required Properties:
 
 - compatible: Should be one of:
 	- "mediatek,mt2701-vdecsys", "syscon"
+	- "mediatek,mt6797-vdecsys", "syscon"
 	- "mediatek,mt8173-vdecsys", "syscon"
 - #clock-cells: Must be 1
 
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt
index 5bb2866a2b50..8a93be643647 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt
@@ -5,7 +5,8 @@ The Mediatek vencsys controller provides various clocks to the system.
 
 Required Properties:
 
-- compatible: Should be:
+- compatible: Should be one of:
+	- "mediatek,mt6797-vencsys", "syscon"
 	- "mediatek,mt8173-vencsys", "syscon"
 - #clock-cells: Must be 1
 
-- 
cgit v1.2.3


From 96596aa06628e86ea0e1c08c34b0ccc7619e43ac Mon Sep 17 00:00:00 2001
From: Kevin-CW Chen <kevin-cw.chen@mediatek.com>
Date: Sat, 8 Apr 2017 09:20:30 +0800
Subject: clk: mediatek: add clk support for MT6797

Add MT6797 clock support, include topckgen, apmixedsys, infracfg
and subsystem clocks

Signed-off-by: Kevin-CW Chen <kevin-cw.chen@mediatek.com>
Signed-off-by: Mars Cheng <mars.cheng@mediatek.com>
Tested-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/mediatek/Kconfig           |  32 ++
 drivers/clk/mediatek/Makefile          |   5 +
 drivers/clk/mediatek/clk-mt6797-img.c  |  76 ++++
 drivers/clk/mediatek/clk-mt6797-mm.c   | 136 +++++++
 drivers/clk/mediatek/clk-mt6797-vdec.c |  93 +++++
 drivers/clk/mediatek/clk-mt6797-venc.c |  78 ++++
 drivers/clk/mediatek/clk-mt6797.c      | 714 +++++++++++++++++++++++++++++++++
 7 files changed, 1134 insertions(+)
 create mode 100644 drivers/clk/mediatek/clk-mt6797-img.c
 create mode 100644 drivers/clk/mediatek/clk-mt6797-mm.c
 create mode 100644 drivers/clk/mediatek/clk-mt6797-vdec.c
 create mode 100644 drivers/clk/mediatek/clk-mt6797-venc.c
 create mode 100644 drivers/clk/mediatek/clk-mt6797.c

diff --git a/drivers/clk/mediatek/Kconfig b/drivers/clk/mediatek/Kconfig
index a01ef7806aed..28739a9a6e37 100644
--- a/drivers/clk/mediatek/Kconfig
+++ b/drivers/clk/mediatek/Kconfig
@@ -50,6 +50,38 @@ config COMMON_CLK_MT2701_BDPSYS
 	---help---
 	  This driver supports Mediatek MT2701 bdpsys clocks.
 
+config COMMON_CLK_MT6797
+       bool "Clock driver for Mediatek MT6797"
+       depends on (ARCH_MEDIATEK && ARM64) || COMPILE_TEST
+       select COMMON_CLK_MEDIATEK
+       default ARCH_MEDIATEK && ARM64
+       ---help---
+         This driver supports Mediatek MT6797 basic clocks.
+
+config COMMON_CLK_MT6797_MMSYS
+       bool "Clock driver for Mediatek MT6797 mmsys"
+       depends on COMMON_CLK_MT6797
+       ---help---
+         This driver supports Mediatek MT6797 mmsys clocks.
+
+config COMMON_CLK_MT6797_IMGSYS
+       bool "Clock driver for Mediatek MT6797 imgsys"
+       depends on COMMON_CLK_MT6797
+       ---help---
+         This driver supports Mediatek MT6797 imgsys clocks.
+
+config COMMON_CLK_MT6797_VDECSYS
+       bool "Clock driver for Mediatek MT6797 vdecsys"
+       depends on COMMON_CLK_MT6797
+       ---help---
+         This driver supports Mediatek MT6797 vdecsys clocks.
+
+config COMMON_CLK_MT6797_VENCSYS
+       bool "Clock driver for Mediatek MT6797 vencsys"
+       depends on COMMON_CLK_MT6797
+       ---help---
+         This driver supports Mediatek MT6797 vencsys clocks.
+
 config COMMON_CLK_MT8135
 	bool "Clock driver for Mediatek MT8135"
 	depends on (ARCH_MEDIATEK && ARM) || COMPILE_TEST
diff --git a/drivers/clk/mediatek/Makefile b/drivers/clk/mediatek/Makefile
index 19ae7ef79b57..5c3afb86b9ec 100644
--- a/drivers/clk/mediatek/Makefile
+++ b/drivers/clk/mediatek/Makefile
@@ -1,5 +1,10 @@
 obj-$(CONFIG_COMMON_CLK_MEDIATEK) += clk-mtk.o clk-pll.o clk-gate.o clk-apmixed.o
 obj-$(CONFIG_RESET_CONTROLLER) += reset.o
+obj-$(CONFIG_COMMON_CLK_MT6797) += clk-mt6797.o
+obj-$(CONFIG_COMMON_CLK_MT6797_IMGSYS) += clk-mt6797-img.o
+obj-$(CONFIG_COMMON_CLK_MT6797_MMSYS) += clk-mt6797-mm.o
+obj-$(CONFIG_COMMON_CLK_MT6797_VDECSYS) += clk-mt6797-vdec.o
+obj-$(CONFIG_COMMON_CLK_MT6797_VENCSYS) += clk-mt6797-venc.o
 obj-$(CONFIG_COMMON_CLK_MT2701) += clk-mt2701.o
 obj-$(CONFIG_COMMON_CLK_MT2701_BDPSYS) += clk-mt2701-bdp.o
 obj-$(CONFIG_COMMON_CLK_MT2701_ETHSYS) += clk-mt2701-eth.o
diff --git a/drivers/clk/mediatek/clk-mt6797-img.c b/drivers/clk/mediatek/clk-mt6797-img.c
new file mode 100644
index 000000000000..94cc48065918
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt6797-img.c
@@ -0,0 +1,76 @@
+/* Copyright (c) 2017 MediaTek Inc.
+ * Author: Kevin Chen <kevin-cw.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+#include <dt-bindings/clock/mt6797-clk.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+static const struct mtk_gate_regs img_cg_regs = {
+	.set_ofs = 0x0004,
+	.clr_ofs = 0x0008,
+	.sta_ofs = 0x0000,
+};
+
+#define GATE_IMG(_id, _name, _parent, _shift) {		\
+		.id = _id,				\
+		.name = _name,				\
+		.parent_name = _parent,			\
+		.regs = &img_cg_regs,			\
+		.shift = _shift,			\
+		.ops = &mtk_clk_gate_ops_setclr,	\
+	}
+
+static const struct mtk_gate img_clks[] = {
+	GATE_IMG(CLK_IMG_FDVT, "img_fdvt", "mm_sel", 11),
+	GATE_IMG(CLK_IMG_DPE, "img_dpe", "mm_sel", 10),
+	GATE_IMG(CLK_IMG_DIP, "img_dip", "mm_sel", 6),
+	GATE_IMG(CLK_IMG_LARB6, "img_larb6", "mm_sel", 0),
+};
+
+static const struct of_device_id of_match_clk_mt6797_img[] = {
+	{ .compatible = "mediatek,mt6797-imgsys", },
+	{}
+};
+
+static int clk_mt6797_img_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	int r;
+	struct device_node *node = pdev->dev.of_node;
+
+	clk_data = mtk_alloc_clk_data(CLK_IMG_NR);
+
+	mtk_clk_register_gates(node, img_clks, ARRAY_SIZE(img_clks),
+			       clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	if (r)
+		dev_err(&pdev->dev,
+			"could not register clock provider: %s: %d\n",
+			pdev->name, r);
+
+	return r;
+}
+
+static struct platform_driver clk_mt6797_img_drv = {
+	.probe = clk_mt6797_img_probe,
+	.driver = {
+		.name = "clk-mt6797-img",
+		.of_match_table = of_match_clk_mt6797_img,
+	},
+};
+
+builtin_platform_driver(clk_mt6797_img_drv);
diff --git a/drivers/clk/mediatek/clk-mt6797-mm.c b/drivers/clk/mediatek/clk-mt6797-mm.c
new file mode 100644
index 000000000000..c57d3eed270d
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt6797-mm.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2017 MediaTek Inc.
+ * Author: Kevin Chen <kevin-cw.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+#include <dt-bindings/clock/mt6797-clk.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+static const struct mtk_gate_regs mm0_cg_regs = {
+	.set_ofs = 0x0104,
+	.clr_ofs = 0x0108,
+	.sta_ofs = 0x0100,
+};
+
+static const struct mtk_gate_regs mm1_cg_regs = {
+	.set_ofs = 0x0114,
+	.clr_ofs = 0x0118,
+	.sta_ofs = 0x0110,
+};
+
+#define GATE_MM0(_id, _name, _parent, _shift) {			\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &mm0_cg_regs,				\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr,		\
+}
+
+#define GATE_MM1(_id, _name, _parent, _shift) {			\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &mm1_cg_regs,				\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr,		\
+}
+
+static const struct mtk_gate mm_clks[] = {
+	GATE_MM0(CLK_MM_SMI_COMMON, "mm_smi_common", "mm_sel", 0),
+	GATE_MM0(CLK_MM_SMI_LARB0, "mm_smi_larb0", "mm_sel", 1),
+	GATE_MM0(CLK_MM_SMI_LARB5, "mm_smi_larb5", "mm_sel", 2),
+	GATE_MM0(CLK_MM_CAM_MDP, "mm_cam_mdp", "mm_sel", 3),
+	GATE_MM0(CLK_MM_MDP_RDMA0, "mm_mdp_rdma0", "mm_sel", 4),
+	GATE_MM0(CLK_MM_MDP_RDMA1, "mm_mdp_rdma1", "mm_sel", 5),
+	GATE_MM0(CLK_MM_MDP_RSZ0, "mm_mdp_rsz0", "mm_sel", 6),
+	GATE_MM0(CLK_MM_MDP_RSZ1, "mm_mdp_rsz1", "mm_sel", 7),
+	GATE_MM0(CLK_MM_MDP_RSZ2, "mm_mdp_rsz2", "mm_sel", 8),
+	GATE_MM0(CLK_MM_MDP_TDSHP, "mm_mdp_tdshp", "mm_sel", 9),
+	GATE_MM0(CLK_MM_MDP_COLOR, "mm_mdp_color", "mm_sel", 10),
+	GATE_MM0(CLK_MM_MDP_WDMA, "mm_mdp_wdma", "mm_sel", 11),
+	GATE_MM0(CLK_MM_MDP_WROT0, "mm_mdp_wrot0", "mm_sel", 12),
+	GATE_MM0(CLK_MM_MDP_WROT1, "mm_mdp_wrot1", "mm_sel", 13),
+	GATE_MM0(CLK_MM_FAKE_ENG, "mm_fake_eng", "mm_sel", 14),
+	GATE_MM0(CLK_MM_DISP_OVL0, "mm_disp_ovl0", "mm_sel", 15),
+	GATE_MM0(CLK_MM_DISP_OVL1, "mm_disp_ovl1", "mm_sel", 16),
+	GATE_MM0(CLK_MM_DISP_OVL0_2L, "mm_disp_ovl0_2l", "mm_sel", 17),
+	GATE_MM0(CLK_MM_DISP_OVL1_2L, "mm_disp_ovl1_2l", "mm_sel", 18),
+	GATE_MM0(CLK_MM_DISP_RDMA0, "mm_disp_rdma0", "mm_sel", 19),
+	GATE_MM0(CLK_MM_DISP_RDMA1, "mm_disp_rdma1", "mm_sel", 20),
+	GATE_MM0(CLK_MM_DISP_WDMA0, "mm_disp_wdma0", "mm_sel", 21),
+	GATE_MM0(CLK_MM_DISP_WDMA1, "mm_disp_wdma1", "mm_sel", 22),
+	GATE_MM0(CLK_MM_DISP_COLOR, "mm_disp_color", "mm_sel", 23),
+	GATE_MM0(CLK_MM_DISP_CCORR, "mm_disp_ccorr", "mm_sel", 24),
+	GATE_MM0(CLK_MM_DISP_AAL, "mm_disp_aal", "mm_sel", 25),
+	GATE_MM0(CLK_MM_DISP_GAMMA, "mm_disp_gamma", "mm_sel", 26),
+	GATE_MM0(CLK_MM_DISP_OD, "mm_disp_od", "mm_sel", 27),
+	GATE_MM0(CLK_MM_DISP_DITHER, "mm_disp_dither", "mm_sel", 28),
+	GATE_MM0(CLK_MM_DISP_UFOE, "mm_disp_ufoe", "mm_sel", 29),
+	GATE_MM0(CLK_MM_DISP_DSC, "mm_disp_dsc", "mm_sel", 30),
+	GATE_MM0(CLK_MM_DISP_SPLIT, "mm_disp_split", "mm_sel", 31),
+	GATE_MM1(CLK_MM_DSI0_MM_CLOCK, "mm_dsi0_mm_clock", "mm_sel", 0),
+	GATE_MM1(CLK_MM_DSI1_MM_CLOCK, "mm_dsi1_mm_clock", "mm_sel", 2),
+	GATE_MM1(CLK_MM_DPI_MM_CLOCK, "mm_dpi_mm_clock", "mm_sel", 4),
+	GATE_MM1(CLK_MM_DPI_INTERFACE_CLOCK, "mm_dpi_interface_clock",
+		 "dpi0_sel", 5),
+	GATE_MM1(CLK_MM_LARB4_AXI_ASIF_MM_CLOCK, "mm_larb4_axi_asif_mm_clock",
+		 "mm_sel", 6),
+	GATE_MM1(CLK_MM_LARB4_AXI_ASIF_MJC_CLOCK, "mm_larb4_axi_asif_mjc_clock",
+		 "mjc_sel", 7),
+	GATE_MM1(CLK_MM_DISP_OVL0_MOUT_CLOCK, "mm_disp_ovl0_mout_clock",
+		 "mm_sel", 8),
+	GATE_MM1(CLK_MM_FAKE_ENG2, "mm_fake_eng2", "mm_sel", 9),
+	GATE_MM1(CLK_MM_DSI0_INTERFACE_CLOCK, "mm_dsi0_interface_clock",
+		 "clk26m", 1),
+	GATE_MM1(CLK_MM_DSI1_INTERFACE_CLOCK, "mm_dsi1_interface_clock",
+		 "clk26m", 3),
+};
+
+static const struct of_device_id of_match_clk_mt6797_mm[] = {
+	{ .compatible = "mediatek,mt6797-mmsys", },
+	{}
+};
+
+static int clk_mt6797_mm_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	int r;
+	struct device_node *node = pdev->dev.of_node;
+
+	clk_data = mtk_alloc_clk_data(CLK_MM_NR);
+
+	mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks),
+			       clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	if (r)
+		dev_err(&pdev->dev,
+			"could not register clock provider: %s: %d\n",
+			pdev->name, r);
+
+	return r;
+}
+
+static struct platform_driver clk_mt6797_mm_drv = {
+	.probe = clk_mt6797_mm_probe,
+	.driver = {
+		.name = "clk-mt6797-mm",
+		.of_match_table = of_match_clk_mt6797_mm,
+	},
+};
+
+builtin_platform_driver(clk_mt6797_mm_drv);
diff --git a/drivers/clk/mediatek/clk-mt6797-vdec.c b/drivers/clk/mediatek/clk-mt6797-vdec.c
new file mode 100644
index 000000000000..7c402ca6c0b2
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt6797-vdec.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2017 MediaTek Inc.
+ * Author: Kevin-CW Chen <kevin-cw.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt6797-clk.h>
+
+static const struct mtk_gate_regs vdec0_cg_regs = {
+	.set_ofs = 0x0000,
+	.clr_ofs = 0x0004,
+	.sta_ofs = 0x0000,
+};
+
+static const struct mtk_gate_regs vdec1_cg_regs = {
+	.set_ofs = 0x0008,
+	.clr_ofs = 0x000c,
+	.sta_ofs = 0x0008,
+};
+
+#define GATE_VDEC0(_id, _name, _parent, _shift) {		\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &vdec0_cg_regs,				\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr_inv,		\
+}
+
+#define GATE_VDEC1(_id, _name, _parent, _shift) {		\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &vdec1_cg_regs,				\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr_inv,		\
+}
+
+static const struct mtk_gate vdec_clks[] = {
+	GATE_VDEC0(CLK_VDEC_CKEN_ENG, "vdec_cken_eng", "vdec_sel", 8),
+	GATE_VDEC0(CLK_VDEC_ACTIVE, "vdec_active", "vdec_sel", 4),
+	GATE_VDEC0(CLK_VDEC_CKEN, "vdec_cken", "vdec_sel", 0),
+	GATE_VDEC1(CLK_VDEC_LARB1_CKEN, "vdec_larb1_cken", "mm_sel", 0),
+};
+
+static const struct of_device_id of_match_clk_mt6797_vdec[] = {
+	{ .compatible = "mediatek,mt6797-vdecsys", },
+	{}
+};
+
+static int clk_mt6797_vdec_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	int r;
+	struct device_node *node = pdev->dev.of_node;
+
+	clk_data = mtk_alloc_clk_data(CLK_VDEC_NR);
+
+	mtk_clk_register_gates(node, vdec_clks, ARRAY_SIZE(vdec_clks),
+			       clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	if (r)
+		dev_err(&pdev->dev,
+			"could not register clock provider: %s: %d\n",
+			pdev->name, r);
+
+	return r;
+}
+
+static struct platform_driver clk_mt6797_vdec_drv = {
+	.probe = clk_mt6797_vdec_probe,
+	.driver = {
+		.name = "clk-mt6797-vdec",
+		.of_match_table = of_match_clk_mt6797_vdec,
+	},
+};
+
+builtin_platform_driver(clk_mt6797_vdec_drv);
diff --git a/drivers/clk/mediatek/clk-mt6797-venc.c b/drivers/clk/mediatek/clk-mt6797-venc.c
new file mode 100644
index 000000000000..e73d51756f13
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt6797-venc.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 MediaTek Inc.
+ * Author: Kevin Chen <kevin-cw.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt6797-clk.h>
+
+static const struct mtk_gate_regs venc_cg_regs = {
+	.set_ofs = 0x0004,
+	.clr_ofs = 0x0008,
+	.sta_ofs = 0x0000,
+};
+
+#define GATE_VENC(_id, _name, _parent, _shift) {	\
+		.id = _id,				\
+		.name = _name,				\
+		.parent_name = _parent,			\
+		.regs = &venc_cg_regs,			\
+		.shift = _shift,			\
+		.ops = &mtk_clk_gate_ops_setclr_inv,	\
+	}
+
+static const struct mtk_gate venc_clks[] = {
+	GATE_VENC(CLK_VENC_0, "venc_0", "mm_sel", 0),
+	GATE_VENC(CLK_VENC_1, "venc_1", "venc_sel", 4),
+	GATE_VENC(CLK_VENC_2, "venc_2", "venc_sel", 8),
+	GATE_VENC(CLK_VENC_3, "venc_3", "venc_sel", 12),
+};
+
+static const struct of_device_id of_match_clk_mt6797_venc[] = {
+	{ .compatible = "mediatek,mt6797-vencsys", },
+	{}
+};
+
+static int clk_mt6797_venc_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	int r;
+	struct device_node *node = pdev->dev.of_node;
+
+	clk_data = mtk_alloc_clk_data(CLK_VENC_NR);
+
+	mtk_clk_register_gates(node, venc_clks, ARRAY_SIZE(venc_clks),
+			       clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	if (r)
+		dev_err(&pdev->dev,
+			"could not register clock provider: %s: %d\n",
+			pdev->name, r);
+
+	return r;
+}
+
+static struct platform_driver clk_mt6797_venc_drv = {
+	.probe = clk_mt6797_venc_probe,
+	.driver = {
+		.name = "clk-mt6797-venc",
+		.of_match_table = of_match_clk_mt6797_venc,
+	},
+};
+
+builtin_platform_driver(clk_mt6797_venc_drv);
diff --git a/drivers/clk/mediatek/clk-mt6797.c b/drivers/clk/mediatek/clk-mt6797.c
new file mode 100644
index 000000000000..5702bc974ed9
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt6797.c
@@ -0,0 +1,714 @@
+/*
+ * Copyright (c) 2016 MediaTek Inc.
+ * Author: Kevin Chen <kevin-cw.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt6797-clk.h>
+
+/*
+ * For some clocks, we don't care what their actual rates are. And these
+ * clocks may change their rate on different products or different scenarios.
+ * So we model these clocks' rate as 0, to denote it's not an actual rate.
+ */
+
+static DEFINE_SPINLOCK(mt6797_clk_lock);
+
+static const struct mtk_fixed_factor top_fixed_divs[] = {
+	FACTOR(CLK_TOP_SYSPLL_CK, "syspll_ck", "mainpll", 1, 1),
+	FACTOR(CLK_TOP_SYSPLL_D2, "syspll_d2", "mainpll", 1, 2),
+	FACTOR(CLK_TOP_SYSPLL1_D2, "syspll1_d2", "syspll_d2", 1, 2),
+	FACTOR(CLK_TOP_SYSPLL1_D4, "syspll1_d4", "syspll_d2", 1, 4),
+	FACTOR(CLK_TOP_SYSPLL1_D8, "syspll1_d8", "syspll_d2", 1, 8),
+	FACTOR(CLK_TOP_SYSPLL1_D16, "syspll1_d16", "syspll_d2", 1, 16),
+	FACTOR(CLK_TOP_SYSPLL_D3, "syspll_d3", "mainpll", 1, 3),
+	FACTOR(CLK_TOP_SYSPLL_D3_D3, "syspll_d3_d3", "syspll_d3", 1, 3),
+	FACTOR(CLK_TOP_SYSPLL2_D2, "syspll2_d2", "syspll_d3", 1, 2),
+	FACTOR(CLK_TOP_SYSPLL2_D4, "syspll2_d4", "syspll_d3", 1, 4),
+	FACTOR(CLK_TOP_SYSPLL2_D8, "syspll2_d8", "syspll_d3", 1, 8),
+	FACTOR(CLK_TOP_SYSPLL_D5, "syspll_d5", "mainpll", 1, 5),
+	FACTOR(CLK_TOP_SYSPLL3_D2, "syspll3_d2", "syspll_d5", 1, 2),
+	FACTOR(CLK_TOP_SYSPLL3_D4, "syspll3_d4", "syspll_d5", 1, 4),
+	FACTOR(CLK_TOP_SYSPLL_D7, "syspll_d7", "mainpll", 1, 7),
+	FACTOR(CLK_TOP_SYSPLL4_D2, "syspll4_d2", "syspll_d7", 1, 2),
+	FACTOR(CLK_TOP_SYSPLL4_D4, "syspll4_d4", "syspll_d7", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL_CK, "univpll_ck", "univpll", 1, 1),
+	FACTOR(CLK_TOP_UNIVPLL_D7, "univpll_d7", "univpll", 1, 7),
+	FACTOR(CLK_TOP_UNIVPLL_D26, "univpll_d26", "univpll", 1, 26),
+	FACTOR(CLK_TOP_SSUSB_PHY_48M_CK, "ssusb_phy_48m_ck", "univpll", 1, 1),
+	FACTOR(CLK_TOP_USB_PHY48M_CK, "usb_phy48m_ck", "univpll", 1, 1),
+	FACTOR(CLK_TOP_UNIVPLL_D2, "univpll_d2", "univpll", 1, 2),
+	FACTOR(CLK_TOP_UNIVPLL1_D2, "univpll1_d2", "univpll_d2", 1, 2),
+	FACTOR(CLK_TOP_UNIVPLL1_D4, "univpll1_d4", "univpll_d2", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL1_D8, "univpll1_d8", "univpll_d2", 1, 8),
+	FACTOR(CLK_TOP_UNIVPLL_D3, "univpll_d3", "univpll", 1, 3),
+	FACTOR(CLK_TOP_UNIVPLL2_D2, "univpll2_d2", "univpll", 1, 2),
+	FACTOR(CLK_TOP_UNIVPLL2_D4, "univpll2_d4", "univpll", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL2_D8, "univpll2_d8", "univpll", 1, 8),
+	FACTOR(CLK_TOP_UNIVPLL_D5, "univpll_d5", "univpll", 1, 5),
+	FACTOR(CLK_TOP_UNIVPLL3_D2, "univpll3_d2", "univpll_d5", 1, 2),
+	FACTOR(CLK_TOP_UNIVPLL3_D4, "univpll3_d4", "univpll_d5", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL3_D8, "univpll3_d8", "univpll_d5", 1, 8),
+	FACTOR(CLK_TOP_ULPOSC_CK_ORG, "ulposc_ck_org", "ulposc", 1, 1),
+	FACTOR(CLK_TOP_ULPOSC_CK, "ulposc_ck", "ulposc_ck_org", 1, 3),
+	FACTOR(CLK_TOP_ULPOSC_D2, "ulposc_d2", "ulposc_ck", 1, 2),
+	FACTOR(CLK_TOP_ULPOSC_D3, "ulposc_d3", "ulposc_ck", 1, 4),
+	FACTOR(CLK_TOP_ULPOSC_D4, "ulposc_d4", "ulposc_ck", 1, 8),
+	FACTOR(CLK_TOP_ULPOSC_D8, "ulposc_d8", "ulposc_ck", 1, 10),
+	FACTOR(CLK_TOP_ULPOSC_D10, "ulposc_d10", "ulposc_ck_org", 1, 1),
+	FACTOR(CLK_TOP_APLL1_CK, "apll1_ck", "apll1", 1, 1),
+	FACTOR(CLK_TOP_APLL2_CK, "apll2_ck", "apll2", 1, 1),
+	FACTOR(CLK_TOP_MFGPLL_CK, "mfgpll_ck", "mfgpll", 1, 1),
+	FACTOR(CLK_TOP_MFGPLL_D2, "mfgpll_d2", "mfgpll_ck", 1, 2),
+	FACTOR(CLK_TOP_IMGPLL_CK, "imgpll_ck", "imgpll", 1, 1),
+	FACTOR(CLK_TOP_IMGPLL_D2, "imgpll_d2", "imgpll_ck", 1, 2),
+	FACTOR(CLK_TOP_IMGPLL_D4, "imgpll_d4", "imgpll_ck", 1, 4),
+	FACTOR(CLK_TOP_CODECPLL_CK, "codecpll_ck", "codecpll", 1, 1),
+	FACTOR(CLK_TOP_CODECPLL_D2, "codecpll_d2", "codecpll_ck", 1, 2),
+	FACTOR(CLK_TOP_VDECPLL_CK, "vdecpll_ck", "vdecpll", 1, 1),
+	FACTOR(CLK_TOP_TVDPLL_CK, "tvdpll_ck", "tvdpll", 1, 1),
+	FACTOR(CLK_TOP_TVDPLL_D2, "tvdpll_d2", "tvdpll_ck", 1, 2),
+	FACTOR(CLK_TOP_TVDPLL_D4, "tvdpll_d4", "tvdpll_ck", 1, 4),
+	FACTOR(CLK_TOP_TVDPLL_D8, "tvdpll_d8", "tvdpll_ck", 1, 8),
+	FACTOR(CLK_TOP_TVDPLL_D16, "tvdpll_d16", "tvdpll_ck", 1, 16),
+	FACTOR(CLK_TOP_MSDCPLL_CK, "msdcpll_ck", "msdcpll", 1, 1),
+	FACTOR(CLK_TOP_MSDCPLL_D2, "msdcpll_d2", "msdcpll_ck", 1, 2),
+	FACTOR(CLK_TOP_MSDCPLL_D4, "msdcpll_d4", "msdcpll_ck", 1, 4),
+	FACTOR(CLK_TOP_MSDCPLL_D8, "msdcpll_d8", "msdcpll_ck", 1, 8),
+};
+
+static const char * const axi_parents[] = {
+	"clk26m",
+	"syspll_d7",
+	"ulposc_axi_ck_mux",
+};
+
+static const char * const ulposc_axi_ck_mux_parents[] = {
+	"syspll1_d4",
+	"ulposc_axi_ck_mux_pre",
+};
+
+static const char * const ulposc_axi_ck_mux_pre_parents[] = {
+	"ulposc_d2",
+	"ulposc_d3",
+};
+
+static const char * const ddrphycfg_parents[] = {
+	"clk26m",
+	"syspll3_d2",
+	"syspll2_d4",
+	"syspll1_d8",
+};
+
+static const char * const mm_parents[] = {
+	"clk26m",
+	"imgpll_ck",
+	"univpll1_d2",
+	"syspll1_d2",
+};
+
+static const char * const pwm_parents[] = {
+	"clk26m",
+	"univpll2_d4",
+	"ulposc_d2",
+	"ulposc_d3",
+	"ulposc_d8",
+	"ulposc_d10",
+	"ulposc_d4",
+};
+
+static const char * const vdec_parents[] = {
+	"clk26m",
+	"vdecpll_ck",
+	"imgpll_ck",
+	"syspll_d3",
+	"univpll_d5",
+	"clk26m",
+	"clk26m",
+};
+
+static const char * const venc_parents[] = {
+	"clk26m",
+	"codecpll_ck",
+	"syspll_d3",
+};
+
+static const char * const mfg_parents[] = {
+	"clk26m",
+	"mfgpll_ck",
+	"syspll_d3",
+	"univpll_d3",
+};
+
+static const char * const camtg[] = {
+	"clk26m",
+	"univpll_d26",
+	"univpll2_d2",
+};
+
+static const char * const uart_parents[] = {
+	"clk26m",
+	"univpll2_d8",
+};
+
+static const char * const spi_parents[] = {
+	"clk26m",
+	"syspll3_d2",
+	"syspll2_d4",
+	"ulposc_spi_ck_mux",
+};
+
+static const char * const ulposc_spi_ck_mux_parents[] = {
+	"ulposc_d2",
+	"ulposc_d3",
+};
+
+static const char * const usb20_parents[] = {
+	"clk26m",
+	"univpll1_d8",
+	"syspll4_d2",
+};
+
+static const char * const msdc50_0_hclk_parents[] = {
+	"clk26m",
+	"syspll1_d2",
+	"syspll2_d2",
+	"syspll4_d2",
+};
+
+static const char * const msdc50_0_parents[] = {
+	"clk26m",
+	"msdcpll",
+	"syspll_d3",
+	"univpll1_d4",
+	"syspll2_d2",
+	"syspll_d7",
+	"msdcpll_d2",
+	"univpll1_d2",
+	"univpll_d3",
+};
+
+static const char * const msdc30_1_parents[] = {
+	"clk26m",
+	"univpll2_d2",
+	"msdcpll_d2",
+	"univpll1_d4",
+	"syspll2_d2",
+	"syspll_d7",
+	"univpll_d7",
+};
+
+static const char * const msdc30_2_parents[] = {
+	"clk26m",
+	"univpll2_d8",
+	"syspll2_d8",
+	"syspll1_d8",
+	"msdcpll_d8",
+	"syspll3_d4",
+	"univpll_d26",
+};
+
+static const char * const audio_parents[] = {
+	"clk26m",
+	"syspll3_d4",
+	"syspll4_d4",
+	"syspll1_d16",
+};
+
+static const char * const aud_intbus_parents[] = {
+	"clk26m",
+	"syspll1_d4",
+	"syspll4_d2",
+};
+
+static const char * const pmicspi_parents[] = {
+	"clk26m",
+	"univpll_d26",
+	"syspll3_d4",
+	"syspll1_d8",
+	"ulposc_d4",
+	"ulposc_d8",
+	"syspll2_d8",
+};
+
+static const char * const scp_parents[] = {
+	"clk26m",
+	"syspll_d3",
+	"ulposc_ck",
+	"univpll_d5",
+};
+
+static const char * const atb_parents[] = {
+	"clk26m",
+	"syspll1_d2",
+	"syspll_d5",
+};
+
+static const char * const mjc_parents[] = {
+	"clk26m",
+	"imgpll_ck",
+	"univpll_d5",
+	"syspll1_d2",
+};
+
+static const char * const dpi0_parents[] = {
+	"clk26m",
+	"tvdpll_d2",
+	"tvdpll_d4",
+	"tvdpll_d8",
+	"tvdpll_d16",
+	"clk26m",
+	"clk26m",
+};
+
+static const char * const aud_1_parents[] = {
+	"clk26m",
+	"apll1_ck",
+};
+
+static const char * const aud_2_parents[] = {
+	"clk26m",
+	"apll2_ck",
+};
+
+static const char * const ssusb_top_sys_parents[] = {
+	"clk26m",
+	"univpll3_d2",
+};
+
+static const char * const spm_parents[] = {
+	"clk26m",
+	"syspll1_d8",
+};
+
+static const char * const bsi_spi_parents[] = {
+	"clk26m",
+	"syspll_d3_d3",
+	"syspll1_d4",
+	"syspll_d7",
+};
+
+static const char * const audio_h_parents[] = {
+	"clk26m",
+	"apll2_ck",
+	"apll1_ck",
+	"univpll_d7",
+};
+
+static const char * const mfg_52m_parents[] = {
+	"clk26m",
+	"univpll2_d8",
+	"univpll2_d4",
+	"univpll2_d4",
+};
+
+static const char * const anc_md32_parents[] = {
+	"clk26m",
+	"syspll1_d2",
+	"univpll_d5",
+};
+
+static const struct mtk_composite top_muxes[] = {
+	MUX(CLK_TOP_MUX_ULPOSC_AXI_CK_MUX_PRE, "ulposc_axi_ck_mux_pre",
+	    ulposc_axi_ck_mux_pre_parents, 0x0040, 3, 1),
+	MUX(CLK_TOP_MUX_ULPOSC_AXI_CK_MUX, "ulposc_axi_ck_mux",
+	    ulposc_axi_ck_mux_parents, 0x0040, 2, 1),
+	MUX(CLK_TOP_MUX_AXI, "axi_sel", axi_parents,
+	    0x0040, 0, 2),
+	MUX(CLK_TOP_MUX_DDRPHYCFG, "ddrphycfg_sel", ddrphycfg_parents,
+	    0x0040, 16, 2),
+	MUX(CLK_TOP_MUX_MM, "mm_sel", mm_parents,
+	    0x0040, 24, 2),
+	MUX_GATE(CLK_TOP_MUX_PWM, "pwm_sel", pwm_parents, 0x0050, 0, 3, 7),
+	MUX_GATE(CLK_TOP_MUX_VDEC, "vdec_sel", vdec_parents, 0x0050, 8, 3, 15),
+	MUX_GATE(CLK_TOP_MUX_VENC, "venc_sel", venc_parents, 0x0050, 16, 2, 23),
+	MUX_GATE(CLK_TOP_MUX_MFG, "mfg_sel", mfg_parents, 0x0050, 24, 2, 31),
+	MUX_GATE(CLK_TOP_MUX_CAMTG, "camtg_sel", camtg, 0x0060, 0, 2, 7),
+	MUX_GATE(CLK_TOP_MUX_UART, "uart_sel", uart_parents, 0x0060, 8, 1, 15),
+	MUX_GATE(CLK_TOP_MUX_SPI, "spi_sel", spi_parents, 0x0060, 16, 2, 23),
+	MUX(CLK_TOP_MUX_ULPOSC_SPI_CK_MUX, "ulposc_spi_ck_mux",
+	    ulposc_spi_ck_mux_parents, 0x0060, 18, 1),
+	MUX_GATE(CLK_TOP_MUX_USB20, "usb20_sel", usb20_parents,
+		 0x0060, 24, 2, 31),
+	MUX(CLK_TOP_MUX_MSDC50_0_HCLK, "msdc50_0_hclk_sel",
+	    msdc50_0_hclk_parents, 0x0070, 8, 2),
+	MUX_GATE(CLK_TOP_MUX_MSDC50_0, "msdc50_0_sel", msdc50_0_parents,
+		 0x0070, 16, 4, 23),
+	MUX_GATE(CLK_TOP_MUX_MSDC30_1, "msdc30_1_sel", msdc30_1_parents,
+		 0x0070, 24, 3, 31),
+	MUX_GATE(CLK_TOP_MUX_MSDC30_2, "msdc30_2_sel", msdc30_2_parents,
+		 0x0080, 0, 3, 7),
+	MUX_GATE(CLK_TOP_MUX_AUDIO, "audio_sel", audio_parents,
+		 0x0080, 16, 2, 23),
+	MUX(CLK_TOP_MUX_AUD_INTBUS, "aud_intbus_sel", aud_intbus_parents,
+	    0x0080, 24, 2),
+	MUX(CLK_TOP_MUX_PMICSPI, "pmicspi_sel", pmicspi_parents,
+	    0x0090, 0, 3),
+	MUX(CLK_TOP_MUX_SCP, "scp_sel", scp_parents,
+	    0x0090, 8, 2),
+	MUX(CLK_TOP_MUX_ATB, "atb_sel", atb_parents,
+	    0x0090, 16, 2),
+	MUX_GATE(CLK_TOP_MUX_MJC, "mjc_sel", mjc_parents, 0x0090, 24, 2, 31),
+	MUX_GATE(CLK_TOP_MUX_DPI0, "dpi0_sel", dpi0_parents, 0x00A0, 0, 3, 7),
+	MUX_GATE(CLK_TOP_MUX_AUD_1, "aud_1_sel", aud_1_parents,
+		 0x00A0, 16, 1, 23),
+	MUX_GATE(CLK_TOP_MUX_AUD_2, "aud_2_sel", aud_2_parents,
+		 0x00A0, 24, 1, 31),
+	MUX(CLK_TOP_MUX_SSUSB_TOP_SYS, "ssusb_top_sys_sel",
+	    ssusb_top_sys_parents, 0x00B0, 8, 1),
+	MUX(CLK_TOP_MUX_SPM, "spm_sel", spm_parents,
+	    0x00C0, 0, 1),
+	MUX(CLK_TOP_MUX_BSI_SPI, "bsi_spi_sel", bsi_spi_parents,
+	    0x00C0, 8, 2),
+	MUX_GATE(CLK_TOP_MUX_AUDIO_H, "audio_h_sel", audio_h_parents,
+		 0x00C0, 16, 2, 23),
+	MUX_GATE(CLK_TOP_MUX_ANC_MD32, "anc_md32_sel", anc_md32_parents,
+		 0x00C0, 24, 2, 31),
+	MUX(CLK_TOP_MUX_MFG_52M, "mfg_52m_sel", mfg_52m_parents,
+	    0x0104, 1, 2),
+};
+
+static int mtk_topckgen_init(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	void __iomem *base;
+	struct device_node *node = pdev->dev.of_node;
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	clk_data = mtk_alloc_clk_data(CLK_TOP_NR);
+
+	mtk_clk_register_factors(top_fixed_divs, ARRAY_SIZE(top_fixed_divs),
+				 clk_data);
+
+	mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base,
+				    &mt6797_clk_lock, clk_data);
+
+	return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+static const struct mtk_gate_regs infra0_cg_regs = {
+	.set_ofs = 0x0080,
+	.clr_ofs = 0x0084,
+	.sta_ofs = 0x0090,
+};
+
+static const struct mtk_gate_regs infra1_cg_regs = {
+	.set_ofs = 0x0088,
+	.clr_ofs = 0x008c,
+	.sta_ofs = 0x0094,
+};
+
+static const struct mtk_gate_regs infra2_cg_regs = {
+	.set_ofs = 0x00a8,
+	.clr_ofs = 0x00ac,
+	.sta_ofs = 0x00b0,
+};
+
+#define GATE_ICG0(_id, _name, _parent, _shift) {	\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &infra0_cg_regs,			\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr,		\
+}
+
+#define GATE_ICG1(_id, _name, _parent, _shift) {	\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &infra1_cg_regs,			\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr,		\
+}
+
+#define GATE_ICG2(_id, _name, _parent, _shift) {	\
+	.id = _id,					\
+	.name = _name,					\
+	.parent_name = _parent,				\
+	.regs = &infra2_cg_regs,			\
+	.shift = _shift,				\
+	.ops = &mtk_clk_gate_ops_setclr,		\
+}
+
+static const struct mtk_gate infra_clks[] = {
+	GATE_ICG0(CLK_INFRA_PMIC_TMR, "infra_pmic_tmr", "ulposc", 0),
+	GATE_ICG0(CLK_INFRA_PMIC_AP, "infra_pmic_ap", "pmicspi_sel", 1),
+	GATE_ICG0(CLK_INFRA_PMIC_MD, "infra_pmic_md", "pmicspi_sel", 2),
+	GATE_ICG0(CLK_INFRA_PMIC_CONN, "infra_pmic_conn", "pmicspi_sel", 3),
+	GATE_ICG0(CLK_INFRA_SCP, "infra_scp", "scp_sel", 4),
+	GATE_ICG0(CLK_INFRA_SEJ, "infra_sej", "axi_sel", 5),
+	GATE_ICG0(CLK_INFRA_APXGPT, "infra_apxgpt", "axi_sel", 6),
+	GATE_ICG0(CLK_INFRA_SEJ_13M, "infra_sej_13m", "clk26m", 7),
+	GATE_ICG0(CLK_INFRA_ICUSB, "infra_icusb", "usb20_sel", 8),
+	GATE_ICG0(CLK_INFRA_GCE, "infra_gce", "axi_sel", 9),
+	GATE_ICG0(CLK_INFRA_THERM, "infra_therm", "axi_sel", 10),
+	GATE_ICG0(CLK_INFRA_I2C0, "infra_i2c0", "axi_sel", 11),
+	GATE_ICG0(CLK_INFRA_I2C1, "infra_i2c1", "axi_sel", 12),
+	GATE_ICG0(CLK_INFRA_I2C2, "infra_i2c2", "axi_sel", 13),
+	GATE_ICG0(CLK_INFRA_I2C3, "infra_i2c3", "axi_sel", 14),
+	GATE_ICG0(CLK_INFRA_PWM_HCLK, "infra_pwm_hclk", "axi_sel", 15),
+	GATE_ICG0(CLK_INFRA_PWM1, "infra_pwm1", "axi_sel", 16),
+	GATE_ICG0(CLK_INFRA_PWM2, "infra_pwm2", "axi_sel", 17),
+	GATE_ICG0(CLK_INFRA_PWM3, "infra_pwm3", "axi_sel", 18),
+	GATE_ICG0(CLK_INFRA_PWM4, "infra_pwm4", "axi_sel", 19),
+	GATE_ICG0(CLK_INFRA_PWM, "infra_pwm", "axi_sel", 21),
+	GATE_ICG0(CLK_INFRA_UART0, "infra_uart0", "uart_sel", 22),
+	GATE_ICG0(CLK_INFRA_UART1, "infra_uart1", "uart_sel", 23),
+	GATE_ICG0(CLK_INFRA_UART2, "infra_uart2", "uart_sel", 24),
+	GATE_ICG0(CLK_INFRA_UART3, "infra_uart3", "uart_sel", 25),
+	GATE_ICG0(CLK_INFRA_MD2MD_CCIF_0, "infra_md2md_ccif_0", "axi_sel", 27),
+	GATE_ICG0(CLK_INFRA_MD2MD_CCIF_1, "infra_md2md_ccif_1", "axi_sel", 28),
+	GATE_ICG0(CLK_INFRA_MD2MD_CCIF_2, "infra_md2md_ccif_2", "axi_sel", 29),
+	GATE_ICG0(CLK_INFRA_FHCTL, "infra_fhctl", "clk26m", 30),
+	GATE_ICG0(CLK_INFRA_BTIF, "infra_btif", "axi_sel", 31),
+	GATE_ICG1(CLK_INFRA_MD2MD_CCIF_3, "infra_md2md_ccif_3", "axi_sel", 0),
+	GATE_ICG1(CLK_INFRA_SPI, "infra_spi", "spi_sel", 1),
+	GATE_ICG1(CLK_INFRA_MSDC0, "infra_msdc0", "msdc50_0_sel", 2),
+	GATE_ICG1(CLK_INFRA_MD2MD_CCIF_4, "infra_md2md_ccif_4", "axi_sel", 3),
+	GATE_ICG1(CLK_INFRA_MSDC1, "infra_msdc1", "msdc30_1_sel", 4),
+	GATE_ICG1(CLK_INFRA_MSDC2, "infra_msdc2", "msdc30_2_sel", 5),
+	GATE_ICG1(CLK_INFRA_MD2MD_CCIF_5, "infra_md2md_ccif_5", "axi_sel", 7),
+	GATE_ICG1(CLK_INFRA_GCPU, "infra_gcpu", "axi_sel", 8),
+	GATE_ICG1(CLK_INFRA_TRNG, "infra_trng", "axi_sel", 9),
+	GATE_ICG1(CLK_INFRA_AUXADC, "infra_auxadc", "clk26m", 10),
+	GATE_ICG1(CLK_INFRA_CPUM, "infra_cpum", "axi_sel", 11),
+	GATE_ICG1(CLK_INFRA_AP_C2K_CCIF_0, "infra_ap_c2k_ccif_0",
+		  "axi_sel", 12),
+	GATE_ICG1(CLK_INFRA_AP_C2K_CCIF_1, "infra_ap_c2k_ccif_1",
+		  "axi_sel", 13),
+	GATE_ICG1(CLK_INFRA_CLDMA, "infra_cldma", "axi_sel", 16),
+	GATE_ICG1(CLK_INFRA_DISP_PWM, "infra_disp_pwm", "pwm_sel", 17),
+	GATE_ICG1(CLK_INFRA_AP_DMA, "infra_ap_dma", "axi_sel", 18),
+	GATE_ICG1(CLK_INFRA_DEVICE_APC, "infra_device_apc", "axi_sel", 20),
+	GATE_ICG1(CLK_INFRA_L2C_SRAM, "infra_l2c_sram", "mm_sel", 22),
+	GATE_ICG1(CLK_INFRA_CCIF_AP, "infra_ccif_ap", "axi_sel", 23),
+	GATE_ICG1(CLK_INFRA_AUDIO, "infra_audio", "axi_sel", 25),
+	GATE_ICG1(CLK_INFRA_CCIF_MD, "infra_ccif_md", "axi_sel", 26),
+	GATE_ICG1(CLK_INFRA_DRAMC_F26M, "infra_dramc_f26m", "clk26m", 31),
+	GATE_ICG2(CLK_INFRA_I2C4, "infra_i2c4", "axi_sel", 0),
+	GATE_ICG2(CLK_INFRA_I2C_APPM, "infra_i2c_appm", "axi_sel", 1),
+	GATE_ICG2(CLK_INFRA_I2C_GPUPM, "infra_i2c_gpupm", "axi_sel", 2),
+	GATE_ICG2(CLK_INFRA_I2C2_IMM, "infra_i2c2_imm", "axi_sel", 3),
+	GATE_ICG2(CLK_INFRA_I2C2_ARB, "infra_i2c2_arb", "axi_sel", 4),
+	GATE_ICG2(CLK_INFRA_I2C3_IMM, "infra_i2c3_imm", "axi_sel", 5),
+	GATE_ICG2(CLK_INFRA_I2C3_ARB, "infra_i2c3_arb", "axi_sel", 6),
+	GATE_ICG2(CLK_INFRA_I2C5, "infra_i2c5", "axi_sel", 7),
+	GATE_ICG2(CLK_INFRA_SYS_CIRQ, "infra_sys_cirq", "axi_sel", 8),
+	GATE_ICG2(CLK_INFRA_SPI1, "infra_spi1", "spi_sel", 10),
+	GATE_ICG2(CLK_INFRA_DRAMC_B_F26M, "infra_dramc_b_f26m", "clk26m", 11),
+	GATE_ICG2(CLK_INFRA_ANC_MD32, "infra_anc_md32", "anc_md32_sel", 12),
+	GATE_ICG2(CLK_INFRA_ANC_MD32_32K, "infra_anc_md32_32k", "clk26m", 13),
+	GATE_ICG2(CLK_INFRA_DVFS_SPM1, "infra_dvfs_spm1", "axi_sel", 15),
+	GATE_ICG2(CLK_INFRA_AES_TOP0, "infra_aes_top0", "axi_sel", 16),
+	GATE_ICG2(CLK_INFRA_AES_TOP1, "infra_aes_top1", "axi_sel", 17),
+	GATE_ICG2(CLK_INFRA_SSUSB_BUS, "infra_ssusb_bus", "axi_sel", 18),
+	GATE_ICG2(CLK_INFRA_SPI2, "infra_spi2", "spi_sel", 19),
+	GATE_ICG2(CLK_INFRA_SPI3, "infra_spi3", "spi_sel", 20),
+	GATE_ICG2(CLK_INFRA_SPI4, "infra_spi4", "spi_sel", 21),
+	GATE_ICG2(CLK_INFRA_SPI5, "infra_spi5", "spi_sel", 22),
+	GATE_ICG2(CLK_INFRA_IRTX, "infra_irtx", "spi_sel", 23),
+	GATE_ICG2(CLK_INFRA_SSUSB_SYS, "infra_ssusb_sys",
+		  "ssusb_top_sys_sel", 24),
+	GATE_ICG2(CLK_INFRA_SSUSB_REF, "infra_ssusb_ref", "clk26m", 9),
+	GATE_ICG2(CLK_INFRA_AUDIO_26M, "infra_audio_26m", "clk26m", 26),
+	GATE_ICG2(CLK_INFRA_AUDIO_26M_PAD_TOP, "infra_audio_26m_pad_top",
+		  "clk26m", 27),
+	GATE_ICG2(CLK_INFRA_MODEM_TEMP_SHARE, "infra_modem_temp_share",
+		  "axi_sel", 28),
+	GATE_ICG2(CLK_INFRA_VAD_WRAP_SOC, "infra_vad_wrap_soc", "axi_sel", 29),
+	GATE_ICG2(CLK_INFRA_DRAMC_CONF, "infra_dramc_conf", "axi_sel", 30),
+	GATE_ICG2(CLK_INFRA_DRAMC_B_CONF, "infra_dramc_b_conf", "axi_sel", 31),
+	GATE_ICG1(CLK_INFRA_MFG_VCG, "infra_mfg_vcg", "mfg_52m_sel", 14),
+};
+
+static const struct mtk_fixed_factor infra_fixed_divs[] = {
+	FACTOR(CLK_INFRA_13M, "clk13m", "clk26m", 1, 2),
+};
+
+static struct clk_onecell_data *infra_clk_data;
+
+static void mtk_infrasys_init_early(struct device_node *node)
+{
+	int r, i;
+
+	if (!infra_clk_data) {
+		infra_clk_data = mtk_alloc_clk_data(CLK_INFRA_NR);
+
+		for (i = 0; i < CLK_INFRA_NR; i++)
+			infra_clk_data->clks[i] = ERR_PTR(-EPROBE_DEFER);
+	}
+
+	mtk_clk_register_factors(infra_fixed_divs, ARRAY_SIZE(infra_fixed_divs),
+				 infra_clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, infra_clk_data);
+	if (r)
+		pr_err("%s(): could not register clock provider: %d\n",
+		       __func__, r);
+}
+
+CLK_OF_DECLARE_DRIVER(mtk_infra, "mediatek,mt6797-infracfg",
+		      mtk_infrasys_init_early);
+
+static int mtk_infrasys_init(struct platform_device *pdev)
+{
+	int r, i;
+	struct device_node *node = pdev->dev.of_node;
+
+	if (!infra_clk_data) {
+		infra_clk_data = mtk_alloc_clk_data(CLK_INFRA_NR);
+	} else {
+		for (i = 0; i < CLK_INFRA_NR; i++) {
+			if (infra_clk_data->clks[i] == ERR_PTR(-EPROBE_DEFER))
+				infra_clk_data->clks[i] = ERR_PTR(-ENOENT);
+		}
+	}
+
+	mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks),
+			       infra_clk_data);
+	mtk_clk_register_factors(infra_fixed_divs, ARRAY_SIZE(infra_fixed_divs),
+				 infra_clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, infra_clk_data);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+#define MT6797_PLL_FMAX		(3000UL * MHZ)
+
+#define CON0_MT6797_RST_BAR	BIT(24)
+
+#define PLL_B(_id, _name, _reg, _pwr_reg, _en_mask, _flags, _pcwbits,	\
+			_pd_reg, _pd_shift, _tuner_reg, _pcw_reg,	\
+			_pcw_shift, _div_table) {			\
+	.id = _id,						\
+	.name = _name,						\
+	.reg = _reg,						\
+	.pwr_reg = _pwr_reg,					\
+	.en_mask = _en_mask,					\
+	.flags = _flags,					\
+	.rst_bar_mask = CON0_MT6797_RST_BAR,			\
+	.fmax = MT6797_PLL_FMAX,				\
+	.pcwbits = _pcwbits,					\
+	.pd_reg = _pd_reg,					\
+	.pd_shift = _pd_shift,					\
+	.tuner_reg = _tuner_reg,				\
+	.pcw_reg = _pcw_reg,					\
+	.pcw_shift = _pcw_shift,				\
+	.div_table = _div_table,				\
+}
+
+#define PLL(_id, _name, _reg, _pwr_reg, _en_mask, _flags, _pcwbits,	\
+			_pd_reg, _pd_shift, _tuner_reg, _pcw_reg,	\
+			_pcw_shift)					\
+		PLL_B(_id, _name, _reg, _pwr_reg, _en_mask, _flags, _pcwbits, \
+			_pd_reg, _pd_shift, _tuner_reg, _pcw_reg, _pcw_shift, \
+			NULL)
+
+static const struct mtk_pll_data plls[] = {
+	PLL(CLK_APMIXED_MAINPLL, "mainpll", 0x0220, 0x022C, 0xF0000101, PLL_AO,
+	    21, 0x220, 4, 0x0, 0x224, 0),
+	PLL(CLK_APMIXED_UNIVPLL, "univpll", 0x0230, 0x023C, 0xFE000011, 0, 7,
+	    0x230, 4, 0x0, 0x234, 14),
+	PLL(CLK_APMIXED_MFGPLL, "mfgpll", 0x0240, 0x024C, 0x00000101, 0, 21,
+	    0x244, 24, 0x0, 0x244, 0),
+	PLL(CLK_APMIXED_MSDCPLL, "msdcpll", 0x0250, 0x025C, 0x00000121, 0, 21,
+	    0x250, 4, 0x0, 0x254, 0),
+	PLL(CLK_APMIXED_IMGPLL, "imgpll", 0x0260, 0x026C, 0x00000121, 0, 21,
+	    0x260, 4, 0x0, 0x264, 0),
+	PLL(CLK_APMIXED_TVDPLL, "tvdpll", 0x0270, 0x027C, 0xC0000121, 0, 21,
+	    0x270, 4, 0x0, 0x274, 0),
+	PLL(CLK_APMIXED_CODECPLL, "codecpll", 0x0290, 0x029C, 0x00000121, 0, 21,
+	    0x290, 4, 0x0, 0x294, 0),
+	PLL(CLK_APMIXED_VDECPLL, "vdecpll", 0x02E4, 0x02F0, 0x00000121, 0, 21,
+	    0x2E4, 4, 0x0, 0x2E8, 0),
+	PLL(CLK_APMIXED_APLL1, "apll1", 0x02A0, 0x02B0, 0x00000131, 0, 31,
+	    0x2A0, 4, 0x2A8, 0x2A4, 0),
+	PLL(CLK_APMIXED_APLL2, "apll2", 0x02B4, 0x02C4, 0x00000131, 0, 31,
+	    0x2B4, 4, 0x2BC, 0x2B8, 0),
+};
+
+static int mtk_apmixedsys_init(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	struct device_node *node = pdev->dev.of_node;
+
+	clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR);
+	if (!clk_data)
+		return -ENOMEM;
+
+	mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data);
+
+	return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+static const struct of_device_id of_match_clk_mt6797[] = {
+	{
+		.compatible = "mediatek,mt6797-topckgen",
+		.data = mtk_topckgen_init,
+	}, {
+		.compatible = "mediatek,mt6797-infracfg",
+		.data = mtk_infrasys_init,
+	}, {
+		.compatible = "mediatek,mt6797-apmixedsys",
+		.data = mtk_apmixedsys_init,
+	}, {
+		/* sentinel */
+	}
+};
+
+static int clk_mt6797_probe(struct platform_device *pdev)
+{
+	int (*clk_init)(struct platform_device *);
+	int r;
+
+	clk_init = of_device_get_match_data(&pdev->dev);
+	if (!clk_init)
+		return -EINVAL;
+
+	r = clk_init(pdev);
+	if (r)
+		dev_err(&pdev->dev,
+			"could not register clock provider: %s: %d\n",
+			pdev->name, r);
+
+	return r;
+}
+
+static struct platform_driver clk_mt6797_drv = {
+	.probe = clk_mt6797_probe,
+	.driver = {
+		.name = "clk-mt6797",
+		.of_match_table = of_match_clk_mt6797,
+	},
+};
+
+static int __init clk_mt6797_init(void)
+{
+	return platform_driver_register(&clk_mt6797_drv);
+}
+
+arch_initcall(clk_mt6797_init);
-- 
cgit v1.2.3


From a5dd63efda3d07b50c0feda6fb4796bece237e61 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 31 Jan 2017 07:45:13 -0800
Subject: lockdep: Use "WARNING" tag on lockdep splats

This commit changes lockdep splats to begin lines with "WARNING" and
to use pr_warn() instead of printk().  This change eases scripted
analysis of kernel console output.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/locking/lockdep.c       | 86 +++++++++++++++++++++---------------------
 kernel/locking/rtmutex-debug.c |  9 +++--
 2 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index a95e5d1f4a9c..e9d4f85b290c 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1144,10 +1144,10 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
 		return 0;
 
 	printk("\n");
-	printk("======================================================\n");
-	printk("[ INFO: possible circular locking dependency detected ]\n");
+	pr_warn("======================================================\n");
+	pr_warn("WARNING: possible circular locking dependency detected\n");
 	print_kernel_ident();
-	printk("-------------------------------------------------------\n");
+	pr_warn("------------------------------------------------------\n");
 	printk("%s/%d is trying to acquire lock:\n",
 		curr->comm, task_pid_nr(curr));
 	print_lock(check_src);
@@ -1482,11 +1482,11 @@ print_bad_irq_dependency(struct task_struct *curr,
 		return 0;
 
 	printk("\n");
-	printk("======================================================\n");
-	printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
+	pr_warn("=====================================================\n");
+	pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n",
 		irqclass, irqclass);
 	print_kernel_ident();
-	printk("------------------------------------------------------\n");
+	pr_warn("-----------------------------------------------------\n");
 	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
 		curr->comm, task_pid_nr(curr),
 		curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
@@ -1711,10 +1711,10 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
 		return 0;
 
 	printk("\n");
-	printk("=============================================\n");
-	printk("[ INFO: possible recursive locking detected ]\n");
+	pr_warn("============================================\n");
+	pr_warn("WARNING: possible recursive locking detected\n");
 	print_kernel_ident();
-	printk("---------------------------------------------\n");
+	pr_warn("--------------------------------------------\n");
 	printk("%s/%d is trying to acquire lock:\n",
 		curr->comm, task_pid_nr(curr));
 	print_lock(next);
@@ -2061,10 +2061,10 @@ static void print_collision(struct task_struct *curr,
 			struct lock_chain *chain)
 {
 	printk("\n");
-	printk("======================\n");
-	printk("[chain_key collision ]\n");
+	pr_warn("============================\n");
+	pr_warn("WARNING: chain_key collision\n");
 	print_kernel_ident();
-	printk("----------------------\n");
+	pr_warn("----------------------------\n");
 	printk("%s/%d: ", current->comm, task_pid_nr(current));
 	printk("Hash chain already cached but the contents don't match!\n");
 
@@ -2360,10 +2360,10 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
 		return 0;
 
 	printk("\n");
-	printk("=================================\n");
-	printk("[ INFO: inconsistent lock state ]\n");
+	pr_warn("================================\n");
+	pr_warn("WARNING: inconsistent lock state\n");
 	print_kernel_ident();
-	printk("---------------------------------\n");
+	pr_warn("--------------------------------\n");
 
 	printk("inconsistent {%s} -> {%s} usage.\n",
 		usage_str[prev_bit], usage_str[new_bit]);
@@ -2425,10 +2425,10 @@ print_irq_inversion_bug(struct task_struct *curr,
 		return 0;
 
 	printk("\n");
-	printk("=========================================================\n");
-	printk("[ INFO: possible irq lock inversion dependency detected ]\n");
+	pr_warn("========================================================\n");
+	pr_warn("WARNING: possible irq lock inversion dependency detected\n");
 	print_kernel_ident();
-	printk("---------------------------------------------------------\n");
+	pr_warn("--------------------------------------------------------\n");
 	printk("%s/%d just changed the state of lock:\n",
 		curr->comm, task_pid_nr(curr));
 	print_lock(this);
@@ -3170,10 +3170,10 @@ print_lock_nested_lock_not_held(struct task_struct *curr,
 		return 0;
 
 	printk("\n");
-	printk("==================================\n");
-	printk("[ BUG: Nested lock was not taken ]\n");
+	pr_warn("==================================\n");
+	pr_warn("WARNING: Nested lock was not taken\n");
 	print_kernel_ident();
-	printk("----------------------------------\n");
+	pr_warn("----------------------------------\n");
 
 	printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
 	print_lock(hlock);
@@ -3383,10 +3383,10 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
 		return 0;
 
 	printk("\n");
-	printk("=====================================\n");
-	printk("[ BUG: bad unlock balance detected! ]\n");
+	pr_warn("=====================================\n");
+	pr_warn("WARNING: bad unlock balance detected!\n");
 	print_kernel_ident();
-	printk("-------------------------------------\n");
+	pr_warn("-------------------------------------\n");
 	printk("%s/%d is trying to release lock (",
 		curr->comm, task_pid_nr(curr));
 	print_lockdep_cache(lock);
@@ -3880,10 +3880,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
 		return 0;
 
 	printk("\n");
-	printk("=================================\n");
-	printk("[ BUG: bad contention detected! ]\n");
+	pr_warn("=================================\n");
+	pr_warn("WARNING: bad contention detected!\n");
 	print_kernel_ident();
-	printk("---------------------------------\n");
+	pr_warn("---------------------------------\n");
 	printk("%s/%d is trying to contend lock (",
 		curr->comm, task_pid_nr(curr));
 	print_lockdep_cache(lock);
@@ -4244,10 +4244,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
 		return;
 
 	printk("\n");
-	printk("=========================\n");
-	printk("[ BUG: held lock freed! ]\n");
+	pr_warn("=========================\n");
+	pr_warn("WARNING: held lock freed!\n");
 	print_kernel_ident();
-	printk("-------------------------\n");
+	pr_warn("-------------------------\n");
 	printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
 		curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
 	print_lock(hlock);
@@ -4302,11 +4302,11 @@ static void print_held_locks_bug(void)
 		return;
 
 	printk("\n");
-	printk("=====================================\n");
-	printk("[ BUG: %s/%d still has locks held! ]\n",
+	pr_warn("====================================\n");
+	pr_warn("WARNING: %s/%d still has locks held!\n",
 	       current->comm, task_pid_nr(current));
 	print_kernel_ident();
-	printk("-------------------------------------\n");
+	pr_warn("------------------------------------\n");
 	lockdep_print_held_locks(current);
 	printk("\nstack backtrace:\n");
 	dump_stack();
@@ -4371,7 +4371,7 @@ retry:
 	} while_each_thread(g, p);
 
 	printk("\n");
-	printk("=============================================\n\n");
+	pr_warn("=============================================\n\n");
 
 	if (unlock)
 		read_unlock(&tasklist_lock);
@@ -4401,10 +4401,10 @@ asmlinkage __visible void lockdep_sys_exit(void)
 		if (!debug_locks_off())
 			return;
 		printk("\n");
-		printk("================================================\n");
-		printk("[ BUG: lock held when returning to user space! ]\n");
+		pr_warn("================================================\n");
+		pr_warn("WARNING: lock held when returning to user space!\n");
 		print_kernel_ident();
-		printk("------------------------------------------------\n");
+		pr_warn("------------------------------------------------\n");
 		printk("%s/%d is leaving the kernel with locks still held!\n",
 				curr->comm, curr->pid);
 		lockdep_print_held_locks(curr);
@@ -4421,13 +4421,13 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
 	/* Note: the following can be executed concurrently, so be careful. */
 	printk("\n");
-	pr_err("===============================\n");
-	pr_err("[ ERR: suspicious RCU usage.  ]\n");
+	pr_warn("=============================\n");
+	pr_warn("WARNING: suspicious RCU usage\n");
 	print_kernel_ident();
-	pr_err("-------------------------------\n");
-	pr_err("%s:%d %s!\n", file, line, s);
-	pr_err("\nother info that might help us debug this:\n\n");
-	pr_err("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
+	pr_warn("-----------------------------\n");
+	printk("%s:%d %s!\n", file, line, s);
+	printk("\nother info that might help us debug this:\n\n");
+	printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
 	       !rcu_lockdep_current_cpu_online()
 			? "RCU used illegally from offline CPU!\n"
 			: !rcu_is_watching()
diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c
index 97ee9df32e0f..db4f55211b04 100644
--- a/kernel/locking/rtmutex-debug.c
+++ b/kernel/locking/rtmutex-debug.c
@@ -102,10 +102,11 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
 		return;
 	}
 
-	printk("\n============================================\n");
-	printk(  "[ BUG: circular locking deadlock detected! ]\n");
-	printk("%s\n", print_tainted());
-	printk(  "--------------------------------------------\n");
+	pr_warn("\n");
+	pr_warn("============================================\n");
+	pr_warn("WARNING: circular locking deadlock detected!\n");
+	pr_warn("%s\n", print_tainted());
+	pr_warn("--------------------------------------------\n");
 	printk("%s/%d is deadlocking current task %s/%d\n\n",
 	       task->comm, task_pid_nr(task),
 	       current->comm, task_pid_nr(current));
-- 
cgit v1.2.3


From 1b21401307a6a705688faf1bc9e6e02663708ec1 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Mon, 17 Apr 2017 14:46:45 +0200
Subject: clk: spear: fix ADC clock definition on SPEAr600

There is no SPEAr600 device named "adc". Instead, the description of the
ADC was recently added to the Device Tree, and the device name is
"d820b000.adc", so we should associate the ADC gatable clock to this
device name.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/spear/spear6xx_clock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/spear/spear6xx_clock.c b/drivers/clk/spear/spear6xx_clock.c
index 7c9383c3c2c6..f911d9f77763 100644
--- a/drivers/clk/spear/spear6xx_clock.c
+++ b/drivers/clk/spear/spear6xx_clock.c
@@ -313,7 +313,7 @@ void __init spear6xx_clk_init(void __iomem *misc_base)
 	/* clock derived from apb clk */
 	clk = clk_register_gate(NULL, "adc_clk", "apb_clk", 0, PERIP1_CLK_ENB,
 			ADC_CLK_ENB, 0, &_lock);
-	clk_register_clkdev(clk, NULL, "adc");
+	clk_register_clkdev(clk, NULL, "d820b000.adc");
 
 	clk = clk_register_fixed_factor(NULL, "gpio0_clk", "apb_clk", 0, 1, 1);
 	clk_register_clkdev(clk, NULL, "f0100000.gpio");
-- 
cgit v1.2.3


From 468d01bec544286bb5283f012b95b5b84636565b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 2 Feb 2017 11:40:15 -0800
Subject: types: Update obsolete callback_head comment

The comment header for callback_head (and thus for rcu_head) states that
the bottom two bits of a pointer to these structures must be zero.  This
is obsolete:  The new requirement is that only the bottom bit need be
zero.  This commit therefore updates this comment.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/types.h b/include/linux/types.h
index 1e7bd24848fc..258099a4ed82 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -209,7 +209,7 @@ struct ustat {
  * naturally due ABI requirements, but some architectures (like CRIS) have
  * weird ABI and we need to ask it explicitly.
  *
- * The alignment is required to guarantee that bits 0 and 1 of @next will be
+ * The alignment is required to guarantee that bit 0 of @next will be
  * clear under normal conditions -- as long as we use call_rcu(),
  * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback.
  *
-- 
cgit v1.2.3


From 024828800926dcb385b094e2438151647c3f8251 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 3 Feb 2017 09:27:00 -0800
Subject: rcu: Make RCU_FANOUT_LEAF help text more explicit about skew_tick

If you set RCU_FANOUT_LEAF too high, you can get lock contention
on the leaf rcu_node, and you should boot with the skew_tick kernel
parameter set in order to avoid this lock contention.  This commit
therefore upgrades the RCU_FANOUT_LEAF help text to explicitly state
this.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 init/Kconfig | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index a92f27da4a27..c549618c72f0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -612,11 +612,17 @@ config RCU_FANOUT_LEAF
 	  initialization.  These systems tend to run CPU-bound, and thus
 	  are not helped by synchronized interrupts, and thus tend to
 	  skew them, which reduces lock contention enough that large
-	  leaf-level fanouts work well.
+	  leaf-level fanouts work well.  That said, setting leaf-level
+	  fanout to a large number will likely cause problematic
+	  lock contention on the leaf-level rcu_node structures unless
+	  you boot with the skew_tick kernel parameter.
 
 	  Select a specific number if testing RCU itself.
 
-	  Select the maximum permissible value for large systems.
+	  Select the maximum permissible value for large systems, but
+	  please understand that you may also need to set the skew_tick
+	  kernel boot parameter to avoid contention on the rcu_node
+	  structure's locks.
 
 	  Take the default if unsure.
 
-- 
cgit v1.2.3


From d1e4f01d09b88c6df3e9f6b8ca5f1a3882069ccc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 8 Feb 2017 14:58:41 -0800
Subject: rcu: Remove obsolete comment from rcu_future_gp_cleanup() header

The rcu_nocb_gp_cleanup() function is now invoked elsewhere, so this
commit drags this comment into the year 2017.

Reported-by: Michalis Kokologiannakis <mixaskok@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 50fee7689e71..bdaa69d23a8a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1793,9 +1793,7 @@ out:
 
 /*
  * Clean up any old requests for the just-ended grace period.  Also return
- * whether any additional grace periods have been requested.  Also invoke
- * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads
- * waiting for this grace period to complete.
+ * whether any additional grace periods have been requested.
  */
 static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
 {
-- 
cgit v1.2.3


From 48ac34666ff76843d8743db1cc78b303759916f1 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 27 Feb 2017 21:14:19 +0200
Subject: hlist_add_tail_rcu disable sparse warning

sparse is unhappy about this code in hlist_add_tail_rcu:

        struct hlist_node *i, *last = NULL;

        for (i = hlist_first_rcu(h); i; i = hlist_next_rcu(i))
                last = i;

This is because hlist_next_rcu and hlist_next_rcu return
__rcu pointers.

It's a false positive - it's a write side primitive and so
does not need to be called in a read side critical section.

The following trivial patch disables the warning
without changing the behaviour in any way.

Note: __hlist_for_each_rcu would also remove the warning but it would be
confusing since it calls rcu_derefence and is designed to run in the rcu
read side critical section.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rculist.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 4f7a9561b8c4..b1fd8bf85fdc 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -509,7 +509,8 @@ static inline void hlist_add_tail_rcu(struct hlist_node *n,
 {
 	struct hlist_node *i, *last = NULL;
 
-	for (i = hlist_first_rcu(h); i; i = hlist_next_rcu(i))
+	/* Note: write side code, so rcu accessors are not needed. */
+	for (i = h->first; i; i = i->next)
 		last = i;
 
 	if (last) {
-- 
cgit v1.2.3


From deb34f3643980832c03bdfb315d5a7e3371bd269 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 23 Mar 2017 13:21:30 -0700
Subject: rcu: Improve comments for hotplug/suspend/hibernate functions

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 41 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index bdaa69d23a8a..c4f195dd7c94 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3894,6 +3894,10 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
+/*
+ * Invoked early in the CPU-online process, when pretty much all
+ * services are available.  The incoming CPU is not present.
+ */
 int rcutree_prepare_cpu(unsigned int cpu)
 {
 	struct rcu_state *rsp;
@@ -3907,6 +3911,9 @@ int rcutree_prepare_cpu(unsigned int cpu)
 	return 0;
 }
 
+/*
+ * Update RCU priority boot kthread affinity for CPU-hotplug changes.
+ */
 static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
 {
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
@@ -3914,6 +3921,10 @@ static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
 	rcu_boost_kthread_setaffinity(rdp->mynode, outgoing);
 }
 
+/*
+ * Near the end of the CPU-online process.  Pretty much all services
+ * enabled, and the CPU is now very much alive.
+ */
 int rcutree_online_cpu(unsigned int cpu)
 {
 	sync_sched_exp_online_cleanup(cpu);
@@ -3921,13 +3932,19 @@ int rcutree_online_cpu(unsigned int cpu)
 	return 0;
 }
 
+/*
+ * Near the beginning of the process.  The CPU is still very much alive
+ * with pretty much all services enabled.
+ */
 int rcutree_offline_cpu(unsigned int cpu)
 {
 	rcutree_affinity_setting(cpu, cpu);
 	return 0;
 }
 
-
+/*
+ * Near the end of the offline process.  We do only tracing here.
+ */
 int rcutree_dying_cpu(unsigned int cpu)
 {
 	struct rcu_state *rsp;
@@ -3937,6 +3954,9 @@ int rcutree_dying_cpu(unsigned int cpu)
 	return 0;
 }
 
+/*
+ * The outgoing CPU is gone and we are running elsewhere.
+ */
 int rcutree_dead_cpu(unsigned int cpu)
 {
 	struct rcu_state *rsp;
@@ -3954,6 +3974,10 @@ int rcutree_dead_cpu(unsigned int cpu)
  * incoming CPUs are not allowed to use RCU read-side critical sections
  * until this function is called.  Failing to observe this restriction
  * will result in lockdep splats.
+ *
+ * Note that this function is special in that it is invoked directly
+ * from the incoming CPU rather than from the cpuhp_step mechanism.
+ * This is because this function must be invoked at a precise location.
  */
 void rcu_cpu_starting(unsigned int cpu)
 {
@@ -3976,9 +4000,6 @@ void rcu_cpu_starting(unsigned int cpu)
 
 #ifdef CONFIG_HOTPLUG_CPU
 /*
- * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
- * function.  We now remove it from the rcu_node tree's ->qsmaskinit
- * bit masks.
  * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
  * function.  We now remove it from the rcu_node tree's ->qsmaskinit
  * bit masks.
@@ -3997,6 +4018,14 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
+/*
+ * The outgoing function has no further need of RCU, so remove it from
+ * the list of CPUs that RCU must track.
+ *
+ * Note that this function is special in that it is invoked directly
+ * from the outgoing CPU rather than from the cpuhp_step mechanism.
+ * This is because this function must be invoked at a precise location.
+ */
 void rcu_report_dead(unsigned int cpu)
 {
 	struct rcu_state *rsp;
@@ -4011,6 +4040,10 @@ void rcu_report_dead(unsigned int cpu)
 }
 #endif
 
+/*
+ * On non-huge systems, use expedited RCU grace periods to make suspend
+ * and hibernation run faster.
+ */
 static int rcu_pm_notify(struct notifier_block *self,
 			 unsigned long action, void *hcpu)
 {
-- 
cgit v1.2.3


From 6bb7ff175ea63dffebadd3f0e921618332809af9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 25 Mar 2017 10:19:50 -0700
Subject: torture: Use correct path for Kconfig fragment for duplicates

Currently, the rcutorture scripting will give an error message if
running a duplicate scenario that happens also to have a non-existent
build directory (b1, b2, ... in the rcutorture directory).  Worse yet, if
the build directory has already been created and used for a real build,
the script will silently grab the wrong Kconfig fragment, which could
cause confusion to the poor sap (me) analyzing old test results.  At
least the actual test runs correctly...

This commit therefore accesses the Kconfig fragment from the results
directory corresponding to the first of the duplicate scenarios, for
which a build was actually carried out.  This prevents both the messages
and at least one form of later confusion.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index ea6e373edc27..93eede4e8fbe 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -170,7 +170,7 @@ qemu_append="`identify_qemu_append "$QEMU"`"
 # Pull in Kconfig-fragment boot parameters
 boot_args="`configfrag_boot_params "$boot_args" "$config_template"`"
 # Generate kernel-version-specific boot parameters
-boot_args="`per_version_boot_params "$boot_args" $builddir/.config $seconds`"
+boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
 
 if test -n "$TORTURE_BUILDONLY"
 then
-- 
cgit v1.2.3


From 50dc7def4a9e05d1329e0cc129a5146821130998 Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <der.herr@hofr.at>
Date: Sat, 25 Mar 2017 20:46:01 +0100
Subject: rcu: Use bool value directly

The beenonline variable is declared bool so there is no need for an
explicit comparison, especially not against the constant zero.

Signed-off-by: Nicholas Mc Guire <der.herr@hofr.at>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c4f195dd7c94..7c238604df18 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3085,7 +3085,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	bool needwake;
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
-	WARN_ON_ONCE(rdp->beenonline == 0);
+	WARN_ON_ONCE(!rdp->beenonline);
 
 	/* Update RCU state based on any recent quiescent states. */
 	rcu_check_quiescent_state(rsp, rdp);
-- 
cgit v1.2.3


From 5455a7f6a8dc66a354c6dbf0a9f40e7cdacd11db Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <der.herr@hofr.at>
Date: Sat, 25 Mar 2017 20:46:02 +0100
Subject: rcu: Use true/false in assignment to bool

This commit makes the parse_rcu_nocb_poll() function assign true
(rather than the constant 1) to the bool variable rcu_nocb_poll.

Signed-off-by: Nicholas Mc Guire <der.herr@hofr.at>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree_plugin.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0a62a8f1caac..f4b7a9be1a44 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1709,7 +1709,7 @@ __setup("rcu_nocbs=", rcu_nocb_setup);
 
 static int __init parse_rcu_nocb_poll(char *arg)
 {
-	rcu_nocb_poll = 1;
+	rcu_nocb_poll = true;
 	return 0;
 }
 early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
-- 
cgit v1.2.3


From bfd090be14edeff2040b65f1a396c5343a0210c5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 8 Feb 2017 14:49:27 -0800
Subject: rcu: Fix typo in PER_RCU_NODE_PERIOD header comment

This commit just changes a "the the" to "the" to reduce repetition.

Reported-by: Michalis Kokologiannakis <mixaskok@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7c238604df18..b1679e8cc5ed 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -199,7 +199,7 @@ static const int gp_cleanup_delay;
 
 /*
  * Number of grace periods between delays, normalized by the duration of
- * the delay.  The longer the the delay, the more the grace periods between
+ * the delay.  The longer the delay, the more the grace periods between
  * each delay.  The reason for this normalization is that it means that,
  * for non-zero delays, the overall slowdown of grace periods is constant
  * regardless of the duration of the delay.  This arrangement balances
-- 
cgit v1.2.3


From e24f5287cd88ebd0df524b55b70b1fcd6d76d8c6 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Mon, 10 Apr 2017 14:00:14 -0700
Subject: clk: imx7d: fix USDHC NAND clock

The USDHC NAND root clock is not gated by any CCM clock gate. Remove
the bogus gate definition.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Acked-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/imx/clk-imx7d.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c
index 562055129ed8..93b03640da9b 100644
--- a/drivers/clk/imx/clk-imx7d.c
+++ b/drivers/clk/imx/clk-imx7d.c
@@ -724,7 +724,7 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	clks[IMX7D_MAIN_AXI_ROOT_DIV] = imx_clk_divider2("axi_post_div", "axi_pre_div", base + 0x8800, 0, 6);
 	clks[IMX7D_DISP_AXI_ROOT_DIV] = imx_clk_divider2("disp_axi_post_div", "disp_axi_pre_div", base + 0x8880, 0, 6);
 	clks[IMX7D_ENET_AXI_ROOT_DIV] = imx_clk_divider2("enet_axi_post_div", "enet_axi_pre_div", base + 0x8900, 0, 6);
-	clks[IMX7D_NAND_USDHC_BUS_ROOT_DIV] = imx_clk_divider2("nand_usdhc_post_div", "nand_usdhc_pre_div", base + 0x8980, 0, 6);
+	clks[IMX7D_NAND_USDHC_BUS_ROOT_CLK] = imx_clk_divider2("nand_usdhc_root_clk", "nand_usdhc_pre_div", base + 0x8980, 0, 6);
 	clks[IMX7D_AHB_CHANNEL_ROOT_DIV] = imx_clk_divider2("ahb_root_clk", "ahb_pre_div", base + 0x9000, 0, 6);
 	clks[IMX7D_IPG_ROOT_CLK] = imx_clk_divider2("ipg_root_clk", "ahb_root_clk", base + 0x9080, 0, 2);
 	clks[IMX7D_DRAM_ROOT_DIV] = imx_clk_divider2("dram_post_div", "dram_cg", base + 0x9880, 0, 3);
@@ -798,7 +798,6 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	clks[IMX7D_ENET_AXI_ROOT_CLK] = imx_clk_gate4("enet_axi_root_clk", "enet_axi_post_div", base + 0x4060, 0);
 	clks[IMX7D_OCRAM_CLK] = imx_clk_gate4("ocram_clk", "axi_post_div", base + 0x4110, 0);
 	clks[IMX7D_OCRAM_S_CLK] = imx_clk_gate4("ocram_s_clk", "ahb_root_clk", base + 0x4120, 0);
-	clks[IMX7D_NAND_USDHC_BUS_ROOT_CLK] = imx_clk_gate4("nand_usdhc_root_clk", "nand_usdhc_post_div", base + 0x4130, 0);
 	clks[IMX7D_DRAM_ROOT_CLK] = imx_clk_gate4("dram_root_clk", "dram_post_div", base + 0x4130, 0);
 	clks[IMX7D_DRAM_PHYM_ROOT_CLK] = imx_clk_gate4("dram_phym_root_clk", "dram_phym_cg", base + 0x4130, 0);
 	clks[IMX7D_DRAM_PHYM_ALT_ROOT_CLK] = imx_clk_gate4("dram_phym_alt_root_clk", "dram_phym_alt_post_div", base + 0x4130, 0);
-- 
cgit v1.2.3


From 46a7253df472721e97cc9d9d088d223aed6f870a Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Tue, 21 Mar 2017 10:12:59 +0000
Subject: clk: qcom: clk-smd-rpm: fix rate for branch clks during handoff

rpm branch clk rate should requested as either 0 or 1 but not INT_MAX.
This patch fixes rate request for branch clocks during clk handoff.

Suggested-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/qcom/clk-smd-rpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c
index 3487c267833e..d990fe44aef3 100644
--- a/drivers/clk/qcom/clk-smd-rpm.c
+++ b/drivers/clk/qcom/clk-smd-rpm.c
@@ -165,7 +165,7 @@ static int clk_smd_rpm_handoff(struct clk_smd_rpm *r)
 	struct clk_smd_rpm_req req = {
 		.key = cpu_to_le32(r->rpm_key),
 		.nbytes = cpu_to_le32(sizeof(u32)),
-		.value = cpu_to_le32(INT_MAX),
+		.value = cpu_to_le32(r->branch ? 1 : INT_MAX),
 	};
 
 	ret = qcom_rpm_smd_write(r->rpm, QCOM_SMD_RPM_ACTIVE_STATE,
-- 
cgit v1.2.3


From 8973aa4aecac223548366ca81818309a0f0efa6d Mon Sep 17 00:00:00 2001
From: Bharat Kumar Reddy Gooty <bharat.gooty@broadcom.com>
Date: Mon, 20 Mar 2017 18:12:14 -0400
Subject: clk: ns2: Correct SDIO bits

Corrected the bits for power and iso.

Signed-off-by: Bharat Kumar Reddy Gooty <bharat.gooty@broadcom.com>
Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Fixes: f7225a83 ("clk: ns2: add clock support for Broadcom Northstar 2 SoC")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/bcm/clk-ns2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/bcm/clk-ns2.c b/drivers/clk/bcm/clk-ns2.c
index a564e9248814..adc14145861a 100644
--- a/drivers/clk/bcm/clk-ns2.c
+++ b/drivers/clk/bcm/clk-ns2.c
@@ -103,7 +103,7 @@ CLK_OF_DECLARE(ns2_genpll_src_clk, "brcm,ns2-genpll-scr",
 
 static const struct iproc_pll_ctrl genpll_sw = {
 	.flags = IPROC_CLK_AON | IPROC_CLK_PLL_SPLIT_STAT_CTRL,
-	.aon = AON_VAL(0x0, 2, 9, 8),
+	.aon = AON_VAL(0x0, 1, 11, 10),
 	.reset = RESET_VAL(0x4, 2, 1),
 	.dig_filter = DF_VAL(0x0, 9, 3, 5, 4, 2, 3),
 	.ndiv_int = REG_VAL(0x8, 4, 10),
-- 
cgit v1.2.3


From 41ee7caf59e1c68d696162523cdf1fa7f717a731 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 20 Feb 2017 14:23:56 -0600
Subject: clk: x86: add "mclk" alias for Baytrail/Cherrytrail

Due to timing requirements, TI and Conexant manage the audio
reference clock from their ASoC codec drivers using the "mclk"
string. This patch adds another lookup for the "pmc_plt_clk_3"
clock to avoid Intel-specific tests in those codec drivers and
use code as-is.

To avoid a leak, clk_add_alias() is not used in this patch.
Instead the lookup is created manually as part of the .probe()
step and dropped in the .remove() step.

"pmc_plt_clk_3" is used exclusively for audio on all known
Baytrail/CherryTrail designs and is e.g. routed on the MCLK
(pin 26) of the MinnowBoardMAX Turbot LSE connector.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/x86/clk-pmc-atom.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c
index 2b60577703ef..fafc99120dc2 100644
--- a/drivers/clk/x86/clk-pmc-atom.c
+++ b/drivers/clk/x86/clk-pmc-atom.c
@@ -54,6 +54,7 @@ struct clk_plt_data {
 	struct clk_plt_fixed **parents;
 	u8 nparents;
 	struct clk_plt *clks[PMC_CLK_NUM];
+	struct clk_lookup *mclk_lookup;
 };
 
 /* Return an index in parent table */
@@ -337,6 +338,11 @@ static int plt_clk_probe(struct platform_device *pdev)
 			goto err_unreg_clk_plt;
 		}
 	}
+	data->mclk_lookup = clkdev_hw_create(&data->clks[3]->hw, "mclk", NULL);
+	if (IS_ERR(data->mclk_lookup)) {
+		err = PTR_ERR(data->mclk_lookup);
+		goto err_unreg_clk_plt;
+	}
 
 	plt_clk_free_parent_names_loop(parent_names, data->nparents);
 
@@ -356,6 +362,7 @@ static int plt_clk_remove(struct platform_device *pdev)
 
 	data = platform_get_drvdata(pdev);
 
+	clkdev_drop(data->mclk_lookup);
 	plt_clk_unregister_loop(data, PMC_CLK_NUM);
 	plt_clk_unregister_parents(data);
 	return 0;
-- 
cgit v1.2.3


From 4a43e35d19c1379c255a8abab3d6303ed382dcf1 Mon Sep 17 00:00:00 2001
From: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Date: Thu, 9 Feb 2017 17:20:30 +0200
Subject: clk: qcom: add parent for venus core0 and core1 gdsc's

Make venus_gdsc parent of venus gdsc core0 and core1.

Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/qcom/mmcc-msm8996.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/clk/qcom/mmcc-msm8996.c b/drivers/clk/qcom/mmcc-msm8996.c
index 20d8fea32a99..352394d8fd8c 100644
--- a/drivers/clk/qcom/mmcc-msm8996.c
+++ b/drivers/clk/qcom/mmcc-msm8996.c
@@ -2944,6 +2944,7 @@ static struct gdsc venus_core0_gdsc = {
 	.pd = {
 		.name = "venus_core0",
 	},
+	.parent = &venus_gdsc.pd,
 	.pwrsts = PWRSTS_OFF_ON,
 	.flags = HW_CTRL,
 };
@@ -2955,6 +2956,7 @@ static struct gdsc venus_core1_gdsc = {
 	.pd = {
 		.name = "venus_core1",
 	},
+	.parent = &venus_gdsc.pd,
 	.pwrsts = PWRSTS_OFF_ON,
 	.flags = HW_CTRL,
 };
-- 
cgit v1.2.3


From d127967a7b18598b8922bf9a4b984fae899787dc Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 18 Apr 2017 02:18:19 +0000
Subject: clk: cs2000: enable clock skipping mode

CLK_IN skipping mode allows the PLL to maintain lock even when the
CLK_IN signal has missing pulses for up to 20 ms (t CS) at a time.
This patch enables it

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk-cs2000-cp.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/clk/clk-cs2000-cp.c b/drivers/clk/clk-cs2000-cp.c
index 4df38c5ff96c..a8fa6bdd0e55 100644
--- a/drivers/clk/clk-cs2000-cp.c
+++ b/drivers/clk/clk-cs2000-cp.c
@@ -54,6 +54,7 @@
 #define ENDEV2		(0x1)
 
 /* FUNC_CFG1 */
+#define CLKSKIPEN	(1 << 7)
 #define REFCLKDIV(x)	(((x) & 0x3) << 3)
 #define REFCLKDIV_MASK	REFCLKDIV(0x3)
 
@@ -122,6 +123,11 @@ static int cs2000_enable_dev_config(struct cs2000_priv *priv, bool enable)
 	if (ret < 0)
 		return ret;
 
+	ret = cs2000_bset(priv, FUNC_CFG1, CLKSKIPEN,
+			  enable ? CLKSKIPEN : 0);
+	if (ret < 0)
+		return ret;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From b295a015cdfdbf36ce87076dbac8a490f2797cad Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 18 Apr 2017 02:19:37 +0000
Subject: clk: cs2000: tidyup DEVICE_CFG2 settings

DEVICE_CFG2 can select ratio from user defined ratio and LOCKCLK is
for it. But current driver sets fixed 0 value. This patch fixes it.
Note is that current cs2000 driver is using/supporting only ratio0
(= ch0) now.

DEVICE_CFG2 can select STATIC/DYNAMIC ratio mode, and current cs2000
driver is selecting STATIC mode, but it was not understandable on
current code. This patch also solve this issue.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk-cs2000-cp.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/clk-cs2000-cp.c b/drivers/clk/clk-cs2000-cp.c
index a8fa6bdd0e55..f3ab0cad2c9d 100644
--- a/drivers/clk/clk-cs2000-cp.c
+++ b/drivers/clk/clk-cs2000-cp.c
@@ -48,7 +48,9 @@
 #define AUTORMOD	(1 << 3)
 #define LOCKCLK(x)	(((x) & 0x3) << 1)
 #define LOCKCLK_MASK	LOCKCLK(0x3)
-#define FRACNSRC	(1 << 0)
+#define FRACNSRC_MASK	(1 << 0)
+#define FRACNSRC_STATIC		(0 << 0)
+#define FRACNSRC_DYNAMIC	(1 << 1)
 
 /* GLOBAL_CFG */
 #define ENDEV2		(0x1)
@@ -267,8 +269,8 @@ static int cs2000_ratio_select(struct cs2000_priv *priv, int ch)
 		return ret;
 
 	ret = cs2000_bset(priv, DEVICE_CFG2,
-			  (AUTORMOD | LOCKCLK_MASK | FRACNSRC),
-			  0);
+			  (AUTORMOD | LOCKCLK_MASK | FRACNSRC_MASK),
+			  (LOCKCLK(ch) | FRACNSRC_STATIC));
 	if (ret < 0)
 		return ret;
 
-- 
cgit v1.2.3


From b3da896e2f325f8fef29f698d9cbdbb0e821a474 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 18 Apr 2017 02:20:03 +0000
Subject: clk: cs2000: select 12.20 High Accuracy on LFRatioCfg

cs2000 can select Static/Dynamic ratio based Frequency Synthesizer
Mode, it can select 20.12 High Multiplier interpret for 32-bit
User Defined Ratio if Dynamic ratio mode. Otherwise it should select
12.20 High Accuracy mode.
Current cs2000 is supporting Static ratio mode only, so it should
select 12.20 High Accuracy mode, not 20.12 High Multiplier mode.
This patch fixes it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk-cs2000-cp.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/clk/clk-cs2000-cp.c b/drivers/clk/clk-cs2000-cp.c
index f3ab0cad2c9d..c54baede4d68 100644
--- a/drivers/clk/clk-cs2000-cp.c
+++ b/drivers/clk/clk-cs2000-cp.c
@@ -60,6 +60,11 @@
 #define REFCLKDIV(x)	(((x) & 0x3) << 3)
 #define REFCLKDIV_MASK	REFCLKDIV(0x3)
 
+/* FUNC_CFG2 */
+#define LFRATIO_MASK	(1 << 3)
+#define LFRATIO_20_12	(0 << 3)
+#define LFRATIO_12_20	(1 << 3)
+
 #define CH_SIZE_ERR(ch)		((ch < 0) || (ch >= CH_MAX))
 #define hw_to_priv(_hw)		container_of(_hw, struct cs2000_priv, hw)
 #define priv_to_client(priv)	(priv->client)
@@ -130,6 +135,12 @@ static int cs2000_enable_dev_config(struct cs2000_priv *priv, bool enable)
 	if (ret < 0)
 		return ret;
 
+	/* FIXME: for Static ratio mode */
+	ret = cs2000_bset(priv, FUNC_CFG2, LFRATIO_MASK,
+			  LFRATIO_12_20);
+	if (ret < 0)
+		return ret;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 7b9bae176a5c63fb467149174c11a48f3b3c5947 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Tue, 18 Apr 2017 09:25:47 +0200
Subject: clk: hisilicon: Use kcalloc() in hisi_clk_init()

* A multiplication for the size determination of a memory allocation
  indicated that an array data structure should be processed.
  Thus use the corresponding function "kcalloc".

  This issue was detected by using the Coccinelle software.

* Replace the specification of a data type by a pointer dereference
  to make the corresponding size determination a bit safer according to
  the Linux coding style convention.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/hisilicon/clk.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/clk/hisilicon/clk.c b/drivers/clk/hisilicon/clk.c
index 9ba2d91f4d3a..9268e80b7566 100644
--- a/drivers/clk/hisilicon/clk.c
+++ b/drivers/clk/hisilicon/clk.c
@@ -85,8 +85,7 @@ struct hisi_clock_data *hisi_clk_init(struct device_node *np,
 		goto err;
 	}
 	clk_data->base = base;
-
-	clk_table = kzalloc(sizeof(struct clk *) * nr_clks, GFP_KERNEL);
+	clk_table = kcalloc(nr_clks, sizeof(*clk_table), GFP_KERNEL);
 	if (!clk_table) {
 		pr_err("%s: could not allocate clock lookup table\n", __func__);
 		goto err_data;
-- 
cgit v1.2.3


From 8d9bdc46c780325414af7f09827c78c69f9b1e3d Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Tue, 18 Apr 2017 10:15:19 +0200
Subject: clk: hisilicon: Use devm_kmalloc_array() in hisi_clk_alloc()

* A multiplication for the size determination of a memory allocation
  indicated that an array data structure should be processed.
  Thus use the corresponding function "devm_kmalloc_array".

  This issue was detected by using the Coccinelle software.

* Replace the specification of a data type by a pointer dereference
  to make the corresponding size determination a bit safer according to
  the Linux coding style convention.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/hisilicon/clk.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/hisilicon/clk.c b/drivers/clk/hisilicon/clk.c
index 9268e80b7566..7e1e22eb5689 100644
--- a/drivers/clk/hisilicon/clk.c
+++ b/drivers/clk/hisilicon/clk.c
@@ -54,8 +54,9 @@ struct hisi_clock_data *hisi_clk_alloc(struct platform_device *pdev,
 	if (!clk_data->base)
 		return NULL;
 
-	clk_table = devm_kmalloc(&pdev->dev, sizeof(struct clk *) * nr_clks,
-				GFP_KERNEL);
+	clk_table = devm_kmalloc_array(&pdev->dev, nr_clks,
+				       sizeof(*clk_table),
+				       GFP_KERNEL);
 	if (!clk_table)
 		return NULL;
 
-- 
cgit v1.2.3


From 840e56326fd90a435644ee0eeba99f4cddd31759 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Tue, 18 Apr 2017 10:12:32 +0200
Subject: clk: hisilicon: Delete error messages for failed memory allocations
 in hisi_clk_init()

The script "checkpatch.pl" pointed information out like the following.

WARNING: Possible unnecessary 'out of memory' message

Thus remove such statements here.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/hisilicon/clk.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/clk/hisilicon/clk.c b/drivers/clk/hisilicon/clk.c
index 7e1e22eb5689..b73c1dfae7f1 100644
--- a/drivers/clk/hisilicon/clk.c
+++ b/drivers/clk/hisilicon/clk.c
@@ -81,16 +81,14 @@ struct hisi_clock_data *hisi_clk_init(struct device_node *np,
 	}
 
 	clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
-	if (!clk_data) {
-		pr_err("%s: could not allocate clock data\n", __func__);
+	if (!clk_data)
 		goto err;
-	}
+
 	clk_data->base = base;
 	clk_table = kcalloc(nr_clks, sizeof(*clk_table), GFP_KERNEL);
-	if (!clk_table) {
-		pr_err("%s: could not allocate clock lookup table\n", __func__);
+	if (!clk_table)
 		goto err_data;
-	}
+
 	clk_data->clk_data.clks = clk_table;
 	clk_data->clk_data.clk_num = nr_clks;
 	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data->clk_data);
-- 
cgit v1.2.3


From 781de7ade6c3ccc5efca9c635d64c1c07427a6d1 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Tue, 18 Apr 2017 10:30:04 +0200
Subject: clk: hi3620: Use kcalloc() in hi3620_mmc_clk_init()

* A multiplication for the size determination of a memory allocation
  indicated that an array data structure should be processed.
  Thus use the corresponding function "kcalloc".

  This issue was detected by using the Coccinelle software.

* Replace the specification of a data type by a pointer dereference
  to make the corresponding size determination a bit safer according to
  the Linux coding style convention.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/hisilicon/clk-hi3620.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/hisilicon/clk-hi3620.c b/drivers/clk/hisilicon/clk-hi3620.c
index d04a104ce1b4..f6dd971c9d3c 100644
--- a/drivers/clk/hisilicon/clk-hi3620.c
+++ b/drivers/clk/hisilicon/clk-hi3620.c
@@ -482,7 +482,7 @@ static void __init hi3620_mmc_clk_init(struct device_node *node)
 	if (WARN_ON(!clk_data))
 		return;
 
-	clk_data->clks = kzalloc(sizeof(struct clk *) * num, GFP_KERNEL);
+	clk_data->clks = kcalloc(num, sizeof(*clk_data->clks), GFP_KERNEL);
 	if (!clk_data->clks) {
 		pr_err("%s: fail to allocate mmc clk\n", __func__);
 		return;
-- 
cgit v1.2.3


From 3cda284b77cba833baea9a0f5e256a34d249761f Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Tue, 18 Apr 2017 10:50:53 +0200
Subject: clk: hi3620: Delete error messages for a failed memory allocation in
 two functions

The script "checkpatch.pl" pointed information out like the following.

WARNING: Possible unnecessary 'out of memory' message

Thus remove such statements here.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/hisilicon/clk-hi3620.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/clk/hisilicon/clk-hi3620.c b/drivers/clk/hisilicon/clk-hi3620.c
index f6dd971c9d3c..33a713ac8fe6 100644
--- a/drivers/clk/hisilicon/clk-hi3620.c
+++ b/drivers/clk/hisilicon/clk-hi3620.c
@@ -430,10 +430,8 @@ static struct clk *hisi_register_clk_mmc(struct hisi_mmc_clock *mmc_clk,
 	struct clk_init_data init;
 
 	mclk = kzalloc(sizeof(*mclk), GFP_KERNEL);
-	if (!mclk) {
-		pr_err("%s: fail to allocate mmc clk\n", __func__);
+	if (!mclk)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	init.name = mmc_clk->name;
 	init.ops = &clk_mmc_ops;
@@ -483,10 +481,8 @@ static void __init hi3620_mmc_clk_init(struct device_node *node)
 		return;
 
 	clk_data->clks = kcalloc(num, sizeof(*clk_data->clks), GFP_KERNEL);
-	if (!clk_data->clks) {
-		pr_err("%s: fail to allocate mmc clk\n", __func__);
+	if (!clk_data->clks)
 		return;
-	}
 
 	for (i = 0; i < num; i++) {
 		struct hisi_mmc_clock *mmc_clk = &hi3620_mmc_clks[i];
-- 
cgit v1.2.3


From 34675d67032ae568331a2a230973c7b58ddcaf9d Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Tue, 18 Apr 2017 11:15:56 +0200
Subject: clk: hi3620: Fix a typo in one variable name

The script "checkpatch.pl" pointed information out like the following.

CHECK: 'seperated' may be misspelled - perhaps 'separated'?

Thus rename the affected variable.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/hisilicon/clk-hi3620.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/hisilicon/clk-hi3620.c b/drivers/clk/hisilicon/clk-hi3620.c
index 33a713ac8fe6..fa0fba653898 100644
--- a/drivers/clk/hisilicon/clk-hi3620.c
+++ b/drivers/clk/hisilicon/clk-hi3620.c
@@ -144,7 +144,7 @@ static struct hisi_divider_clock hi3620_div_clks[] __initdata = {
 	{ HI3620_MMC3_DIV,     "mmc3_div",   "mmc3_mux",  0, 0x140, 5, 4, CLK_DIVIDER_HIWORD_MASK, NULL, },
 };
 
-static struct hisi_gate_clock hi3620_seperated_gate_clks[] __initdata = {
+static struct hisi_gate_clock hi3620_separated_gate_clks[] __initdata = {
 	{ HI3620_TIMERCLK01,   "timerclk01",   "timer_rclk01", CLK_SET_RATE_PARENT, 0x20, 0, 0, },
 	{ HI3620_TIMER_RCLK01, "timer_rclk01", "rclk_tcxo",    CLK_SET_RATE_PARENT, 0x20, 1, 0, },
 	{ HI3620_TIMERCLK23,   "timerclk23",   "timer_rclk23", CLK_SET_RATE_PARENT, 0x20, 2, 0, },
@@ -224,8 +224,8 @@ static void __init hi3620_clk_init(struct device_node *np)
 			      clk_data);
 	hisi_clk_register_divider(hi3620_div_clks, ARRAY_SIZE(hi3620_div_clks),
 				  clk_data);
-	hisi_clk_register_gate_sep(hi3620_seperated_gate_clks,
-				   ARRAY_SIZE(hi3620_seperated_gate_clks),
+	hisi_clk_register_gate_sep(hi3620_separated_gate_clks,
+				   ARRAY_SIZE(hi3620_separated_gate_clks),
 				   clk_data);
 }
 CLK_OF_DECLARE(hi3620_clk, "hisilicon,hi3620-clock", hi3620_clk_init);
-- 
cgit v1.2.3


From 8918821f370467937afdda4bf7aa6c9c1a732be5 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 23 Feb 2017 00:34:00 +0000
Subject: jffs2: fix spelling mistake: "requestied" -> "requested"

trivial fix to spelling mistake in JFFS2_ERROR message

Signed-off-by: Colin Ian King <colin.king@canonical.com>
[Brian: also fix 'an' -> 'a']
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 fs/jffs2/readinode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 06a71dbd4833..389ea53ea487 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1366,7 +1366,7 @@ int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
 		jffs2_add_ino_cache(c, f->inocache);
 	}
 	if (!f->inocache) {
-		JFFS2_ERROR("requestied to read an nonexistent ino %u\n", ino);
+		JFFS2_ERROR("requested to read a nonexistent ino %u\n", ino);
 		return -ENOENT;
 	}
 
-- 
cgit v1.2.3


From 4a67c9fde04fc1b6752fa68c495310ca3ed29eeb Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Fri, 31 Mar 2017 11:11:48 +0200
Subject: mtd: use dev_of_node helper in mtd_get_of_node
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows better compile-time optimizations with CONFIG_OF disabled.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/mtd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index eebdc63cf6af..f8db5b2e4028 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -393,7 +393,7 @@ static inline void mtd_set_of_node(struct mtd_info *mtd,
 
 static inline struct device_node *mtd_get_of_node(struct mtd_info *mtd)
 {
-	return mtd->dev.of_node;
+	return dev_of_node(&mtd->dev);
 }
 
 static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops)
-- 
cgit v1.2.3


From 8c925b263584e5a37244297ea9bd072020265fd4 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 30 Mar 2017 17:36:39 +0200
Subject: mtd: physmap_of: really fix the physmap add-ons

The current way of building the of_physmap add-ons result in just
the add-on being in the object code, and not the actual core
implementation and regress the Gemini and Versatile.

Bake the physmap_of.o object by baking physmap_of_core.o and
adding the Versatile and/or Gemini add-ons to the final object.
Rename the source file physmap_of_core.c to get the desired
build components.

Suggested-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Fixes: 4f04f68e1598 ("mtd: physmap_of: fixup gemini/versatile dependencies")
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/maps/Makefile          |  10 +-
 drivers/mtd/maps/physmap_of.c      | 389 -------------------------------------
 drivers/mtd/maps/physmap_of_core.c | 389 +++++++++++++++++++++++++++++++++++++
 3 files changed, 393 insertions(+), 395 deletions(-)
 delete mode 100644 drivers/mtd/maps/physmap_of.c
 create mode 100644 drivers/mtd/maps/physmap_of_core.c

diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index aef1846b4de2..5a09a72ab112 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -17,12 +17,10 @@ obj-$(CONFIG_MTD_CK804XROM)	+= ck804xrom.o
 obj-$(CONFIG_MTD_TSUNAMI)	+= tsunami_flash.o
 obj-$(CONFIG_MTD_PXA2XX)	+= pxa2xx-flash.o
 obj-$(CONFIG_MTD_PHYSMAP)	+= physmap.o
-ifdef CONFIG_MTD_PHYSMAP_OF_VERSATILE
-physmap_of-objs += physmap_of_versatile.o
-endif
-ifdef CONFIG_MTD_PHYSMAP_OF_GEMINI
-physmap_of-objs += physmap_of_gemini.o
-endif
+physmap_of-objs-y		+= physmap_of_core.o
+physmap_of-objs-$(CONFIG_MTD_PHYSMAP_OF_VERSATILE) += physmap_of_versatile.o
+physmap_of-objs-$(CONFIG_MTD_PHYSMAP_OF_GEMINI) += physmap_of_gemini.o
+physmap_of-objs			:= $(physmap_of-objs-y)
 obj-$(CONFIG_MTD_PHYSMAP_OF)	+= physmap_of.o
 obj-$(CONFIG_MTD_PISMO)		+= pismo.o
 obj-$(CONFIG_MTD_PMC_MSP_EVM)   += pmcmsp-flash.o
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
deleted file mode 100644
index 14e8909c9955..000000000000
--- a/drivers/mtd/maps/physmap_of.c
+++ /dev/null
@@ -1,389 +0,0 @@
-/*
- * Flash mappings described by the OF (or flattened) device tree
- *
- * Copyright (C) 2006 MontaVista Software Inc.
- * Author: Vitaly Wool <vwool@ru.mvista.com>
- *
- * Revised to handle newer style flash binding by:
- *   Copyright (C) 2007 David Gibson, IBM Corporation.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/device.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <linux/mtd/concat.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
-#include <linux/slab.h>
-#include "physmap_of_gemini.h"
-#include "physmap_of_versatile.h"
-
-struct of_flash_list {
-	struct mtd_info *mtd;
-	struct map_info map;
-	struct resource *res;
-};
-
-struct of_flash {
-	struct mtd_info		*cmtd;
-	int list_size; /* number of elements in of_flash_list */
-	struct of_flash_list	list[0];
-};
-
-static int of_flash_remove(struct platform_device *dev)
-{
-	struct of_flash *info;
-	int i;
-
-	info = dev_get_drvdata(&dev->dev);
-	if (!info)
-		return 0;
-	dev_set_drvdata(&dev->dev, NULL);
-
-	if (info->cmtd) {
-		mtd_device_unregister(info->cmtd);
-		if (info->cmtd != info->list[0].mtd)
-			mtd_concat_destroy(info->cmtd);
-	}
-
-	for (i = 0; i < info->list_size; i++) {
-		if (info->list[i].mtd)
-			map_destroy(info->list[i].mtd);
-
-		if (info->list[i].map.virt)
-			iounmap(info->list[i].map.virt);
-
-		if (info->list[i].res) {
-			release_resource(info->list[i].res);
-			kfree(info->list[i].res);
-		}
-	}
-	return 0;
-}
-
-static const char * const rom_probe_types[] = {
-	"cfi_probe", "jedec_probe", "map_rom" };
-
-/* Helper function to handle probing of the obsolete "direct-mapped"
- * compatible binding, which has an extra "probe-type" property
- * describing the type of flash probe necessary. */
-static struct mtd_info *obsolete_probe(struct platform_device *dev,
-				       struct map_info *map)
-{
-	struct device_node *dp = dev->dev.of_node;
-	const char *of_probe;
-	struct mtd_info *mtd;
-	int i;
-
-	dev_warn(&dev->dev, "Device tree uses obsolete \"direct-mapped\" "
-		 "flash binding\n");
-
-	of_probe = of_get_property(dp, "probe-type", NULL);
-	if (!of_probe) {
-		for (i = 0; i < ARRAY_SIZE(rom_probe_types); i++) {
-			mtd = do_map_probe(rom_probe_types[i], map);
-			if (mtd)
-				return mtd;
-		}
-		return NULL;
-	} else if (strcmp(of_probe, "CFI") == 0) {
-		return do_map_probe("cfi_probe", map);
-	} else if (strcmp(of_probe, "JEDEC") == 0) {
-		return do_map_probe("jedec_probe", map);
-	} else {
-		if (strcmp(of_probe, "ROM") != 0)
-			dev_warn(&dev->dev, "obsolete_probe: don't know probe "
-				 "type '%s', mapping as rom\n", of_probe);
-		return do_map_probe("map_rom", map);
-	}
-}
-
-/* When partitions are set we look for a linux,part-probe property which
-   specifies the list of partition probers to use. If none is given then the
-   default is use. These take precedence over other device tree
-   information. */
-static const char * const part_probe_types_def[] = {
-	"cmdlinepart", "RedBoot", "ofpart", "ofoldpart", NULL };
-
-static const char * const *of_get_probes(struct device_node *dp)
-{
-	const char *cp;
-	int cplen;
-	unsigned int l;
-	unsigned int count;
-	const char **res;
-
-	cp = of_get_property(dp, "linux,part-probe", &cplen);
-	if (cp == NULL)
-		return part_probe_types_def;
-
-	count = 0;
-	for (l = 0; l != cplen; l++)
-		if (cp[l] == 0)
-			count++;
-
-	res = kzalloc((count + 1)*sizeof(*res), GFP_KERNEL);
-	if (!res)
-		return NULL;
-	count = 0;
-	while (cplen > 0) {
-		res[count] = cp;
-		l = strlen(cp) + 1;
-		cp += l;
-		cplen -= l;
-		count++;
-	}
-	return res;
-}
-
-static void of_free_probes(const char * const *probes)
-{
-	if (probes != part_probe_types_def)
-		kfree(probes);
-}
-
-static const struct of_device_id of_flash_match[];
-static int of_flash_probe(struct platform_device *dev)
-{
-	const char * const *part_probe_types;
-	const struct of_device_id *match;
-	struct device_node *dp = dev->dev.of_node;
-	struct resource res;
-	struct of_flash *info;
-	const char *probe_type;
-	const __be32 *width;
-	int err;
-	int i;
-	int count;
-	const __be32 *p;
-	int reg_tuple_size;
-	struct mtd_info **mtd_list = NULL;
-	resource_size_t res_size;
-	bool map_indirect;
-	const char *mtd_name = NULL;
-
-	match = of_match_device(of_flash_match, &dev->dev);
-	if (!match)
-		return -EINVAL;
-	probe_type = match->data;
-
-	reg_tuple_size = (of_n_addr_cells(dp) + of_n_size_cells(dp)) * sizeof(u32);
-
-	of_property_read_string(dp, "linux,mtd-name", &mtd_name);
-
-	/*
-	 * Get number of "reg" tuples. Scan for MTD devices on area's
-	 * described by each "reg" region. This makes it possible (including
-	 * the concat support) to support the Intel P30 48F4400 chips which
-	 * consists internally of 2 non-identical NOR chips on one die.
-	 */
-	p = of_get_property(dp, "reg", &count);
-	if (!p || count % reg_tuple_size != 0) {
-		dev_err(&dev->dev, "Malformed reg property on %s\n",
-				dev->dev.of_node->full_name);
-		err = -EINVAL;
-		goto err_flash_remove;
-	}
-	count /= reg_tuple_size;
-
-	map_indirect = of_property_read_bool(dp, "no-unaligned-direct-access");
-
-	err = -ENOMEM;
-	info = devm_kzalloc(&dev->dev,
-			    sizeof(struct of_flash) +
-			    sizeof(struct of_flash_list) * count, GFP_KERNEL);
-	if (!info)
-		goto err_flash_remove;
-
-	dev_set_drvdata(&dev->dev, info);
-
-	mtd_list = kzalloc(sizeof(*mtd_list) * count, GFP_KERNEL);
-	if (!mtd_list)
-		goto err_flash_remove;
-
-	for (i = 0; i < count; i++) {
-		err = -ENXIO;
-		if (of_address_to_resource(dp, i, &res)) {
-			/*
-			 * Continue with next register tuple if this
-			 * one is not mappable
-			 */
-			continue;
-		}
-
-		dev_dbg(&dev->dev, "of_flash device: %pR\n", &res);
-
-		err = -EBUSY;
-		res_size = resource_size(&res);
-		info->list[i].res = request_mem_region(res.start, res_size,
-						       dev_name(&dev->dev));
-		if (!info->list[i].res)
-			goto err_out;
-
-		err = -ENXIO;
-		width = of_get_property(dp, "bank-width", NULL);
-		if (!width) {
-			dev_err(&dev->dev, "Can't get bank width from device"
-				" tree\n");
-			goto err_out;
-		}
-
-		info->list[i].map.name = mtd_name ?: dev_name(&dev->dev);
-		info->list[i].map.phys = res.start;
-		info->list[i].map.size = res_size;
-		info->list[i].map.bankwidth = be32_to_cpup(width);
-		info->list[i].map.device_node = dp;
-
-		err = of_flash_probe_gemini(dev, dp, &info->list[i].map);
-		if (err)
-			return err;
-		err = of_flash_probe_versatile(dev, dp, &info->list[i].map);
-		if (err)
-			return err;
-
-		err = -ENOMEM;
-		info->list[i].map.virt = ioremap(info->list[i].map.phys,
-						 info->list[i].map.size);
-		if (!info->list[i].map.virt) {
-			dev_err(&dev->dev, "Failed to ioremap() flash"
-				" region\n");
-			goto err_out;
-		}
-
-		simple_map_init(&info->list[i].map);
-
-		/*
-		 * On some platforms (e.g. MPC5200) a direct 1:1 mapping
-		 * may cause problems with JFFS2 usage, as the local bus (LPB)
-		 * doesn't support unaligned accesses as implemented in the
-		 * JFFS2 code via memcpy(). By setting NO_XIP, the
-		 * flash will not be exposed directly to the MTD users
-		 * (e.g. JFFS2) any more.
-		 */
-		if (map_indirect)
-			info->list[i].map.phys = NO_XIP;
-
-		if (probe_type) {
-			info->list[i].mtd = do_map_probe(probe_type,
-							 &info->list[i].map);
-		} else {
-			info->list[i].mtd = obsolete_probe(dev,
-							   &info->list[i].map);
-		}
-
-		/* Fall back to mapping region as ROM */
-		if (!info->list[i].mtd) {
-			dev_warn(&dev->dev,
-				"do_map_probe() failed for type %s\n",
-				 probe_type);
-
-			info->list[i].mtd = do_map_probe("map_rom",
-							 &info->list[i].map);
-		}
-		mtd_list[i] = info->list[i].mtd;
-
-		err = -ENXIO;
-		if (!info->list[i].mtd) {
-			dev_err(&dev->dev, "do_map_probe() failed\n");
-			goto err_out;
-		} else {
-			info->list_size++;
-		}
-		info->list[i].mtd->dev.parent = &dev->dev;
-	}
-
-	err = 0;
-	info->cmtd = NULL;
-	if (info->list_size == 1) {
-		info->cmtd = info->list[0].mtd;
-	} else if (info->list_size > 1) {
-		/*
-		 * We detected multiple devices. Concatenate them together.
-		 */
-		info->cmtd = mtd_concat_create(mtd_list, info->list_size,
-					       dev_name(&dev->dev));
-	}
-	if (info->cmtd == NULL)
-		err = -ENXIO;
-
-	if (err)
-		goto err_out;
-
-	info->cmtd->dev.parent = &dev->dev;
-	mtd_set_of_node(info->cmtd, dp);
-	part_probe_types = of_get_probes(dp);
-	if (!part_probe_types) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	mtd_device_parse_register(info->cmtd, part_probe_types, NULL,
-			NULL, 0);
-	of_free_probes(part_probe_types);
-
-	kfree(mtd_list);
-
-	return 0;
-
-err_out:
-	kfree(mtd_list);
-err_flash_remove:
-	of_flash_remove(dev);
-
-	return err;
-}
-
-static const struct of_device_id of_flash_match[] = {
-	{
-		.compatible	= "cfi-flash",
-		.data		= (void *)"cfi_probe",
-	},
-	{
-		/* FIXME: JEDEC chips can't be safely and reliably
-		 * probed, although the mtd code gets it right in
-		 * practice most of the time.  We should use the
-		 * vendor and device ids specified by the binding to
-		 * bypass the heuristic probe code, but the mtd layer
-		 * provides, at present, no interface for doing so
-		 * :(. */
-		.compatible	= "jedec-flash",
-		.data		= (void *)"jedec_probe",
-	},
-	{
-		.compatible     = "mtd-ram",
-		.data           = (void *)"map_ram",
-	},
-	{
-		.compatible     = "mtd-rom",
-		.data           = (void *)"map_rom",
-	},
-	{
-		.type		= "rom",
-		.compatible	= "direct-mapped"
-	},
-	{ },
-};
-MODULE_DEVICE_TABLE(of, of_flash_match);
-
-static struct platform_driver of_flash_driver = {
-	.driver = {
-		.name = "of-flash",
-		.of_match_table = of_flash_match,
-	},
-	.probe		= of_flash_probe,
-	.remove		= of_flash_remove,
-};
-
-module_platform_driver(of_flash_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Vitaly Wool <vwool@ru.mvista.com>");
-MODULE_DESCRIPTION("Device tree based MTD map driver");
diff --git a/drivers/mtd/maps/physmap_of_core.c b/drivers/mtd/maps/physmap_of_core.c
new file mode 100644
index 000000000000..14e8909c9955
--- /dev/null
+++ b/drivers/mtd/maps/physmap_of_core.c
@@ -0,0 +1,389 @@
+/*
+ * Flash mappings described by the OF (or flattened) device tree
+ *
+ * Copyright (C) 2006 MontaVista Software Inc.
+ * Author: Vitaly Wool <vwool@ru.mvista.com>
+ *
+ * Revised to handle newer style flash binding by:
+ *   Copyright (C) 2007 David Gibson, IBM Corporation.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/concat.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include "physmap_of_gemini.h"
+#include "physmap_of_versatile.h"
+
+struct of_flash_list {
+	struct mtd_info *mtd;
+	struct map_info map;
+	struct resource *res;
+};
+
+struct of_flash {
+	struct mtd_info		*cmtd;
+	int list_size; /* number of elements in of_flash_list */
+	struct of_flash_list	list[0];
+};
+
+static int of_flash_remove(struct platform_device *dev)
+{
+	struct of_flash *info;
+	int i;
+
+	info = dev_get_drvdata(&dev->dev);
+	if (!info)
+		return 0;
+	dev_set_drvdata(&dev->dev, NULL);
+
+	if (info->cmtd) {
+		mtd_device_unregister(info->cmtd);
+		if (info->cmtd != info->list[0].mtd)
+			mtd_concat_destroy(info->cmtd);
+	}
+
+	for (i = 0; i < info->list_size; i++) {
+		if (info->list[i].mtd)
+			map_destroy(info->list[i].mtd);
+
+		if (info->list[i].map.virt)
+			iounmap(info->list[i].map.virt);
+
+		if (info->list[i].res) {
+			release_resource(info->list[i].res);
+			kfree(info->list[i].res);
+		}
+	}
+	return 0;
+}
+
+static const char * const rom_probe_types[] = {
+	"cfi_probe", "jedec_probe", "map_rom" };
+
+/* Helper function to handle probing of the obsolete "direct-mapped"
+ * compatible binding, which has an extra "probe-type" property
+ * describing the type of flash probe necessary. */
+static struct mtd_info *obsolete_probe(struct platform_device *dev,
+				       struct map_info *map)
+{
+	struct device_node *dp = dev->dev.of_node;
+	const char *of_probe;
+	struct mtd_info *mtd;
+	int i;
+
+	dev_warn(&dev->dev, "Device tree uses obsolete \"direct-mapped\" "
+		 "flash binding\n");
+
+	of_probe = of_get_property(dp, "probe-type", NULL);
+	if (!of_probe) {
+		for (i = 0; i < ARRAY_SIZE(rom_probe_types); i++) {
+			mtd = do_map_probe(rom_probe_types[i], map);
+			if (mtd)
+				return mtd;
+		}
+		return NULL;
+	} else if (strcmp(of_probe, "CFI") == 0) {
+		return do_map_probe("cfi_probe", map);
+	} else if (strcmp(of_probe, "JEDEC") == 0) {
+		return do_map_probe("jedec_probe", map);
+	} else {
+		if (strcmp(of_probe, "ROM") != 0)
+			dev_warn(&dev->dev, "obsolete_probe: don't know probe "
+				 "type '%s', mapping as rom\n", of_probe);
+		return do_map_probe("map_rom", map);
+	}
+}
+
+/* When partitions are set we look for a linux,part-probe property which
+   specifies the list of partition probers to use. If none is given then the
+   default is use. These take precedence over other device tree
+   information. */
+static const char * const part_probe_types_def[] = {
+	"cmdlinepart", "RedBoot", "ofpart", "ofoldpart", NULL };
+
+static const char * const *of_get_probes(struct device_node *dp)
+{
+	const char *cp;
+	int cplen;
+	unsigned int l;
+	unsigned int count;
+	const char **res;
+
+	cp = of_get_property(dp, "linux,part-probe", &cplen);
+	if (cp == NULL)
+		return part_probe_types_def;
+
+	count = 0;
+	for (l = 0; l != cplen; l++)
+		if (cp[l] == 0)
+			count++;
+
+	res = kzalloc((count + 1)*sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return NULL;
+	count = 0;
+	while (cplen > 0) {
+		res[count] = cp;
+		l = strlen(cp) + 1;
+		cp += l;
+		cplen -= l;
+		count++;
+	}
+	return res;
+}
+
+static void of_free_probes(const char * const *probes)
+{
+	if (probes != part_probe_types_def)
+		kfree(probes);
+}
+
+static const struct of_device_id of_flash_match[];
+static int of_flash_probe(struct platform_device *dev)
+{
+	const char * const *part_probe_types;
+	const struct of_device_id *match;
+	struct device_node *dp = dev->dev.of_node;
+	struct resource res;
+	struct of_flash *info;
+	const char *probe_type;
+	const __be32 *width;
+	int err;
+	int i;
+	int count;
+	const __be32 *p;
+	int reg_tuple_size;
+	struct mtd_info **mtd_list = NULL;
+	resource_size_t res_size;
+	bool map_indirect;
+	const char *mtd_name = NULL;
+
+	match = of_match_device(of_flash_match, &dev->dev);
+	if (!match)
+		return -EINVAL;
+	probe_type = match->data;
+
+	reg_tuple_size = (of_n_addr_cells(dp) + of_n_size_cells(dp)) * sizeof(u32);
+
+	of_property_read_string(dp, "linux,mtd-name", &mtd_name);
+
+	/*
+	 * Get number of "reg" tuples. Scan for MTD devices on area's
+	 * described by each "reg" region. This makes it possible (including
+	 * the concat support) to support the Intel P30 48F4400 chips which
+	 * consists internally of 2 non-identical NOR chips on one die.
+	 */
+	p = of_get_property(dp, "reg", &count);
+	if (!p || count % reg_tuple_size != 0) {
+		dev_err(&dev->dev, "Malformed reg property on %s\n",
+				dev->dev.of_node->full_name);
+		err = -EINVAL;
+		goto err_flash_remove;
+	}
+	count /= reg_tuple_size;
+
+	map_indirect = of_property_read_bool(dp, "no-unaligned-direct-access");
+
+	err = -ENOMEM;
+	info = devm_kzalloc(&dev->dev,
+			    sizeof(struct of_flash) +
+			    sizeof(struct of_flash_list) * count, GFP_KERNEL);
+	if (!info)
+		goto err_flash_remove;
+
+	dev_set_drvdata(&dev->dev, info);
+
+	mtd_list = kzalloc(sizeof(*mtd_list) * count, GFP_KERNEL);
+	if (!mtd_list)
+		goto err_flash_remove;
+
+	for (i = 0; i < count; i++) {
+		err = -ENXIO;
+		if (of_address_to_resource(dp, i, &res)) {
+			/*
+			 * Continue with next register tuple if this
+			 * one is not mappable
+			 */
+			continue;
+		}
+
+		dev_dbg(&dev->dev, "of_flash device: %pR\n", &res);
+
+		err = -EBUSY;
+		res_size = resource_size(&res);
+		info->list[i].res = request_mem_region(res.start, res_size,
+						       dev_name(&dev->dev));
+		if (!info->list[i].res)
+			goto err_out;
+
+		err = -ENXIO;
+		width = of_get_property(dp, "bank-width", NULL);
+		if (!width) {
+			dev_err(&dev->dev, "Can't get bank width from device"
+				" tree\n");
+			goto err_out;
+		}
+
+		info->list[i].map.name = mtd_name ?: dev_name(&dev->dev);
+		info->list[i].map.phys = res.start;
+		info->list[i].map.size = res_size;
+		info->list[i].map.bankwidth = be32_to_cpup(width);
+		info->list[i].map.device_node = dp;
+
+		err = of_flash_probe_gemini(dev, dp, &info->list[i].map);
+		if (err)
+			return err;
+		err = of_flash_probe_versatile(dev, dp, &info->list[i].map);
+		if (err)
+			return err;
+
+		err = -ENOMEM;
+		info->list[i].map.virt = ioremap(info->list[i].map.phys,
+						 info->list[i].map.size);
+		if (!info->list[i].map.virt) {
+			dev_err(&dev->dev, "Failed to ioremap() flash"
+				" region\n");
+			goto err_out;
+		}
+
+		simple_map_init(&info->list[i].map);
+
+		/*
+		 * On some platforms (e.g. MPC5200) a direct 1:1 mapping
+		 * may cause problems with JFFS2 usage, as the local bus (LPB)
+		 * doesn't support unaligned accesses as implemented in the
+		 * JFFS2 code via memcpy(). By setting NO_XIP, the
+		 * flash will not be exposed directly to the MTD users
+		 * (e.g. JFFS2) any more.
+		 */
+		if (map_indirect)
+			info->list[i].map.phys = NO_XIP;
+
+		if (probe_type) {
+			info->list[i].mtd = do_map_probe(probe_type,
+							 &info->list[i].map);
+		} else {
+			info->list[i].mtd = obsolete_probe(dev,
+							   &info->list[i].map);
+		}
+
+		/* Fall back to mapping region as ROM */
+		if (!info->list[i].mtd) {
+			dev_warn(&dev->dev,
+				"do_map_probe() failed for type %s\n",
+				 probe_type);
+
+			info->list[i].mtd = do_map_probe("map_rom",
+							 &info->list[i].map);
+		}
+		mtd_list[i] = info->list[i].mtd;
+
+		err = -ENXIO;
+		if (!info->list[i].mtd) {
+			dev_err(&dev->dev, "do_map_probe() failed\n");
+			goto err_out;
+		} else {
+			info->list_size++;
+		}
+		info->list[i].mtd->dev.parent = &dev->dev;
+	}
+
+	err = 0;
+	info->cmtd = NULL;
+	if (info->list_size == 1) {
+		info->cmtd = info->list[0].mtd;
+	} else if (info->list_size > 1) {
+		/*
+		 * We detected multiple devices. Concatenate them together.
+		 */
+		info->cmtd = mtd_concat_create(mtd_list, info->list_size,
+					       dev_name(&dev->dev));
+	}
+	if (info->cmtd == NULL)
+		err = -ENXIO;
+
+	if (err)
+		goto err_out;
+
+	info->cmtd->dev.parent = &dev->dev;
+	mtd_set_of_node(info->cmtd, dp);
+	part_probe_types = of_get_probes(dp);
+	if (!part_probe_types) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	mtd_device_parse_register(info->cmtd, part_probe_types, NULL,
+			NULL, 0);
+	of_free_probes(part_probe_types);
+
+	kfree(mtd_list);
+
+	return 0;
+
+err_out:
+	kfree(mtd_list);
+err_flash_remove:
+	of_flash_remove(dev);
+
+	return err;
+}
+
+static const struct of_device_id of_flash_match[] = {
+	{
+		.compatible	= "cfi-flash",
+		.data		= (void *)"cfi_probe",
+	},
+	{
+		/* FIXME: JEDEC chips can't be safely and reliably
+		 * probed, although the mtd code gets it right in
+		 * practice most of the time.  We should use the
+		 * vendor and device ids specified by the binding to
+		 * bypass the heuristic probe code, but the mtd layer
+		 * provides, at present, no interface for doing so
+		 * :(. */
+		.compatible	= "jedec-flash",
+		.data		= (void *)"jedec_probe",
+	},
+	{
+		.compatible     = "mtd-ram",
+		.data           = (void *)"map_ram",
+	},
+	{
+		.compatible     = "mtd-rom",
+		.data           = (void *)"map_rom",
+	},
+	{
+		.type		= "rom",
+		.compatible	= "direct-mapped"
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, of_flash_match);
+
+static struct platform_driver of_flash_driver = {
+	.driver = {
+		.name = "of-flash",
+		.of_match_table = of_flash_match,
+	},
+	.probe		= of_flash_probe,
+	.remove		= of_flash_remove,
+};
+
+module_platform_driver(of_flash_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Vitaly Wool <vwool@ru.mvista.com>");
+MODULE_DESCRIPTION("Device tree based MTD map driver");
-- 
cgit v1.2.3


From a897bf138c9b4435e17444efcc8269e9577f5165 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Wed, 19 Apr 2017 22:05:48 +0200
Subject: rtc: m41t80: Add proper compatible for rv4162

The correct compatible for the rv4162 (microcrystal,rv4162) was not used
upstream and so was not added by eb235c561d04e.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-m41t80.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 5b070aa711f9..5ec4653022ff 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -128,6 +128,11 @@ static const struct of_device_id m41t80_of_match[] = {
 		.compatible = "st,m41t87",
 		.data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ)
 	},
+	{
+		.compatible = "microcrystal,rv4162",
+		.data = (void *)(M41T80_FEATURE_SQ | M41T80_FEATURE_WD | M41T80_FEATURE_SQ_ALT)
+	},
+	/* DT compatibility only, do not use compatibles below: */
 	{
 		.compatible = "st,rv4162",
 		.data = (void *)(M41T80_FEATURE_SQ | M41T80_FEATURE_WD | M41T80_FEATURE_SQ_ALT)
-- 
cgit v1.2.3


From e8348dc554f108f603101bc49ff897f0c9313c23 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 16 Feb 2017 23:11:37 -0800
Subject: drivers/mtd: Convert remaining uses of pr_warning to pr_warn

To enable eventual removal of pr_warning

This makes pr_warn use consistent for drivers/mtd

Prior to this patch, there were 7 uses of pr_warning and
31 uses of pr_warn in drivers/mtd

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 12 ++++++++----
 drivers/mtd/nand/cmx270_nand.c      |  4 ++--
 drivers/mtd/ofpart.c                |  4 ++--
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 9dca881bb378..56aa6b75213d 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -323,7 +323,8 @@ static void fixup_sst38vf640x_sectorsize(struct mtd_info *mtd)
 	 * it should report a size of 8KBytes (0x0020*256).
 	 */
 	cfi->cfiq->EraseRegionInfo[0] = 0x002003ff;
-	pr_warning("%s: Bad 38VF640x CFI data; adjusting sector size from 64 to 8KiB\n", mtd->name);
+	pr_warn("%s: Bad 38VF640x CFI data; adjusting sector size from 64 to 8KiB\n",
+		mtd->name);
 }
 
 static void fixup_s29gl064n_sectors(struct mtd_info *mtd)
@@ -333,7 +334,8 @@ static void fixup_s29gl064n_sectors(struct mtd_info *mtd)
 
 	if ((cfi->cfiq->EraseRegionInfo[0] & 0xffff) == 0x003f) {
 		cfi->cfiq->EraseRegionInfo[0] |= 0x0040;
-		pr_warning("%s: Bad S29GL064N CFI data; adjust from 64 to 128 sectors\n", mtd->name);
+		pr_warn("%s: Bad S29GL064N CFI data; adjust from 64 to 128 sectors\n",
+			mtd->name);
 	}
 }
 
@@ -344,7 +346,8 @@ static void fixup_s29gl032n_sectors(struct mtd_info *mtd)
 
 	if ((cfi->cfiq->EraseRegionInfo[1] & 0xffff) == 0x007e) {
 		cfi->cfiq->EraseRegionInfo[1] &= ~0x0040;
-		pr_warning("%s: Bad S29GL032N CFI data; adjust from 127 to 63 sectors\n", mtd->name);
+		pr_warn("%s: Bad S29GL032N CFI data; adjust from 127 to 63 sectors\n",
+			mtd->name);
 	}
 }
 
@@ -358,7 +361,8 @@ static void fixup_s29ns512p_sectors(struct mtd_info *mtd)
 	 * which is not permitted by CFI.
 	 */
 	cfi->cfiq->EraseRegionInfo[0] = 0x020001ff;
-	pr_warning("%s: Bad S29NS512P CFI data; adjust to 512 sectors\n", mtd->name);
+	pr_warn("%s: Bad S29NS512P CFI data; adjust to 512 sectors\n",
+		mtd->name);
 }
 
 /* Used to fix CFI-Tables of chips without Extended Query Tables */
diff --git a/drivers/mtd/nand/cmx270_nand.c b/drivers/mtd/nand/cmx270_nand.c
index 226ac0bcafc6..949b9400dcb7 100644
--- a/drivers/mtd/nand/cmx270_nand.c
+++ b/drivers/mtd/nand/cmx270_nand.c
@@ -145,7 +145,7 @@ static int __init cmx270_init(void)
 
 	ret = gpio_request(GPIO_NAND_CS, "NAND CS");
 	if (ret) {
-		pr_warning("CM-X270: failed to request NAND CS gpio\n");
+		pr_warn("CM-X270: failed to request NAND CS gpio\n");
 		return ret;
 	}
 
@@ -153,7 +153,7 @@ static int __init cmx270_init(void)
 
 	ret = gpio_request(GPIO_NAND_RB, "NAND R/B");
 	if (ret) {
-		pr_warning("CM-X270: failed to request NAND R/B gpio\n");
+		pr_warn("CM-X270: failed to request NAND R/B gpio\n");
 		goto err_gpio_request;
 	}
 
diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c
index 464470122493..2861c7079d7b 100644
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@@ -166,8 +166,8 @@ static int parse_ofoldpart_partitions(struct mtd_info *master,
 	if (!part)
 		return 0; /* No partitions found */
 
-	pr_warning("Device tree uses obsolete partition map binding: %s\n",
-			dp->full_name);
+	pr_warn("Device tree uses obsolete partition map binding: %s\n",
+		dp->full_name);
 
 	nr_parts = plen / sizeof(part[0]);
 
-- 
cgit v1.2.3


From e4a8aad8e070e54e486666cc8bb97d69dc81adfb Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 20 Dec 2016 21:54:33 +0800
Subject: mtd: mtdswap: use MTDSWAP_ECNT_MIN/MAX

Since macros MTDSWAP_ECNT_MIN() and MTDSWAP_ECNT_MAX() have been
defined in mtdswap.c, use them instead of open-coding.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Acked-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdswap.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index c40e2c951758..f12879a3d4ff 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -1235,10 +1235,8 @@ static int mtdswap_show(struct seq_file *s, void *data)
 
 		if (root->rb_node) {
 			count[i] = d->trees[i].count;
-			min[i] = rb_entry(rb_first(root), struct swap_eb,
-					rb)->erase_count;
-			max[i] = rb_entry(rb_last(root), struct swap_eb,
-					rb)->erase_count;
+			min[i] = MTDSWAP_ECNT_MIN(root);
+			max[i] = MTDSWAP_ECNT_MAX(root);
 		} else
 			count[i] = 0;
 	}
-- 
cgit v1.2.3


From b3bb6d6a0fe1c893fbcaaac8bf97c49f6ec6684e Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Date: Wed, 19 Apr 2017 22:43:52 +0200
Subject: MAINTAINERS: change email address from atmel.com to wedev4u.fr

Switch to my alternative address as primary address.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index c265a5fe4848..cea31977d401 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8224,7 +8224,7 @@ M:	Brian Norris <computersforpeace@gmail.com>
 M:	Boris Brezillon <boris.brezillon@free-electrons.com>
 M:	Marek Vasut <marek.vasut@gmail.com>
 M:	Richard Weinberger <richard@nod.at>
-M:	Cyrille Pitchen <cyrille.pitchen@atmel.com>
+M:	Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
 Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
@@ -11876,7 +11876,7 @@ S:	Maintained
 F:	drivers/clk/spear/
 
 SPI NOR SUBSYSTEM
-M:	Cyrille Pitchen <cyrille.pitchen@atmel.com>
+M:	Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
 M:	Marek Vasut <marek.vasut@gmail.com>
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
-- 
cgit v1.2.3


From da4b1caa49cbfac4e5be2c4b080b8d01b04358dd Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Thu, 30 Mar 2017 17:58:53 +0200
Subject: mtd: physmap_of: use OF helpers for reading strings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OF core code provides helpers for counting strings and reading them so
use them instead of doing this manually. This simplifies the code a bit.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/maps/physmap_of_core.c | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/drivers/mtd/maps/physmap_of_core.c b/drivers/mtd/maps/physmap_of_core.c
index 14e8909c9955..62fa6836f218 100644
--- a/drivers/mtd/maps/physmap_of_core.c
+++ b/drivers/mtd/maps/physmap_of_core.c
@@ -116,32 +116,22 @@ static const char * const part_probe_types_def[] = {
 
 static const char * const *of_get_probes(struct device_node *dp)
 {
-	const char *cp;
-	int cplen;
-	unsigned int l;
-	unsigned int count;
 	const char **res;
+	int count;
 
-	cp = of_get_property(dp, "linux,part-probe", &cplen);
-	if (cp == NULL)
+	count = of_property_count_strings(dp, "linux,part-probe");
+	if (count < 0)
 		return part_probe_types_def;
 
-	count = 0;
-	for (l = 0; l != cplen; l++)
-		if (cp[l] == 0)
-			count++;
-
-	res = kzalloc((count + 1)*sizeof(*res), GFP_KERNEL);
+	res = kzalloc((count + 1) * sizeof(*res), GFP_KERNEL);
 	if (!res)
 		return NULL;
-	count = 0;
-	while (cplen > 0) {
-		res[count] = cp;
-		l = strlen(cp) + 1;
-		cp += l;
-		cplen -= l;
-		count++;
-	}
+
+	count = of_property_read_string_array(dp, "linux,part-probe", res,
+					      count);
+	if (count < 0)
+		return NULL;
+
 	return res;
 }
 
-- 
cgit v1.2.3


From 296f827d8a3ec5f57252717844146499e2c81155 Mon Sep 17 00:00:00 2001
From: Rahul Bedarkar <rahulbedarkar89@gmail.com>
Date: Tue, 18 Apr 2017 14:04:36 +0530
Subject: MAINTAINERS: Update email-id of Rahul Bedarkar

I'm no longer with Imagination Technologies. I am still interested in
maintaining or reviewing DTS patches for Ci40 if any. Update email-id
to an active one.

Signed-off-by: Rahul Bedarkar <rahulbedarkar89@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15990/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index fdd5350fe261..36b1a6654041 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7802,7 +7802,7 @@ L:	linux-man@vger.kernel.org
 S:	Maintained
 
 MARDUK (CREATOR CI40) DEVICE TREE SUPPORT
-M:	Rahul Bedarkar <rahul.bedarkar@imgtec.com>
+M:	Rahul Bedarkar <rahulbedarkar89@gmail.com>
 L:	linux-mips@linux-mips.org
 S:	Maintained
 F:	arch/mips/boot/dts/img/pistachio_marduk.dts
-- 
cgit v1.2.3


From 684497bfe8b4485325554b96b160b5ddb6e9ebaf Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:21 +0100
Subject: Annotate hardware config module parameters in drivers/char/ipmi/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/char/ipmi/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Corey Minyard <cminyard@mvista.com>
cc: openipmi-developer@lists.sourceforge.net
---
 drivers/char/ipmi/ipmi_si_intf.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 2a7c425ddfa7..e2f34eb59998 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1375,39 +1375,39 @@ MODULE_PARM_DESC(type, "Defines the type of each interface, each"
 		 " interface separated by commas.  The types are 'kcs',"
 		 " 'smic', and 'bt'.  For example si_type=kcs,bt will set"
 		 " the first interface to kcs and the second to bt");
-module_param_array(addrs, ulong, &num_addrs, 0);
+module_param_hw_array(addrs, ulong, iomem, &num_addrs, 0);
 MODULE_PARM_DESC(addrs, "Sets the memory address of each interface, the"
 		 " addresses separated by commas.  Only use if an interface"
 		 " is in memory.  Otherwise, set it to zero or leave"
 		 " it blank.");
-module_param_array(ports, uint, &num_ports, 0);
+module_param_hw_array(ports, uint, ioport, &num_ports, 0);
 MODULE_PARM_DESC(ports, "Sets the port address of each interface, the"
 		 " addresses separated by commas.  Only use if an interface"
 		 " is a port.  Otherwise, set it to zero or leave"
 		 " it blank.");
-module_param_array(irqs, int, &num_irqs, 0);
+module_param_hw_array(irqs, int, irq, &num_irqs, 0);
 MODULE_PARM_DESC(irqs, "Sets the interrupt of each interface, the"
 		 " addresses separated by commas.  Only use if an interface"
 		 " has an interrupt.  Otherwise, set it to zero or leave"
 		 " it blank.");
-module_param_array(regspacings, int, &num_regspacings, 0);
+module_param_hw_array(regspacings, int, other, &num_regspacings, 0);
 MODULE_PARM_DESC(regspacings, "The number of bytes between the start address"
 		 " and each successive register used by the interface.  For"
 		 " instance, if the start address is 0xca2 and the spacing"
 		 " is 2, then the second address is at 0xca4.  Defaults"
 		 " to 1.");
-module_param_array(regsizes, int, &num_regsizes, 0);
+module_param_hw_array(regsizes, int, other, &num_regsizes, 0);
 MODULE_PARM_DESC(regsizes, "The size of the specific IPMI register in bytes."
 		 " This should generally be 1, 2, 4, or 8 for an 8-bit,"
 		 " 16-bit, 32-bit, or 64-bit register.  Use this if you"
 		 " the 8-bit IPMI register has to be read from a larger"
 		 " register.");
-module_param_array(regshifts, int, &num_regshifts, 0);
+module_param_hw_array(regshifts, int, other, &num_regshifts, 0);
 MODULE_PARM_DESC(regshifts, "The amount to shift the data read from the."
 		 " IPMI register, in bits.  For instance, if the data"
 		 " is read from a 32-bit word and the IPMI data is in"
 		 " bit 8-15, then the shift would be 8");
-module_param_array(slave_addrs, int, &num_slave_addrs, 0);
+module_param_hw_array(slave_addrs, int, other, &num_slave_addrs, 0);
 MODULE_PARM_DESC(slave_addrs, "Set the default IPMB slave address for"
 		 " the controller.  Normally this is 0x20, but can be"
 		 " overridden by this parm.  This is an array indexed"
-- 
cgit v1.2.3


From 94b599bc07c3c4f365f546218918dcbc363111b2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:21 +0100
Subject: Annotate hardware config module parameters in drivers/char/mwave/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/char/mwave/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 drivers/char/mwave/mwavedd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c
index 3a3ff2eb6cba..b5e3103c1175 100644
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -80,10 +80,10 @@ int mwave_3780i_io = 0;
 int mwave_uart_irq = 0;
 int mwave_uart_io = 0;
 module_param(mwave_debug, int, 0);
-module_param(mwave_3780i_irq, int, 0);
-module_param(mwave_3780i_io, int, 0);
-module_param(mwave_uart_irq, int, 0);
-module_param(mwave_uart_io, int, 0);
+module_param_hw(mwave_3780i_irq, int, irq, 0);
+module_param_hw(mwave_3780i_io, int, ioport, 0);
+module_param_hw(mwave_uart_irq, int, irq, 0);
+module_param_hw(mwave_uart_io, int, ioport, 0);
 
 static int mwave_open(struct inode *inode, struct file *file);
 static int mwave_close(struct inode *inode, struct file *file);
-- 
cgit v1.2.3


From 1c37ab5e51792a5419bdc84804aec6379cb43adb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:22 +0100
Subject: Annotate hardware config module parameters in drivers/char/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/char/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: Arnd Bergmann <arnd@arndb.de>
---
 drivers/char/applicom.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c
index e770ad977472..b67263d6e34b 100644
--- a/drivers/char/applicom.c
+++ b/drivers/char/applicom.c
@@ -94,9 +94,9 @@ static struct applicom_board {
 static unsigned int irq = 0;	/* interrupt number IRQ       */
 static unsigned long mem = 0;	/* physical segment of board  */
 
-module_param(irq, uint, 0);
+module_param_hw(irq, uint, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ of the Applicom board");
-module_param(mem, ulong, 0);
+module_param_hw(mem, ulong, iomem, 0);
 MODULE_PARM_DESC(mem, "Shared Memory Address of Applicom board");
 
 static unsigned int numboards;	/* number of installed boards */
-- 
cgit v1.2.3


From cc9c617557cd0442294138188ac8611659768a10 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:22 +0100
Subject: Annotate hardware config module parameters in drivers/clocksource/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/clocksource/.

[Note: With regard to cs5535-clockevt.c, Thomas Gleixner asked whether the
 timer_irq parameter is required for the driver to work on anything other than
 arbitrary hardware which has it mapped to 0.  Jens Rottmann replied that the
 parameter defaults to 0, which means:

	1. autodetect (=keep IRQ BIOS has set up)
	2. if that fails use CONFIG_CS5535_MFGPT_DEFAULT_IRQ
	   (see drivers/misc/cs5535-mfgpt.c: cs5535_mfgpt_set_irq())

 Jens further noted that there may not be any systems that have CS5535/36
 devices that support EFI and secure boot.]

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Daniel Lezcano <daniel.lezcano@linaro.org>
cc: Thomas Gleixner <tglx@linutronix.de>
cc: Jens Rottmann <Jens.Rottmann@ADLINKtech.com>
cc: linux-kernel@vger.kernel.org
---
 drivers/clocksource/cs5535-clockevt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c
index 9a7e37cf56b0..a1df588343f2 100644
--- a/drivers/clocksource/cs5535-clockevt.c
+++ b/drivers/clocksource/cs5535-clockevt.c
@@ -22,7 +22,7 @@
 #define DRV_NAME "cs5535-clockevt"
 
 static int timer_irq;
-module_param_named(irq, timer_irq, int, 0644);
+module_param_hw_named(irq, timer_irq, int, irq, 0644);
 MODULE_PARM_DESC(irq, "Which IRQ to use for the clock source MFGPT ticks.");
 
 /*
-- 
cgit v1.2.3


From 40059ec6701bd10d7d972ed302cca61cf8b6f2cf Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:22 +0100
Subject: Annotate hardware config module parameters in drivers/cpufreq/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/cpufreq/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
cc: linux-pm@vger.kernel.org
---
 drivers/cpufreq/speedstep-smi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c
index 770a9ae1999a..37b30071c220 100644
--- a/drivers/cpufreq/speedstep-smi.c
+++ b/drivers/cpufreq/speedstep-smi.c
@@ -378,7 +378,7 @@ static void __exit speedstep_exit(void)
 	cpufreq_unregister_driver(&speedstep_driver);
 }
 
-module_param(smi_port, int, 0444);
+module_param_hw(smi_port, int, ioport, 0444);
 module_param(smi_cmd,  int, 0444);
 module_param(smi_sig, uint, 0444);
 
-- 
cgit v1.2.3


From d759f906794b3b2894780870227c3c05895d83c1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:22 +0100
Subject: Annotate hardware config module parameters in drivers/gpio/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/gpio/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
cc: Alexandre Courbot <gnurou@gmail.com>
cc: linux-gpio@vger.kernel.org
---
 drivers/gpio/gpio-104-dio-48e.c | 4 ++--
 drivers/gpio/gpio-104-idi-48.c  | 4 ++--
 drivers/gpio/gpio-104-idio-16.c | 4 ++--
 drivers/gpio/gpio-gpio-mm.c     | 2 +-
 drivers/gpio/gpio-ws16c48.c     | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index 17bd2ab4ebe2..dfa1a298e4f6 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -33,11 +33,11 @@
 
 static unsigned int base[MAX_NUM_DIO48E];
 static unsigned int num_dio48e;
-module_param_array(base, uint, &num_dio48e, 0);
+module_param_hw_array(base, uint, ioport, &num_dio48e, 0);
 MODULE_PARM_DESC(base, "ACCES 104-DIO-48E base addresses");
 
 static unsigned int irq[MAX_NUM_DIO48E];
-module_param_array(irq, uint, NULL, 0);
+module_param_hw_array(irq, uint, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "ACCES 104-DIO-48E interrupt line numbers");
 
 /**
diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index 568375a7ebc2..c369b2083876 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -33,11 +33,11 @@
 
 static unsigned int base[MAX_NUM_IDI_48];
 static unsigned int num_idi_48;
-module_param_array(base, uint, &num_idi_48, 0);
+module_param_hw_array(base, uint, ioport, &num_idi_48, 0);
 MODULE_PARM_DESC(base, "ACCES 104-IDI-48 base addresses");
 
 static unsigned int irq[MAX_NUM_IDI_48];
-module_param_array(irq, uint, NULL, 0);
+module_param_hw_array(irq, uint, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "ACCES 104-IDI-48 interrupt line numbers");
 
 /**
diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c
index 7053cf736648..5949123986f2 100644
--- a/drivers/gpio/gpio-104-idio-16.c
+++ b/drivers/gpio/gpio-104-idio-16.c
@@ -33,11 +33,11 @@
 
 static unsigned int base[MAX_NUM_IDIO_16];
 static unsigned int num_idio_16;
-module_param_array(base, uint, &num_idio_16, 0);
+module_param_hw_array(base, uint, ioport, &num_idio_16, 0);
 MODULE_PARM_DESC(base, "ACCES 104-IDIO-16 base addresses");
 
 static unsigned int irq[MAX_NUM_IDIO_16];
-module_param_array(irq, uint, NULL, 0);
+module_param_hw_array(irq, uint, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "ACCES 104-IDIO-16 interrupt line numbers");
 
 /**
diff --git a/drivers/gpio/gpio-gpio-mm.c b/drivers/gpio/gpio-gpio-mm.c
index fa4baa2543db..11ade5b288f8 100644
--- a/drivers/gpio/gpio-gpio-mm.c
+++ b/drivers/gpio/gpio-gpio-mm.c
@@ -31,7 +31,7 @@
 
 static unsigned int base[MAX_NUM_GPIOMM];
 static unsigned int num_gpiomm;
-module_param_array(base, uint, &num_gpiomm, 0);
+module_param_hw_array(base, uint, ioport, &num_gpiomm, 0);
 MODULE_PARM_DESC(base, "Diamond Systems GPIO-MM base addresses");
 
 /**
diff --git a/drivers/gpio/gpio-ws16c48.c b/drivers/gpio/gpio-ws16c48.c
index 901b5ccb032d..f8a4f91f36c7 100644
--- a/drivers/gpio/gpio-ws16c48.c
+++ b/drivers/gpio/gpio-ws16c48.c
@@ -30,11 +30,11 @@
 
 static unsigned int base[MAX_NUM_WS16C48];
 static unsigned int num_ws16c48;
-module_param_array(base, uint, &num_ws16c48, 0);
+module_param_hw_array(base, uint, ioport, &num_ws16c48, 0);
 MODULE_PARM_DESC(base, "WinSystems WS16C48 base addresses");
 
 static unsigned int irq[MAX_NUM_WS16C48];
-module_param_array(irq, uint, NULL, 0);
+module_param_hw_array(irq, uint, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "WinSystems WS16C48 interrupt line numbers");
 
 /**
-- 
cgit v1.2.3


From c78babcc7d25ffd44a579c796fb4e9a313c0b127 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:23 +0100
Subject: Annotate hardware config module parameters in drivers/i2c/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/i2c/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Jean Delvare <jdelvare@suse.de>
cc: linux-i2c@vger.kernel.org
---
 drivers/i2c/busses/i2c-ali15x3.c       | 2 +-
 drivers/i2c/busses/i2c-elektor.c       | 6 +++---
 drivers/i2c/busses/i2c-parport-light.c | 4 ++--
 drivers/i2c/busses/i2c-pca-isa.c       | 4 ++--
 drivers/i2c/busses/i2c-piix4.c         | 2 +-
 drivers/i2c/busses/i2c-sis5595.c       | 2 +-
 drivers/i2c/busses/i2c-viapro.c        | 2 +-
 drivers/i2c/busses/scx200_acb.c        | 2 +-
 8 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c
index 45c5c4883022..6e6bf46bcb52 100644
--- a/drivers/i2c/busses/i2c-ali15x3.c
+++ b/drivers/i2c/busses/i2c-ali15x3.c
@@ -119,7 +119,7 @@
 /* If force_addr is set to anything different from 0, we forcibly enable
    the device at the given address. */
 static u16 force_addr;
-module_param(force_addr, ushort, 0);
+module_param_hw(force_addr, ushort, ioport, 0);
 MODULE_PARM_DESC(force_addr,
 		 "Initialize the base address of the i2c controller");
 
diff --git a/drivers/i2c/busses/i2c-elektor.c b/drivers/i2c/busses/i2c-elektor.c
index 8af62fb3fe41..5416003e0605 100644
--- a/drivers/i2c/busses/i2c-elektor.c
+++ b/drivers/i2c/busses/i2c-elektor.c
@@ -323,9 +323,9 @@ MODULE_AUTHOR("Hans Berglund <hb@spacetec.no>");
 MODULE_DESCRIPTION("I2C-Bus adapter routines for PCF8584 ISA bus adapter");
 MODULE_LICENSE("GPL");
 
-module_param(base, int, 0);
-module_param(irq, int, 0);
+module_param_hw(base, int, ioport_or_iomem, 0);
+module_param_hw(irq, int, irq, 0);
 module_param(clock, int, 0);
 module_param(own, int, 0);
-module_param(mmapped, int, 0);
+module_param_hw(mmapped, int, other, 0);
 module_isa_driver(i2c_elektor_driver, 1);
diff --git a/drivers/i2c/busses/i2c-parport-light.c b/drivers/i2c/busses/i2c-parport-light.c
index 1bcdd10b68b9..faa8fb8f2b8f 100644
--- a/drivers/i2c/busses/i2c-parport-light.c
+++ b/drivers/i2c/busses/i2c-parport-light.c
@@ -38,11 +38,11 @@
 static struct platform_device *pdev;
 
 static u16 base;
-module_param(base, ushort, 0);
+module_param_hw(base, ushort, ioport, 0);
 MODULE_PARM_DESC(base, "Base I/O address");
 
 static int irq;
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ (optional)");
 
 /* ----- Low-level parallel port access ----------------------------------- */
diff --git a/drivers/i2c/busses/i2c-pca-isa.c b/drivers/i2c/busses/i2c-pca-isa.c
index ba88f17f636c..946ac646de2a 100644
--- a/drivers/i2c/busses/i2c-pca-isa.c
+++ b/drivers/i2c/busses/i2c-pca-isa.c
@@ -197,9 +197,9 @@ MODULE_AUTHOR("Ian Campbell <icampbell@arcom.com>");
 MODULE_DESCRIPTION("ISA base PCA9564/PCA9665 driver");
 MODULE_LICENSE("GPL");
 
-module_param(base, ulong, 0);
+module_param_hw(base, ulong, ioport, 0);
 MODULE_PARM_DESC(base, "I/O base address");
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ");
 module_param(clock, int, 0);
 MODULE_PARM_DESC(clock, "Clock rate in hertz.\n\t\t"
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index c21ca7bf2efe..0ecdb47a23ab 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -106,7 +106,7 @@ MODULE_PARM_DESC(force, "Forcibly enable the PIIX4. DANGEROUS!");
 /* If force_addr is set to anything different from 0, we forcibly enable
    the PIIX4 at the given address. VERY DANGEROUS! */
 static int force_addr;
-module_param (force_addr, int, 0);
+module_param_hw(force_addr, int, ioport, 0);
 MODULE_PARM_DESC(force_addr,
 		 "Forcibly enable the PIIX4 at the given address. "
 		 "EXTREMELY DANGEROUS!");
diff --git a/drivers/i2c/busses/i2c-sis5595.c b/drivers/i2c/busses/i2c-sis5595.c
index 7d58a40faf2d..d543a9867ba4 100644
--- a/drivers/i2c/busses/i2c-sis5595.c
+++ b/drivers/i2c/busses/i2c-sis5595.c
@@ -119,7 +119,7 @@ static int blacklist[] = {
 /* If force_addr is set to anything different from 0, we forcibly enable
    the device at the given address. */
 static u16 force_addr;
-module_param(force_addr, ushort, 0);
+module_param_hw(force_addr, ushort, ioport, 0);
 MODULE_PARM_DESC(force_addr, "Initialize the base address of the i2c controller");
 
 static struct pci_driver sis5595_driver;
diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c
index 0ee2646f3b00..0dc45e12bb1d 100644
--- a/drivers/i2c/busses/i2c-viapro.c
+++ b/drivers/i2c/busses/i2c-viapro.c
@@ -94,7 +94,7 @@ MODULE_PARM_DESC(force, "Forcibly enable the SMBus. DANGEROUS!");
 /* If force_addr is set to anything different from 0, we forcibly enable
    the VT596 at the given address. VERY DANGEROUS! */
 static u16 force_addr;
-module_param(force_addr, ushort, 0);
+module_param_hw(force_addr, ushort, ioport, 0);
 MODULE_PARM_DESC(force_addr,
 		 "Forcibly enable the SMBus at the given address. "
 		 "EXTREMELY DANGEROUS!");
diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c
index 0a7e410b6195..e0923bee8d1f 100644
--- a/drivers/i2c/busses/scx200_acb.c
+++ b/drivers/i2c/busses/scx200_acb.c
@@ -42,7 +42,7 @@ MODULE_LICENSE("GPL");
 
 #define MAX_DEVICES 4
 static int base[MAX_DEVICES] = { 0x820, 0x840 };
-module_param_array(base, int, NULL, 0);
+module_param_hw_array(base, int, ioport, NULL, 0);
 MODULE_PARM_DESC(base, "Base addresses for the ACCESS.bus controllers");
 
 #define POLL_TIMEOUT	(HZ/5)
-- 
cgit v1.2.3


From 8863b3e785b8054b981760e26e63a25897467c42 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:23 +0100
Subject: Annotate hardware config module parameters in drivers/iio/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/iio/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Acked-by: Jonathan Cameron <jic23@kernel.org>
cc: linux-iio@vger.kernel.org
---
 drivers/iio/adc/stx104.c  | 2 +-
 drivers/iio/dac/cio-dac.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/adc/stx104.c b/drivers/iio/adc/stx104.c
index be2de48844bc..7dd396f88f6b 100644
--- a/drivers/iio/adc/stx104.c
+++ b/drivers/iio/adc/stx104.c
@@ -49,7 +49,7 @@
 
 static unsigned int base[max_num_isa_dev(STX104_EXTENT)];
 static unsigned int num_stx104;
-module_param_array(base, uint, &num_stx104, 0);
+module_param_hw_array(base, uint, ioport, &num_stx104, 0);
 MODULE_PARM_DESC(base, "Apex Embedded Systems STX104 base addresses");
 
 /**
diff --git a/drivers/iio/dac/cio-dac.c b/drivers/iio/dac/cio-dac.c
index 5a743e2a779d..dac086129edf 100644
--- a/drivers/iio/dac/cio-dac.c
+++ b/drivers/iio/dac/cio-dac.c
@@ -39,7 +39,7 @@
 
 static unsigned int base[max_num_isa_dev(CIO_DAC_EXTENT)];
 static unsigned int num_cio_dac;
-module_param_array(base, uint, &num_cio_dac, 0);
+module_param_hw_array(base, uint, ioport, &num_cio_dac, 0);
 MODULE_PARM_DESC(base, "Measurement Computing CIO-DAC base addresses");
 
 /**
-- 
cgit v1.2.3


From f6b12d04346cfcc926756ded982c91c25eff0333 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:23 +0100
Subject: Annotate hardware config module parameters in drivers/input/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/input/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
cc: linux-input@vger.kernel.org
---
 drivers/input/mouse/inport.c      | 2 +-
 drivers/input/mouse/logibm.c      | 2 +-
 drivers/input/touchscreen/mk712.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/input/mouse/inport.c b/drivers/input/mouse/inport.c
index 3827a22362de..9ce71dfa0de1 100644
--- a/drivers/input/mouse/inport.c
+++ b/drivers/input/mouse/inport.c
@@ -78,7 +78,7 @@ MODULE_LICENSE("GPL");
 #define INPORT_IRQ		5
 
 static int inport_irq = INPORT_IRQ;
-module_param_named(irq, inport_irq, uint, 0);
+module_param_hw_named(irq, inport_irq, uint, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ number (5=default)");
 
 static struct input_dev *inport_dev;
diff --git a/drivers/input/mouse/logibm.c b/drivers/input/mouse/logibm.c
index e2413113df22..6f165e053f4d 100644
--- a/drivers/input/mouse/logibm.c
+++ b/drivers/input/mouse/logibm.c
@@ -69,7 +69,7 @@ MODULE_LICENSE("GPL");
 #define LOGIBM_IRQ		5
 
 static int logibm_irq = LOGIBM_IRQ;
-module_param_named(irq, logibm_irq, uint, 0);
+module_param_hw_named(irq, logibm_irq, uint, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ number (5=default)");
 
 static struct input_dev *logibm_dev;
diff --git a/drivers/input/touchscreen/mk712.c b/drivers/input/touchscreen/mk712.c
index 36e57deacd03..bd5352824f77 100644
--- a/drivers/input/touchscreen/mk712.c
+++ b/drivers/input/touchscreen/mk712.c
@@ -50,11 +50,11 @@ MODULE_DESCRIPTION("ICS MicroClock MK712 TouchScreen driver");
 MODULE_LICENSE("GPL");
 
 static unsigned int mk712_io = 0x260;	/* Also 0x200, 0x208, 0x300 */
-module_param_named(io, mk712_io, uint, 0);
+module_param_hw_named(io, mk712_io, uint, ioport, 0);
 MODULE_PARM_DESC(io, "I/O base address of MK712 touchscreen controller");
 
 static unsigned int mk712_irq = 10;	/* Also 12, 14, 15 */
-module_param_named(irq, mk712_irq, uint, 0);
+module_param_hw_named(irq, mk712_irq, uint, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ of MK712 touchscreen controller");
 
 /* eight 8-bit registers */
-- 
cgit v1.2.3


From b9351f7e51dbbf1d850281142a55d848301d482d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:24 +0100
Subject: Annotate hardware config module parameters in drivers/isdn/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/isdn/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Karsten Keil <isdn@linux-pingi.de>
cc: netdev@vger.kernel.org
---
 drivers/isdn/hardware/avm/b1isa.c |  4 ++--
 drivers/isdn/hardware/avm/t1isa.c |  4 ++--
 drivers/isdn/hisax/config.c       | 10 +++++-----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/isdn/hardware/avm/b1isa.c b/drivers/isdn/hardware/avm/b1isa.c
index 31ef8130a87f..54e871a47387 100644
--- a/drivers/isdn/hardware/avm/b1isa.c
+++ b/drivers/isdn/hardware/avm/b1isa.c
@@ -169,8 +169,8 @@ static struct pci_dev isa_dev[MAX_CARDS];
 static int io[MAX_CARDS];
 static int irq[MAX_CARDS];
 
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(io, "I/O base address(es)");
 MODULE_PARM_DESC(irq, "IRQ number(s) (assigned)");
 
diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c
index 72ef18853951..9516203c735f 100644
--- a/drivers/isdn/hardware/avm/t1isa.c
+++ b/drivers/isdn/hardware/avm/t1isa.c
@@ -516,8 +516,8 @@ static int io[MAX_CARDS];
 static int irq[MAX_CARDS];
 static int cardnr[MAX_CARDS];
 
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_array(cardnr, int, NULL, 0);
 MODULE_PARM_DESC(io, "I/O base address(es)");
 MODULE_PARM_DESC(irq, "IRQ number(s) (assigned)");
diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c
index 2d12c6ceeb89..c7d68675b028 100644
--- a/drivers/isdn/hisax/config.c
+++ b/drivers/isdn/hisax/config.c
@@ -350,13 +350,13 @@ MODULE_AUTHOR("Karsten Keil");
 MODULE_LICENSE("GPL");
 module_param_array(type, int, NULL, 0);
 module_param_array(protocol, int, NULL, 0);
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
-module_param_array(mem, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
+module_param_hw_array(mem, int, iomem, NULL, 0);
 module_param(id, charp, 0);
 #ifdef IO0_IO1
-module_param_array(io0, int, NULL, 0);
-module_param_array(io1, int, NULL, 0);
+module_param_hw_array(io0, int, ioport, NULL, 0);
+module_param_hw_array(io1, int, ioport, NULL, 0);
 #endif
 #endif /* MODULE */
 
-- 
cgit v1.2.3


From 5a8fc6a3cebb0dde27584603c5c4b5c72c6d810f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:24 +0100
Subject: Annotate hardware config module parameters in drivers/media/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/media/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
cc: mjpeg-users@lists.sourceforge.net
cc: linux-media@vger.kernel.org
---
 drivers/media/pci/zoran/zoran_card.c |  2 +-
 drivers/media/rc/serial_ir.c         | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/media/pci/zoran/zoran_card.c b/drivers/media/pci/zoran/zoran_card.c
index 5266755add63..4680f001653a 100644
--- a/drivers/media/pci/zoran/zoran_card.c
+++ b/drivers/media/pci/zoran/zoran_card.c
@@ -69,7 +69,7 @@ MODULE_PARM_DESC(card, "Card type");
  */
 
 static unsigned long vidmem;	/* default = 0 - Video memory base address */
-module_param(vidmem, ulong, 0444);
+module_param_hw(vidmem, ulong, iomem, 0444);
 MODULE_PARM_DESC(vidmem, "Default video memory base address");
 
 /*
diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c
index 41b54e40176c..40d305842a9b 100644
--- a/drivers/media/rc/serial_ir.c
+++ b/drivers/media/rc/serial_ir.c
@@ -833,11 +833,11 @@ MODULE_LICENSE("GPL");
 module_param(type, int, 0444);
 MODULE_PARM_DESC(type, "Hardware type (0 = home-brew, 1 = IRdeo, 2 = IRdeo Remote, 3 = AnimaX, 4 = IgorPlug");
 
-module_param(io, int, 0444);
+module_param_hw(io, int, ioport, 0444);
 MODULE_PARM_DESC(io, "I/O address base (0x3f8 or 0x2f8)");
 
 /* some architectures (e.g. intel xscale) have memory mapped registers */
-module_param(iommap, bool, 0444);
+module_param_hw(iommap, bool, other, 0444);
 MODULE_PARM_DESC(iommap, "physical base for memory mapped I/O (0 = no memory mapped io)");
 
 /*
@@ -845,13 +845,13 @@ MODULE_PARM_DESC(iommap, "physical base for memory mapped I/O (0 = no memory map
  * on 32bit word boundaries.
  * See linux-kernel/drivers/tty/serial/8250/8250.c serial_in()/out()
  */
-module_param(ioshift, int, 0444);
+module_param_hw(ioshift, int, other, 0444);
 MODULE_PARM_DESC(ioshift, "shift I/O register offset (0 = no shift)");
 
-module_param(irq, int, 0444);
+module_param_hw(irq, int, irq, 0444);
 MODULE_PARM_DESC(irq, "Interrupt (4 or 3)");
 
-module_param(share_irq, bool, 0444);
+module_param_hw(share_irq, bool, other, 0444);
 MODULE_PARM_DESC(share_irq, "Share interrupts (0 = off, 1 = on)");
 
 module_param(sense, int, 0444);
-- 
cgit v1.2.3


From 4f1927dcbf79f6c7c153c8ec9beeb17364649f0c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:24 +0100
Subject: Annotate hardware config module parameters in drivers/misc/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/misc/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: Arnd Bergmann <arnd@arndb.de>
---
 drivers/misc/dummy-irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/dummy-irq.c b/drivers/misc/dummy-irq.c
index acbbe0390be4..76a1015d5783 100644
--- a/drivers/misc/dummy-irq.c
+++ b/drivers/misc/dummy-irq.c
@@ -59,6 +59,6 @@ module_exit(dummy_irq_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jiri Kosina");
-module_param(irq, uint, 0444);
+module_param_hw(irq, uint, irq, 0444);
 MODULE_PARM_DESC(irq, "The IRQ to register for");
 MODULE_DESCRIPTION("Dummy IRQ handler driver");
-- 
cgit v1.2.3


From dac562fc5ff4956d779029d2ae379e34c791f011 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:25 +0100
Subject: Annotate hardware config module parameters in drivers/mmc/host/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/mmc/host/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Pierre Ossman <pierre@ossman.eu>
cc: Ulf Hansson <ulf.hansson@linaro.org>
cc: linux-mmc@vger.kernel.org
---
 drivers/mmc/host/wbsd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index bd04e8bae010..e15a9733fcfd 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -2001,11 +2001,11 @@ static void __exit wbsd_drv_exit(void)
 module_init(wbsd_drv_init);
 module_exit(wbsd_drv_exit);
 #ifdef CONFIG_PNP
-module_param_named(nopnp, param_nopnp, uint, 0444);
+module_param_hw_named(nopnp, param_nopnp, uint, other, 0444);
 #endif
-module_param_named(io, param_io, uint, 0444);
-module_param_named(irq, param_irq, uint, 0444);
-module_param_named(dma, param_dma, int, 0444);
+module_param_hw_named(io, param_io, uint, ioport, 0444);
+module_param_hw_named(irq, param_irq, uint, irq, 0444);
+module_param_hw_named(dma, param_dma, int, dma, 0444);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pierre Ossman <pierre@ossman.eu>");
-- 
cgit v1.2.3


From 6621f85d79775f71de9623fdfc9135fc494d6863 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:25 +0100
Subject: Annotate hardware config module parameters in drivers/net/appletalk/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/net/appletalk/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Arnaldo Carvalho de Melo <acme@kernel.org>
cc: netdev@vger.kernel.org
---
 drivers/net/appletalk/cops.c | 6 +++---
 drivers/net/appletalk/ltpc.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index 1b2e9217ec78..486e1e6997fc 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -986,9 +986,9 @@ static int cops_close(struct net_device *dev)
 static struct net_device *cops_dev;
 
 MODULE_LICENSE("GPL");
-module_param(io, int, 0);
-module_param(irq, int, 0);
-module_param(board_type, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
+module_param_hw(board_type, int, other, 0);
 
 static int __init cops_module_init(void)
 {
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index 01e2ac55c137..ac755d2950a6 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -1231,9 +1231,9 @@ static struct net_device *dev_ltpc;
 
 MODULE_LICENSE("GPL");
 module_param(debug, int, 0);
-module_param(io, int, 0);
-module_param(irq, int, 0);
-module_param(dma, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
+module_param_hw(dma, int, dma, 0);
 
 
 static int __init ltpc_module_init(void)
-- 
cgit v1.2.3


From 06a5128a29b3d67fadff5d9593d79b815d8b34d8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:25 +0100
Subject: Annotate hardware config module parameters in drivers/net/arcnet/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/net/arcnet/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Michael Grzeschik <m.grzeschik@pengutronix.de>
cc: netdev@vger.kernel.org
---
 drivers/net/arcnet/com20020-isa.c | 4 ++--
 drivers/net/arcnet/com90io.c      | 4 ++--
 drivers/net/arcnet/com90xx.c      | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c
index b9e9931353b2..38fa60ddaf2e 100644
--- a/drivers/net/arcnet/com20020-isa.c
+++ b/drivers/net/arcnet/com20020-isa.c
@@ -129,8 +129,8 @@ static int clockp = 0;
 static int clockm = 0;
 
 module_param(node, int, 0);
-module_param(io, int, 0);
-module_param(irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
 module_param_string(device, device, sizeof(device), 0);
 module_param(timeout, int, 0);
 module_param(backplane, int, 0);
diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c
index b57863df5bf5..4e56aaf2b984 100644
--- a/drivers/net/arcnet/com90io.c
+++ b/drivers/net/arcnet/com90io.c
@@ -347,8 +347,8 @@ static int io;			/* use the insmod io= irq= shmem= options */
 static int irq;
 static char device[9];		/* use eg. device=arc1 to change name */
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
 module_param_string(device, device, sizeof(device), 0);
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c
index 81f90c4703ae..ca4a57c30bf8 100644
--- a/drivers/net/arcnet/com90xx.c
+++ b/drivers/net/arcnet/com90xx.c
@@ -88,8 +88,8 @@ static int irq;
 static int shmem;
 static char device[9];		/* use eg. device=arc1 to change name */
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
 module_param(shmem, int, 0);
 module_param_string(device, device, sizeof(device), 0);
 
-- 
cgit v1.2.3


From e43f2c52a07793cb97e4a963a5b90d4af311c3d5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:25 +0100
Subject: Annotate hardware config module parameters in drivers/net/can/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/net/can/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de>
cc: Wolfgang Grandegger <wg@grandegger.com>
cc: linux-can@vger.kernel.org
cc: netdev@vger.kernel.org
---
 drivers/net/can/cc770/cc770_isa.c     | 8 ++++----
 drivers/net/can/sja1000/sja1000_isa.c | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c
index e0d15711e9ac..3a30fd3b4498 100644
--- a/drivers/net/can/cc770/cc770_isa.c
+++ b/drivers/net/can/cc770/cc770_isa.c
@@ -82,16 +82,16 @@ static u8 cor[MAXDEV] = {[0 ... (MAXDEV - 1)] = 0xff};
 static u8 bcr[MAXDEV] = {[0 ... (MAXDEV - 1)] = 0xff};
 static int indirect[MAXDEV] = {[0 ... (MAXDEV - 1)] = -1};
 
-module_param_array(port, ulong, NULL, S_IRUGO);
+module_param_hw_array(port, ulong, ioport, NULL, S_IRUGO);
 MODULE_PARM_DESC(port, "I/O port number");
 
-module_param_array(mem, ulong, NULL, S_IRUGO);
+module_param_hw_array(mem, ulong, iomem, NULL, S_IRUGO);
 MODULE_PARM_DESC(mem, "I/O memory address");
 
-module_param_array(indirect, int, NULL, S_IRUGO);
+module_param_hw_array(indirect, int, ioport, NULL, S_IRUGO);
 MODULE_PARM_DESC(indirect, "Indirect access via address and data port");
 
-module_param_array(irq, int, NULL, S_IRUGO);
+module_param_hw_array(irq, int, irq, NULL, S_IRUGO);
 MODULE_PARM_DESC(irq, "IRQ number");
 
 module_param_array(clk, int, NULL, S_IRUGO);
diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c
index e97e6d35b300..a89c1e92554d 100644
--- a/drivers/net/can/sja1000/sja1000_isa.c
+++ b/drivers/net/can/sja1000/sja1000_isa.c
@@ -48,16 +48,16 @@ static unsigned char ocr[MAXDEV] = {[0 ... (MAXDEV - 1)] = 0xff};
 static int indirect[MAXDEV] = {[0 ... (MAXDEV - 1)] = -1};
 static spinlock_t indirect_lock[MAXDEV];  /* lock for indirect access mode */
 
-module_param_array(port, ulong, NULL, S_IRUGO);
+module_param_hw_array(port, ulong, ioport, NULL, S_IRUGO);
 MODULE_PARM_DESC(port, "I/O port number");
 
-module_param_array(mem, ulong, NULL, S_IRUGO);
+module_param_hw_array(mem, ulong, iomem, NULL, S_IRUGO);
 MODULE_PARM_DESC(mem, "I/O memory address");
 
-module_param_array(indirect, int, NULL, S_IRUGO);
+module_param_hw_array(indirect, int, ioport, NULL, S_IRUGO);
 MODULE_PARM_DESC(indirect, "Indirect access via address and data port");
 
-module_param_array(irq, int, NULL, S_IRUGO);
+module_param_hw_array(irq, int, irq, NULL, S_IRUGO);
 MODULE_PARM_DESC(irq, "IRQ number");
 
 module_param_array(clk, int, NULL, S_IRUGO);
-- 
cgit v1.2.3


From df29840815a6aaa6a1ab8d85d7745a1a6fc25de0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:26 +0100
Subject: Annotate hardware config module parameters in drivers/net/ethernet/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/net/ethernet/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Steffen Klassert <steffen.klassert@secunet.com>
cc: Jaroslav Kysela <perex@perex.cz>
cc: netdev@vger.kernel.org
cc: linux-parisc@vger.kernel.org
---
 drivers/net/ethernet/3com/3c509.c      | 2 +-
 drivers/net/ethernet/3com/3c59x.c      | 4 ++--
 drivers/net/ethernet/8390/ne.c         | 4 ++--
 drivers/net/ethernet/8390/smc-ultra.c  | 4 ++--
 drivers/net/ethernet/8390/wd.c         | 8 ++++----
 drivers/net/ethernet/amd/lance.c       | 6 +++---
 drivers/net/ethernet/amd/ni65.c        | 6 +++---
 drivers/net/ethernet/cirrus/cs89x0.c   | 6 +++---
 drivers/net/ethernet/dec/tulip/de4x5.c | 2 +-
 drivers/net/ethernet/hp/hp100.c        | 2 +-
 drivers/net/ethernet/realtek/atp.c     | 4 ++--
 drivers/net/ethernet/smsc/smc9194.c    | 4 ++--
 12 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index c7f9f2c77da7..db8592d412ab 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -1371,7 +1371,7 @@ el3_resume(struct device *pdev)
 #endif /* CONFIG_PM */
 
 module_param(debug,int, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param(max_interrupt_work, int, 0);
 MODULE_PARM_DESC(debug, "debug level (0-6)");
 MODULE_PARM_DESC(irq, "IRQ number(s) (assigned)");
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 40196f41768a..e41245a54f8b 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -813,8 +813,8 @@ module_param(global_enable_wol, int, 0);
 module_param_array(enable_wol, int, NULL, 0);
 module_param(rx_copybreak, int, 0);
 module_param(max_interrupt_work, int, 0);
-module_param(compaq_ioaddr, int, 0);
-module_param(compaq_irq, int, 0);
+module_param_hw(compaq_ioaddr, int, ioport, 0);
+module_param_hw(compaq_irq, int, irq, 0);
 module_param(compaq_device_id, int, 0);
 module_param(watchdog, int, 0);
 module_param(global_use_mmio, int, 0);
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index c063b410a163..66f47987e2a2 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -74,8 +74,8 @@ static int bad[MAX_NE_CARDS];
 static u32 ne_msg_enable;
 
 #ifdef MODULE
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_array(bad, int, NULL, 0);
 module_param_named(msg_enable, ne_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
 MODULE_PARM_DESC(io, "I/O base address(es),required");
diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c
index 364b6514f65f..4e02f6a23575 100644
--- a/drivers/net/ethernet/8390/smc-ultra.c
+++ b/drivers/net/ethernet/8390/smc-ultra.c
@@ -561,8 +561,8 @@ static struct net_device *dev_ultra[MAX_ULTRA_CARDS];
 static int io[MAX_ULTRA_CARDS];
 static int irq[MAX_ULTRA_CARDS];
 
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_named(msg_enable, ultra_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
 MODULE_PARM_DESC(io, "I/O base address(es)");
 MODULE_PARM_DESC(irq, "IRQ number(s) (assigned)");
diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c
index ad019cbc698f..6efa2722f850 100644
--- a/drivers/net/ethernet/8390/wd.c
+++ b/drivers/net/ethernet/8390/wd.c
@@ -503,10 +503,10 @@ static int irq[MAX_WD_CARDS];
 static int mem[MAX_WD_CARDS];
 static int mem_end[MAX_WD_CARDS];	/* for non std. mem size */
 
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
-module_param_array(mem, int, NULL, 0);
-module_param_array(mem_end, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
+module_param_hw_array(mem, int, iomem, NULL, 0);
+module_param_hw_array(mem_end, int, iomem, NULL, 0);
 module_param_named(msg_enable, wd_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
 MODULE_PARM_DESC(io, "I/O base address(es)");
 MODULE_PARM_DESC(irq, "IRQ number(s) (ignored for PureData boards)");
diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c
index 61a641f23149..12a6a93d221b 100644
--- a/drivers/net/ethernet/amd/lance.c
+++ b/drivers/net/ethernet/amd/lance.c
@@ -318,9 +318,9 @@ static int io[MAX_CARDS];
 static int dma[MAX_CARDS];
 static int irq[MAX_CARDS];
 
-module_param_array(io, int, NULL, 0);
-module_param_array(dma, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(dma, int, dma, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param(lance_debug, int, 0);
 MODULE_PARM_DESC(io, "LANCE/PCnet I/O base address(es),required");
 MODULE_PARM_DESC(dma, "LANCE/PCnet ISA DMA channel (ignored for some devices)");
diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c
index 5985bf220a8d..e248d1ab3e47 100644
--- a/drivers/net/ethernet/amd/ni65.c
+++ b/drivers/net/ethernet/amd/ni65.c
@@ -1227,9 +1227,9 @@ static void set_multicast_list(struct net_device *dev)
 #ifdef MODULE
 static struct net_device *dev_ni65;
 
-module_param(irq, int, 0);
-module_param(io, int, 0);
-module_param(dma, int, 0);
+module_param_hw(irq, int, irq, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(dma, int, dma, 0);
 MODULE_PARM_DESC(irq, "ni6510 IRQ number (ignored for some cards)");
 MODULE_PARM_DESC(io, "ni6510 I/O base address");
 MODULE_PARM_DESC(dma, "ni6510 ISA DMA channel (ignored for some cards)");
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index 3647b28e8de0..8f660d9761cc 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -1704,12 +1704,12 @@ static int use_dma;			/* These generate unused var warnings if ALLOW_DMA = 0 */
 static int dma;
 static int dmasize = 16;		/* or 64 */
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
 module_param(debug, int, 0);
 module_param_string(media, media, sizeof(media), 0);
 module_param(duplex, int, 0);
-module_param(dma , int, 0);
+module_param_hw(dma , int, dma, 0);
 module_param(dmasize , int, 0);
 module_param(use_dma , int, 0);
 MODULE_PARM_DESC(io, "cs89x0 I/O base address");
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index df4a871df633..fd6bcf024729 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -1015,7 +1015,7 @@ static int     compact_infoblock(struct net_device *dev, u_char count, u_char *p
 
 static int io=0x0;/* EDIT THIS LINE FOR YOUR CONFIGURATION IF NEEDED        */
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 module_param(de4x5_debug, int, 0);
 module_param(dec_only, int, 0);
 module_param(args, charp, 0);
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
index 1a31bee6e728..5673b071e39d 100644
--- a/drivers/net/ethernet/hp/hp100.c
+++ b/drivers/net/ethernet/hp/hp100.c
@@ -2966,7 +2966,7 @@ MODULE_DESCRIPTION("HP CASCADE Architecture Driver for 100VG-AnyLan Network Adap
 #define HP100_DEVICES 5
 /* Parameters set by insmod */
 static int hp100_port[HP100_DEVICES] = { 0, [1 ... (HP100_DEVICES-1)] = -1 };
-module_param_array(hp100_port, int, NULL, 0);
+module_param_hw_array(hp100_port, int, ioport, NULL, 0);
 
 /* List of devices */
 static struct net_device *hp100_devlist[HP100_DEVICES];
diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c
index 9bcd4aefc9c5..bed34684994f 100644
--- a/drivers/net/ethernet/realtek/atp.c
+++ b/drivers/net/ethernet/realtek/atp.c
@@ -151,8 +151,8 @@ MODULE_LICENSE("GPL");
 
 module_param(max_interrupt_work, int, 0);
 module_param(debug, int, 0);
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_array(xcvr, int, NULL, 0);
 MODULE_PARM_DESC(max_interrupt_work, "ATP maximum events handled per interrupt");
 MODULE_PARM_DESC(debug, "ATP debug level (0-7)");
diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c
index c8d84679ede7..d3bb2ba51f40 100644
--- a/drivers/net/ethernet/smsc/smc9194.c
+++ b/drivers/net/ethernet/smsc/smc9194.c
@@ -1501,8 +1501,8 @@ static void smc_set_multicast_list(struct net_device *dev)
 static struct net_device *devSMC9194;
 MODULE_LICENSE("GPL");
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
 module_param(ifport, int, 0);
 MODULE_PARM_DESC(io, "SMC 99194 I/O base address");
 MODULE_PARM_DESC(irq, "SMC 99194 IRQ number");
-- 
cgit v1.2.3


From b658e5d854d4e9015d83133a826ec734770deefb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:26 +0100
Subject: Annotate hardware config module parameters in drivers/net/hamradio/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/net/hamradio/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Thomas Sailer <t.sailer@alumni.ethz.ch>
cc: Joerg Reuter <jreuter@yaina.de>
cc: linux-hams@vger.kernel.org
cc: netdev@vger.kernel.org
---
 drivers/net/hamradio/baycom_epp.c     | 2 +-
 drivers/net/hamradio/baycom_par.c     | 2 +-
 drivers/net/hamradio/baycom_ser_fdx.c | 4 ++--
 drivers/net/hamradio/baycom_ser_hdx.c | 4 ++--
 drivers/net/hamradio/dmascc.c         | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 594fa1407e29..1503f10122f7 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -1176,7 +1176,7 @@ static int iobase[NR_PORTS] = { 0x378, };
 
 module_param_array(mode, charp, NULL, 0);
 MODULE_PARM_DESC(mode, "baycom operating mode");
-module_param_array(iobase, int, NULL, 0);
+module_param_hw_array(iobase, int, ioport, NULL, 0);
 MODULE_PARM_DESC(iobase, "baycom io base address");
 
 MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu");
diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c
index 809dc25909d1..92b13b39f426 100644
--- a/drivers/net/hamradio/baycom_par.c
+++ b/drivers/net/hamradio/baycom_par.c
@@ -481,7 +481,7 @@ static int iobase[NR_PORTS] = { 0x378, };
 
 module_param_array(mode, charp, NULL, 0);
 MODULE_PARM_DESC(mode, "baycom operating mode; eg. par96 or picpar");
-module_param_array(iobase, int, NULL, 0);
+module_param_hw_array(iobase, int, ioport, NULL, 0);
 MODULE_PARM_DESC(iobase, "baycom io base address");
 
 MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu");
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c
index ebc06822fd4d..d9a646acca20 100644
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ b/drivers/net/hamradio/baycom_ser_fdx.c
@@ -614,9 +614,9 @@ static int baud[NR_PORTS] = { [0 ... NR_PORTS-1] = 1200 };
 
 module_param_array(mode, charp, NULL, 0);
 MODULE_PARM_DESC(mode, "baycom operating mode; * for software DCD");
-module_param_array(iobase, int, NULL, 0);
+module_param_hw_array(iobase, int, ioport, NULL, 0);
 MODULE_PARM_DESC(iobase, "baycom io base address");
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "baycom irq number");
 module_param_array(baud, int, NULL, 0);
 MODULE_PARM_DESC(baud, "baycom baud rate (300 to 4800)");
diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c
index 60fcf512c208..f1c8a9ff3891 100644
--- a/drivers/net/hamradio/baycom_ser_hdx.c
+++ b/drivers/net/hamradio/baycom_ser_hdx.c
@@ -642,9 +642,9 @@ static int irq[NR_PORTS] = { 4, };
 
 module_param_array(mode, charp, NULL, 0);
 MODULE_PARM_DESC(mode, "baycom operating mode; * for software DCD");
-module_param_array(iobase, int, NULL, 0);
+module_param_hw_array(iobase, int, ioport, NULL, 0);
 MODULE_PARM_DESC(iobase, "baycom io base address");
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "baycom irq number");
 
 MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu");
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index 2479072981a1..dec6b76bc0fb 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -274,7 +274,7 @@ static unsigned long rand;
 
 MODULE_AUTHOR("Klaus Kudielka");
 MODULE_DESCRIPTION("Driver for high-speed SCC boards");
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_LICENSE("GPL");
 
 static void __exit dmascc_exit(void)
-- 
cgit v1.2.3


From 4f06e652721da79e032d3ba38bc1589bd553a5fc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:26 +0100
Subject: Annotate hardware config module parameters in drivers/net/irda/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/net/irda/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Samuel Ortiz <samuel@sortiz.org>
cc: netdev@vger.kernel.org
---
 drivers/net/irda/ali-ircc.c    |  6 +++---
 drivers/net/irda/nsc-ircc.c    |  6 +++---
 drivers/net/irda/smsc-ircc2.c  | 10 +++++-----
 drivers/net/irda/w83977af_ir.c |  4 ++--
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index c285eafd3f1c..35f198d83701 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -2207,11 +2207,11 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:" ALI_IRCC_DRIVER_NAME);
 
 
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io, "Base I/O addresses");
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "IRQ lines");
-module_param_array(dma, int, NULL, 0);
+module_param_hw_array(dma, int, dma, NULL, 0);
 MODULE_PARM_DESC(dma, "DMA channels");
 
 module_init(ali_ircc_init);
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index aaecc3baaf30..7beae147be11 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -2396,11 +2396,11 @@ MODULE_LICENSE("GPL");
 
 module_param(qos_mtt_bits, int, 0);
 MODULE_PARM_DESC(qos_mtt_bits, "Minimum Turn Time");
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io, "Base I/O addresses");
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "IRQ lines");
-module_param_array(dma, int, NULL, 0);
+module_param_hw_array(dma, int, dma, NULL, 0);
 MODULE_PARM_DESC(dma, "DMA channels");
 module_param(dongle_id, int, 0);
 MODULE_PARM_DESC(dongle_id, "Type-id of used dongle");
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index dcf92ba80872..23ed89ae5ddc 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -82,24 +82,24 @@ MODULE_PARM_DESC(nopnp, "Do not use PNP to detect controller settings, defaults
 
 #define DMA_INVAL 255
 static int ircc_dma = DMA_INVAL;
-module_param(ircc_dma, int, 0);
+module_param_hw(ircc_dma, int, dma, 0);
 MODULE_PARM_DESC(ircc_dma, "DMA channel");
 
 #define IRQ_INVAL 255
 static int ircc_irq = IRQ_INVAL;
-module_param(ircc_irq, int, 0);
+module_param_hw(ircc_irq, int, irq, 0);
 MODULE_PARM_DESC(ircc_irq, "IRQ line");
 
 static int ircc_fir;
-module_param(ircc_fir, int, 0);
+module_param_hw(ircc_fir, int, ioport, 0);
 MODULE_PARM_DESC(ircc_fir, "FIR Base Address");
 
 static int ircc_sir;
-module_param(ircc_sir, int, 0);
+module_param_hw(ircc_sir, int, ioport, 0);
 MODULE_PARM_DESC(ircc_sir, "SIR Base Address");
 
 static int ircc_cfg;
-module_param(ircc_cfg, int, 0);
+module_param_hw(ircc_cfg, int, ioport, 0);
 MODULE_PARM_DESC(ircc_cfg, "Configuration register base address");
 
 static int ircc_transceiver;
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index 8d5b903d1d9d..282b6c9ae05b 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -1263,9 +1263,9 @@ MODULE_LICENSE("GPL");
 
 module_param(qos_mtt_bits, int, 0);
 MODULE_PARM_DESC(qos_mtt_bits, "Mimimum Turn Time");
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io, "Base I/O addresses");
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "IRQ lines");
 
 /*
-- 
cgit v1.2.3


From af28a03c1b1eae4d7c97f4bd0c6326522c8a3bc9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:27 +0100
Subject: Annotate hardware config module parameters in drivers/net/wan/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/net/wan/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: "Jan \"Yenya\" Kasprzak" <kas@fi.muni.cz>
cc: netdev@vger.kernel.org
---
 drivers/net/wan/cosa.c         | 6 +++---
 drivers/net/wan/hostess_sv11.c | 6 +++---
 drivers/net/wan/sbni.c         | 4 ++--
 drivers/net/wan/sealevel.c     | 8 ++++----
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 4ca71bca39ac..6ea16260ec76 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -232,11 +232,11 @@ static int irq[MAX_CARDS+1] = { -1, -1, -1, -1, -1, -1, 0, };
 static struct class *cosa_class;
 
 #ifdef MODULE
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io, "The I/O bases of the COSA or SRP cards");
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "The IRQ lines of the COSA or SRP cards");
-module_param_array(dma, int, NULL, 0);
+module_param_hw_array(dma, int, dma, NULL, 0);
 MODULE_PARM_DESC(dma, "The DMA channels of the COSA or SRP cards");
 
 MODULE_AUTHOR("Jan \"Yenya\" Kasprzak, <kas@fi.muni.cz>");
diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index dd6bb3364ad2..4de0737fbf8a 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -324,11 +324,11 @@ static void sv11_shutdown(struct z8530_dev *dev)
 static int io = 0x200;
 static int irq = 9;
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "The I/O base of the Comtrol Hostess SV11 card");
-module_param(dma, int, 0);
+module_param_hw(dma, int, dma, 0);
 MODULE_PARM_DESC(dma, "Set this to 1 to use DMA1/DMA3 for TX/RX");
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "The interrupt line setting for the Comtrol Hostess SV11 card");
 
 MODULE_AUTHOR("Alan Cox");
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index 3ca3419c54a0..bde8c0339831 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -1463,8 +1463,8 @@ set_multicast_list( struct net_device  *dev )
 
 
 #ifdef MODULE
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_array(baud, int, NULL, 0);
 module_param_array(rxl, int, NULL, 0);
 module_param_array(mac, int, NULL, 0);
diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index fbb5aa2c4d8f..c56f2c252113 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -363,13 +363,13 @@ static int rxdma=3;
 static int irq=5;
 static bool slow=false;
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "The I/O base of the Sealevel card");
-module_param(txdma, int, 0);
+module_param_hw(txdma, int, dma, 0);
 MODULE_PARM_DESC(txdma, "Transmit DMA channel");
-module_param(rxdma, int, 0);
+module_param_hw(rxdma, int, dma, 0);
 MODULE_PARM_DESC(rxdma, "Receive DMA channel");
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "The interrupt line setting for the SeaLevel card");
 module_param(slow, bool, 0);
 MODULE_PARM_DESC(slow, "Set this for an older Sealevel card such as the 4012");
-- 
cgit v1.2.3


From 767c13e610d5775be00423c1ce046c1eb9616a21 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:27 +0100
Subject: Annotate hardware config module parameters in drivers/net/wireless/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/net/wireless/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Kalle Valo <kvalo@codeaurora.org>
cc: linux-wireless@vger.kernel.org
cc: netdev@vger.kernel.org
---
 drivers/net/wireless/cisco/airo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 4b040451a9b8..1b7e125a28e2 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -246,8 +246,8 @@ MODULE_DESCRIPTION("Support for Cisco/Aironet 802.11 wireless ethernet cards.  "
 		   "Direct support for ISA/PCI/MPI cards and support for PCMCIA when used with airo_cs.");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_SUPPORTED_DEVICE("Aironet 4500, 4800 and Cisco 340/350");
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_array(rates, int, NULL, 0);
 module_param_array(ssids, charp, NULL, 0);
 module_param(auto_wep, int, 0);
-- 
cgit v1.2.3


From c8fc074dd388112890684b614be1e58335890b27 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:27 +0100
Subject: Annotate hardware config module parameters in drivers/parport/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/parport/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
---
 drivers/parport/parport_pc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 9d42dfe65d44..5548193a28a6 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -3150,13 +3150,13 @@ static char *irq[PARPORT_PC_MAX_PORTS];
 static char *dma[PARPORT_PC_MAX_PORTS];
 
 MODULE_PARM_DESC(io, "Base I/O address (SPP regs)");
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io_hi, "Base I/O address (ECR)");
-module_param_array(io_hi, int, NULL, 0);
+module_param_hw_array(io_hi, int, ioport, NULL, 0);
 MODULE_PARM_DESC(irq, "IRQ line");
-module_param_array(irq, charp, NULL, 0);
+module_param_hw_array(irq, charp, irq, NULL, 0);
 MODULE_PARM_DESC(dma, "DMA channel");
-module_param_array(dma, charp, NULL, 0);
+module_param_hw_array(dma, charp, dma, NULL, 0);
 #if defined(CONFIG_PARPORT_PC_SUPERIO) || \
        (defined(CONFIG_PARPORT_1284) && defined(CONFIG_PARPORT_PC_FIFO))
 MODULE_PARM_DESC(verbose_probing, "Log chit-chat during initialisation");
-- 
cgit v1.2.3


From 01b961b70dde53b3a5b5062670a19d566e67e78e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:27 +0100
Subject: Annotate hardware config module parameters in drivers/pci/hotplug/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/pci/hotplug/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
cc: Scott Murray <scott@spiteful.org>
cc: linux-pci@vger.kernel.org
---
 drivers/pci/hotplug/cpcihp_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/hotplug/cpcihp_generic.c b/drivers/pci/hotplug/cpcihp_generic.c
index 88a44a707b96..bbf9cf8aeaad 100644
--- a/drivers/pci/hotplug/cpcihp_generic.c
+++ b/drivers/pci/hotplug/cpcihp_generic.c
@@ -220,7 +220,7 @@ module_param(first_slot, byte, 0);
 MODULE_PARM_DESC(first_slot, "Hotswap bus first slot number");
 module_param(last_slot, byte, 0);
 MODULE_PARM_DESC(last_slot, "Hotswap bus last slot number");
-module_param(port, ushort, 0);
+module_param_hw(port, ushort, ioport, 0);
 MODULE_PARM_DESC(port, "#ENUM signal I/O port");
 module_param(enum_bit, uint, 0);
 MODULE_PARM_DESC(enum_bit, "#ENUM signal bit (0-7)");
-- 
cgit v1.2.3


From 9149ba1fc2276181c7f80969d349ea25a7f9fe1f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:27 +0100
Subject: Annotate hardware config module parameters in drivers/pcmcia/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/pcmcia/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: linux-pcmcia@lists.infradead.org
---
 drivers/pcmcia/i82365.c | 8 ++++----
 drivers/pcmcia/tcic.c   | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/pcmcia/i82365.c b/drivers/pcmcia/i82365.c
index eb0d80a429e4..fb38cc01859f 100644
--- a/drivers/pcmcia/i82365.c
+++ b/drivers/pcmcia/i82365.c
@@ -108,12 +108,12 @@ static int async_clock = -1;
 static int cable_mode = -1;
 static int wakeup = 0;
 
-module_param(i365_base, ulong, 0444);
+module_param_hw(i365_base, ulong, ioport, 0444);
 module_param(ignore, int, 0444);
 module_param(extra_sockets, int, 0444);
-module_param(irq_mask, int, 0444);
-module_param_array(irq_list, int, &irq_list_count, 0444);
-module_param(cs_irq, int, 0444);
+module_param_hw(irq_mask, int, other, 0444);
+module_param_hw_array(irq_list, int, irq, &irq_list_count, 0444);
+module_param_hw(cs_irq, int, irq, 0444);
 module_param(async_clock, int, 0444);
 module_param(cable_mode, int, 0444);
 module_param(wakeup, int, 0444);
diff --git a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c
index 1ee63e5f0550..a1ac72d51d70 100644
--- a/drivers/pcmcia/tcic.c
+++ b/drivers/pcmcia/tcic.c
@@ -85,12 +85,12 @@ static int poll_quick = HZ/20;
 /* CCLK external clock time, in nanoseconds.  70 ns = 14.31818 MHz */
 static int cycle_time = 70;
 
-module_param(tcic_base, ulong, 0444);
+module_param_hw(tcic_base, ulong, ioport, 0444);
 module_param(ignore, int, 0444);
 module_param(do_scan, int, 0444);
-module_param(irq_mask, int, 0444);
-module_param_array(irq_list, int, &irq_list_count, 0444);
-module_param(cs_irq, int, 0444);
+module_param_hw(irq_mask, int, other, 0444);
+module_param_hw_array(irq_list, int, irq, &irq_list_count, 0444);
+module_param_hw(cs_irq, int, irq, 0444);
 module_param(poll_interval, int, 0444);
 module_param(poll_quick, int, 0444);
 module_param(cycle_time, int, 0444);
-- 
cgit v1.2.3


From 88f06b76e462119cb694c3ff13d7d343c49d2569 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:27 +0100
Subject: Annotate hardware config module parameters in drivers/scsi/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/scsi/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: "Juergen E. Fischer" <fischer@norbit.de>
cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com>
cc: "Martin K. Petersen" <martin.petersen@oracle.com>
cc: Dario Ballabio <ballabio_dario@emc.com>
cc: Finn Thain <fthain@telegraphics.com.au>
cc: Michael Schmitz <schmitzmic@gmail.com>
cc: Achim Leubner <achim_leubner@adaptec.com>
cc: linux-scsi@vger.kernel.org
---
 drivers/scsi/aha152x.c   | 4 ++--
 drivers/scsi/aha1542.c   | 2 +-
 drivers/scsi/g_NCR5380.c | 8 ++++----
 drivers/scsi/gdth.c      | 2 +-
 drivers/scsi/qlogicfas.c | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index f44d0487236e..ce5dc73d85bb 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -331,11 +331,11 @@ MODULE_LICENSE("GPL");
 #if !defined(PCMCIA)
 #if defined(MODULE)
 static int io[] = {0, 0};
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io,"base io address of controller");
 
 static int irq[] = {0, 0};
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq,"interrupt for controller");
 
 static int scsiid[] = {7, 7};
diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c
index 7db448ec8beb..a23cc9ac5acd 100644
--- a/drivers/scsi/aha1542.c
+++ b/drivers/scsi/aha1542.c
@@ -31,7 +31,7 @@ module_param(isapnp, bool, 0);
 MODULE_PARM_DESC(isapnp, "enable PnP support (default=1)");
 
 static int io[MAXBOARDS] = { 0x330, 0x334, 0, 0 };
-module_param_array(io, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
 MODULE_PARM_DESC(io, "base IO address of controller (0x130,0x134,0x230,0x234,0x330,0x334, default=0x330,0x334)");
 
 /* time AHA spends on the AT-bus during data transfer */
diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 67c8dac321ad..c34fc91ba486 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -85,8 +85,8 @@ static int ncr_53c400;
 static int ncr_53c400a;
 static int dtc_3181e;
 static int hp_c2502;
-module_param(ncr_irq, int, 0);
-module_param(ncr_addr, int, 0);
+module_param_hw(ncr_irq, int, irq, 0);
+module_param_hw(ncr_addr, int, ioport, 0);
 module_param(ncr_5380, int, 0);
 module_param(ncr_53c400, int, 0);
 module_param(ncr_53c400a, int, 0);
@@ -94,11 +94,11 @@ module_param(dtc_3181e, int, 0);
 module_param(hp_c2502, int, 0);
 
 static int irq[] = { -1, -1, -1, -1, -1, -1, -1, -1 };
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(irq, "IRQ number(s) (0=none, 254=auto [default])");
 
 static int base[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
-module_param_array(base, int, NULL, 0);
+module_param_hw_array(base, int, ioport, NULL, 0);
 MODULE_PARM_DESC(base, "base address(es)");
 
 static int card[] = { -1, -1, -1, -1, -1, -1, -1, -1 };
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index d020a13646ae..facc7271f932 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -353,7 +353,7 @@ static int probe_eisa_isa = 0;
 static int force_dma32 = 0;
 
 /* parameters for modprobe/insmod */
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 module_param(disable, int, 0);
 module_param(reserve_mode, int, 0);
 module_param_array(reserve_list, int, NULL, 0);
diff --git a/drivers/scsi/qlogicfas.c b/drivers/scsi/qlogicfas.c
index 61cac87fb86f..840823b99e51 100644
--- a/drivers/scsi/qlogicfas.c
+++ b/drivers/scsi/qlogicfas.c
@@ -137,8 +137,8 @@ err:
 static struct qlogicfas408_priv *cards;
 static int iobase[MAX_QLOGICFAS];
 static int irq[MAX_QLOGICFAS] = { [0 ... MAX_QLOGICFAS-1] = -1 };
-module_param_array(iobase, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(iobase, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 MODULE_PARM_DESC(iobase, "I/O address");
 MODULE_PARM_DESC(irq, "IRQ");
 
-- 
cgit v1.2.3


From 32820d8abd4d9502208697f0d26a3f8761b5868f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:28 +0100
Subject: Annotate hardware config module parameters in drivers/staging/media/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/staging/media/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
cc: linux-media@vger.kernel.org
cc: devel@driverdev.osuosl.org
---
 drivers/staging/media/lirc/lirc_sir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/lirc/lirc_sir.c b/drivers/staging/media/lirc/lirc_sir.c
index c6c3de94adaa..dde46dd8cabb 100644
--- a/drivers/staging/media/lirc/lirc_sir.c
+++ b/drivers/staging/media/lirc/lirc_sir.c
@@ -826,10 +826,10 @@ MODULE_AUTHOR("Milan Pikula");
 #endif
 MODULE_LICENSE("GPL");
 
-module_param(io, int, S_IRUGO);
+module_param_hw(io, int, ioport, S_IRUGO);
 MODULE_PARM_DESC(io, "I/O address base (0x3f8 or 0x2f8)");
 
-module_param(irq, int, S_IRUGO);
+module_param_hw(irq, int, irq, S_IRUGO);
 MODULE_PARM_DESC(irq, "Interrupt (4 or 3)");
 
 module_param(threshold, int, S_IRUGO);
-- 
cgit v1.2.3


From dbf05cb05f61145069d01ca9c6a896159184af88 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:28 +0100
Subject: Annotate hardware config module parameters in
 drivers/staging/speakup/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/staging/speakup/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: speakup@linux-speakup.org
cc: devel@driverdev.osuosl.org
---
 drivers/staging/speakup/speakup_acntpc.c | 2 +-
 drivers/staging/speakup/speakup_dtlk.c   | 2 +-
 drivers/staging/speakup/speakup_keypc.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/speakup/speakup_acntpc.c b/drivers/staging/speakup/speakup_acntpc.c
index c7fab261d860..b6fbf9de1f85 100644
--- a/drivers/staging/speakup/speakup_acntpc.c
+++ b/drivers/staging/speakup/speakup_acntpc.c
@@ -307,7 +307,7 @@ static void accent_release(void)
 	speakup_info.port_tts = 0;
 }
 
-module_param_named(port, port_forced, int, 0444);
+module_param_hw_named(port, port_forced, int, ioport, 0444);
 module_param_named(start, synth_acntpc.startup, short, 0444);
 
 MODULE_PARM_DESC(port, "Set the port for the synthesizer (override probing).");
diff --git a/drivers/staging/speakup/speakup_dtlk.c b/drivers/staging/speakup/speakup_dtlk.c
index e2bf20806d8d..9c097fda07b0 100644
--- a/drivers/staging/speakup/speakup_dtlk.c
+++ b/drivers/staging/speakup/speakup_dtlk.c
@@ -378,7 +378,7 @@ static void dtlk_release(void)
 	speakup_info.port_tts = 0;
 }
 
-module_param_named(port, port_forced, int, 0444);
+module_param_hw_named(port, port_forced, int, ioport, 0444);
 module_param_named(start, synth_dtlk.startup, short, 0444);
 
 MODULE_PARM_DESC(port, "Set the port for the synthesizer (override probing).");
diff --git a/drivers/staging/speakup/speakup_keypc.c b/drivers/staging/speakup/speakup_keypc.c
index 10f4964782e2..e653b52175b8 100644
--- a/drivers/staging/speakup/speakup_keypc.c
+++ b/drivers/staging/speakup/speakup_keypc.c
@@ -309,7 +309,7 @@ static void keynote_release(void)
 	synth_port = 0;
 }
 
-module_param_named(port, port_forced, int, 0444);
+module_param_hw_named(port, port_forced, int, ioport, 0444);
 module_param_named(start, synth_keypc.startup, short, 0444);
 
 MODULE_PARM_DESC(port, "Set the port for the synthesizer (override probing).");
-- 
cgit v1.2.3


From ea38fd72fb5f065e3f655d388193db3476820482 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:28 +0100
Subject: Annotate hardware config module parameters in drivers/staging/vme/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/staging/vme/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Martyn Welch <martyn@welchs.me.uk>
cc: Manohar Vanga <manohar.vanga@gmail.com>
cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: devel@driverdev.osuosl.org
---
 drivers/staging/vme/devices/vme_pio2_core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/vme/devices/vme_pio2_core.c b/drivers/staging/vme/devices/vme_pio2_core.c
index 20a2d835fdaa..367535b4b77f 100644
--- a/drivers/staging/vme/devices/vme_pio2_core.c
+++ b/drivers/staging/vme/devices/vme_pio2_core.c
@@ -466,16 +466,16 @@ static void __exit pio2_exit(void)
 
 /* These are required for each board */
 MODULE_PARM_DESC(bus, "Enumeration of VMEbus to which the board is connected");
-module_param_array(bus, int, &bus_num, 0444);
+module_param_hw_array(bus, int, other, &bus_num, 0444);
 
 MODULE_PARM_DESC(base, "Base VME address for PIO2 Registers");
-module_param_array(base, long, &base_num, 0444);
+module_param_hw_array(base, long, other, &base_num, 0444);
 
 MODULE_PARM_DESC(vector, "VME IRQ Vector (Lower 4 bits masked)");
-module_param_array(vector, int, &vector_num, 0444);
+module_param_hw_array(vector, int, other, &vector_num, 0444);
 
 MODULE_PARM_DESC(level, "VME IRQ Level");
-module_param_array(level, int, &level_num, 0444);
+module_param_hw_array(level, int, other, &level_num, 0444);
 
 MODULE_PARM_DESC(variant, "Last 4 characters of PIO2 board variant");
 module_param_array(variant, charp, &variant_num, 0444);
-- 
cgit v1.2.3


From 3b60daf86b133f0b15e3eb9b767c6c1752af2bd6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:29 +0100
Subject: Annotate hardware config module parameters in drivers/tty/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/tty/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: Jiri Slaby <jslaby@suse.com>
cc: linux-serial@vger.kernel.org
---
 drivers/tty/cyclades.c              |  4 ++--
 drivers/tty/moxa.c                  |  2 +-
 drivers/tty/mxser.c                 |  2 +-
 drivers/tty/rocket.c                | 10 +++++-----
 drivers/tty/serial/8250/8250_core.c |  4 ++--
 drivers/tty/synclink.c              |  6 +++---
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c
index 5e4fa9206861..104f09c58163 100644
--- a/drivers/tty/cyclades.c
+++ b/drivers/tty/cyclades.c
@@ -156,8 +156,8 @@ static unsigned int cy_isa_addresses[] = {
 static long maddr[NR_CARDS];
 static int irq[NR_CARDS];
 
-module_param_array(maddr, long, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_hw_array(maddr, long, iomem, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
 
 #endif				/* CONFIG_ISA */
 
diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c
index 4caf0c3b1f99..3b251f4e5df0 100644
--- a/drivers/tty/moxa.c
+++ b/drivers/tty/moxa.c
@@ -179,7 +179,7 @@ MODULE_FIRMWARE("c320tunx.cod");
 
 module_param_array(type, uint, NULL, 0);
 MODULE_PARM_DESC(type, "card type: C218=2, C320=4");
-module_param_array(baseaddr, ulong, NULL, 0);
+module_param_hw_array(baseaddr, ulong, ioport, NULL, 0);
 MODULE_PARM_DESC(baseaddr, "base address");
 module_param_array(numports, uint, NULL, 0);
 MODULE_PARM_DESC(numports, "numports (ignored for C218)");
diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c
index 7b8f383fb090..8bd6fb6d9391 100644
--- a/drivers/tty/mxser.c
+++ b/drivers/tty/mxser.c
@@ -183,7 +183,7 @@ static int ttymajor = MXSERMAJOR;
 
 MODULE_AUTHOR("Casper Yang");
 MODULE_DESCRIPTION("MOXA Smartio/Industio Family Multiport Board Device Driver");
-module_param_array(ioaddr, ulong, NULL, 0);
+module_param_hw_array(ioaddr, ulong, ioport, NULL, 0);
 MODULE_PARM_DESC(ioaddr, "ISA io addresses to look for a moxa board");
 module_param(ttymajor, int, 0);
 MODULE_LICENSE("GPL");
diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c
index d66c1edd9892..b51a877da986 100644
--- a/drivers/tty/rocket.c
+++ b/drivers/tty/rocket.c
@@ -250,15 +250,15 @@ static int sReadAiopNumChan(WordIO_t io);
 
 MODULE_AUTHOR("Theodore Ts'o");
 MODULE_DESCRIPTION("Comtrol RocketPort driver");
-module_param(board1, ulong, 0);
+module_param_hw(board1, ulong, ioport, 0);
 MODULE_PARM_DESC(board1, "I/O port for (ISA) board #1");
-module_param(board2, ulong, 0);
+module_param_hw(board2, ulong, ioport, 0);
 MODULE_PARM_DESC(board2, "I/O port for (ISA) board #2");
-module_param(board3, ulong, 0);
+module_param_hw(board3, ulong, ioport, 0);
 MODULE_PARM_DESC(board3, "I/O port for (ISA) board #3");
-module_param(board4, ulong, 0);
+module_param_hw(board4, ulong, ioport, 0);
 MODULE_PARM_DESC(board4, "I/O port for (ISA) board #4");
-module_param(controller, ulong, 0);
+module_param_hw(controller, ulong, ioport, 0);
 MODULE_PARM_DESC(controller, "I/O port for (ISA) rocketport controller");
 module_param(support_low_speed, bool, 0);
 MODULE_PARM_DESC(support_low_speed, "1 means support 50 baud, 0 means support 460400 baud");
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 76e03a7de9cc..89fde17d9617 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -1191,7 +1191,7 @@ module_exit(serial8250_exit);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Generic 8250/16x50 serial driver");
 
-module_param(share_irqs, uint, 0644);
+module_param_hw(share_irqs, uint, other, 0644);
 MODULE_PARM_DESC(share_irqs, "Share IRQs with other non-8250/16x50 devices (unsafe)");
 
 module_param(nr_uarts, uint, 0644);
@@ -1201,7 +1201,7 @@ module_param(skip_txen_test, uint, 0644);
 MODULE_PARM_DESC(skip_txen_test, "Skip checking for the TXEN bug at init time");
 
 #ifdef CONFIG_SERIAL_8250_RSA
-module_param_array(probe_rsa, ulong, &probe_rsa_count, 0444);
+module_param_hw_array(probe_rsa, ulong, ioport, &probe_rsa_count, 0444);
 MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA");
 #endif
 MODULE_ALIAS_CHARDEV_MAJOR(TTY_MAJOR);
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index 657eed82eeb3..a2c308f7d637 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -869,9 +869,9 @@ static int txholdbufs[MAX_TOTAL_DEVICES];
 	
 module_param(break_on_load, bool, 0);
 module_param(ttymajor, int, 0);
-module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
-module_param_array(dma, int, NULL, 0);
+module_param_hw_array(io, int, ioport, NULL, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
+module_param_hw_array(dma, int, dma, NULL, 0);
 module_param(debug_level, int, 0);
 module_param_array(maxframe, int, NULL, 0);
 module_param_array(txdmabufs, int, NULL, 0);
-- 
cgit v1.2.3


From c729203defa7e6672ff6a5e503066351ac3928cb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:29 +0100
Subject: Annotate hardware config module parameters in drivers/video/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/video/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
cc: linux-fbdev@vger.kernel.org
---
 drivers/video/fbdev/arcfb.c | 8 ++++----
 drivers/video/fbdev/n411.c  | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c
index 1928cb2b5386..7e87d0d61658 100644
--- a/drivers/video/fbdev/arcfb.c
+++ b/drivers/video/fbdev/arcfb.c
@@ -645,17 +645,17 @@ module_param(nosplash, uint, 0);
 MODULE_PARM_DESC(nosplash, "Disable doing the splash screen");
 module_param(arcfb_enable, uint, 0);
 MODULE_PARM_DESC(arcfb_enable, "Enable communication with Arc board");
-module_param(dio_addr, ulong, 0);
+module_param_hw(dio_addr, ulong, ioport, 0);
 MODULE_PARM_DESC(dio_addr, "IO address for data, eg: 0x480");
-module_param(cio_addr, ulong, 0);
+module_param_hw(cio_addr, ulong, ioport, 0);
 MODULE_PARM_DESC(cio_addr, "IO address for control, eg: 0x400");
-module_param(c2io_addr, ulong, 0);
+module_param_hw(c2io_addr, ulong, ioport, 0);
 MODULE_PARM_DESC(c2io_addr, "IO address for secondary control, eg: 0x408");
 module_param(splashval, ulong, 0);
 MODULE_PARM_DESC(splashval, "Splash pattern: 0xFF is black, 0x00 is green");
 module_param(tuhold, ulong, 0);
 MODULE_PARM_DESC(tuhold, "Time to hold between strobing data to Arc board");
-module_param(irq, uint, 0);
+module_param_hw(irq, uint, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ for the Arc board");
 
 module_init(arcfb_init);
diff --git a/drivers/video/fbdev/n411.c b/drivers/video/fbdev/n411.c
index 053deacad7cc..a3677313396e 100644
--- a/drivers/video/fbdev/n411.c
+++ b/drivers/video/fbdev/n411.c
@@ -193,11 +193,11 @@ module_exit(n411_exit);
 
 module_param(nosplash, uint, 0);
 MODULE_PARM_DESC(nosplash, "Disable doing the splash screen");
-module_param(dio_addr, ulong, 0);
+module_param_hw(dio_addr, ulong, ioport, 0);
 MODULE_PARM_DESC(dio_addr, "IO address for data, eg: 0x480");
-module_param(cio_addr, ulong, 0);
+module_param_hw(cio_addr, ulong, ioport, 0);
 MODULE_PARM_DESC(cio_addr, "IO address for control, eg: 0x400");
-module_param(c2io_addr, ulong, 0);
+module_param_hw(c2io_addr, ulong, ioport, 0);
 MODULE_PARM_DESC(c2io_addr, "IO address for secondary control, eg: 0x408");
 module_param(splashval, ulong, 0);
 MODULE_PARM_DESC(splashval, "Splash pattern: 0x00 is black, 0x01 is white");
-- 
cgit v1.2.3


From 5d1c93ce21832825acc48595a6fec8cfdb3e1453 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:29 +0100
Subject: Annotate hardware config module parameters in drivers/watchdog/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in drivers/watchdog/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
cc: Wim Van Sebroeck <wim@iguana.be>
cc: Zwane Mwaikambo <zwanem@gmail.com>
cc: linux-watchdog@vger.kernel.org
---
 drivers/watchdog/cpu5wdt.c     | 2 +-
 drivers/watchdog/eurotechwdt.c | 4 ++--
 drivers/watchdog/pc87413_wdt.c | 2 +-
 drivers/watchdog/sc1200wdt.c   | 2 +-
 drivers/watchdog/wdt.c         | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/watchdog/cpu5wdt.c b/drivers/watchdog/cpu5wdt.c
index 6d03e8e30f8b..6c3f78e45c26 100644
--- a/drivers/watchdog/cpu5wdt.c
+++ b/drivers/watchdog/cpu5wdt.c
@@ -289,7 +289,7 @@ MODULE_DESCRIPTION("sma cpu5 watchdog driver");
 MODULE_SUPPORTED_DEVICE("sma cpu5 watchdog");
 MODULE_LICENSE("GPL");
 
-module_param(port, int, 0);
+module_param_hw(port, int, ioport, 0);
 MODULE_PARM_DESC(port, "base address of watchdog card, default is 0x91");
 
 module_param(verbose, int, 0);
diff --git a/drivers/watchdog/eurotechwdt.c b/drivers/watchdog/eurotechwdt.c
index 23ee53240c4c..38e96712264f 100644
--- a/drivers/watchdog/eurotechwdt.c
+++ b/drivers/watchdog/eurotechwdt.c
@@ -97,9 +97,9 @@ MODULE_PARM_DESC(nowayout,
 #define WDT_TIMER_CFG		0xf3
 
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "Eurotech WDT io port (default=0x3f0)");
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "Eurotech WDT irq (default=10)");
 module_param(ev, charp, 0);
 MODULE_PARM_DESC(ev, "Eurotech WDT event type (default is `int')");
diff --git a/drivers/watchdog/pc87413_wdt.c b/drivers/watchdog/pc87413_wdt.c
index 9f15dd9435d1..06a892e36a8d 100644
--- a/drivers/watchdog/pc87413_wdt.c
+++ b/drivers/watchdog/pc87413_wdt.c
@@ -579,7 +579,7 @@ MODULE_AUTHOR("Marcus Junker <junker@anduras.de>");
 MODULE_DESCRIPTION("PC87413 WDT driver");
 MODULE_LICENSE("GPL");
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, MODNAME " I/O port (default: "
 					__MODULE_STRING(IO_DEFAULT) ").");
 
diff --git a/drivers/watchdog/sc1200wdt.c b/drivers/watchdog/sc1200wdt.c
index 131193a7acdf..b34d3d5ba632 100644
--- a/drivers/watchdog/sc1200wdt.c
+++ b/drivers/watchdog/sc1200wdt.c
@@ -88,7 +88,7 @@ MODULE_PARM_DESC(isapnp,
 	"When set to 0 driver ISA PnP support will be disabled");
 #endif
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "io port");
 module_param(timeout, int, 0);
 MODULE_PARM_DESC(timeout, "range is 0-255 minutes, default is 1");
diff --git a/drivers/watchdog/wdt.c b/drivers/watchdog/wdt.c
index e0206b5b7d89..e481fbbc4ae7 100644
--- a/drivers/watchdog/wdt.c
+++ b/drivers/watchdog/wdt.c
@@ -78,9 +78,9 @@ static int irq = 11;
 
 static DEFINE_SPINLOCK(wdt_lock);
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "WDT io port (default=0x240)");
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "WDT irq (default=11)");
 
 /* Support for the Fan Tachometer on the WDT501-P */
-- 
cgit v1.2.3


From b90fe0c4e0ceb52c78c17f3cfa1ff8e79275028d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:29 +0100
Subject: Annotate hardware config module parameters in fs/pstore/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in fs/pstore/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
cc: Anton Vorontsov <anton@enomsg.org>
cc: Colin Cross <ccross@android.com>
cc: Tony Luck <tony.luck@intel.com>
---
 fs/pstore/ram.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 11f918d34b1e..cce1d38417ca 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -58,7 +58,7 @@ module_param_named(pmsg_size, ramoops_pmsg_size, ulong, 0400);
 MODULE_PARM_DESC(pmsg_size, "size of user space message log");
 
 static unsigned long long mem_address;
-module_param(mem_address, ullong, 0400);
+module_param_hw(mem_address, ullong, other, 0400);
 MODULE_PARM_DESC(mem_address,
 		"start of reserved RAM used to store oops/panic logs");
 
-- 
cgit v1.2.3


From b11ce420c5dfc966061bc20f576c85504bb69712 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:30 +0100
Subject: Annotate hardware config module parameters in sound/drivers/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in sound/drivers/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Takashi Iwai <tiwai@suse.de>
cc: Jaroslav Kysela <perex@perex.cz>
cc: alsa-devel@alsa-project.org
---
 sound/drivers/mpu401/mpu401.c | 4 ++--
 sound/drivers/mtpav.c         | 4 ++--
 sound/drivers/serial-u16550.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sound/drivers/mpu401/mpu401.c b/sound/drivers/mpu401/mpu401.c
index fed7e7e2177b..9b86e00d7d95 100644
--- a/sound/drivers/mpu401/mpu401.c
+++ b/sound/drivers/mpu401/mpu401.c
@@ -53,9 +53,9 @@ MODULE_PARM_DESC(enable, "Enable MPU-401 device.");
 module_param_array(pnp, bool, NULL, 0444);
 MODULE_PARM_DESC(pnp, "PnP detection for MPU-401 device.");
 #endif
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for MPU-401 device.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for MPU-401 device.");
 module_param_array(uart_enter, bool, NULL, 0444);
 MODULE_PARM_DESC(uart_enter, "Issue UART_ENTER command at open.");
diff --git a/sound/drivers/mtpav.c b/sound/drivers/mtpav.c
index 00b31f92c504..0f6392001e30 100644
--- a/sound/drivers/mtpav.c
+++ b/sound/drivers/mtpav.c
@@ -86,9 +86,9 @@ module_param(index, int, 0444);
 MODULE_PARM_DESC(index, "Index value for MotuMTPAV MIDI.");
 module_param(id, charp, 0444);
 MODULE_PARM_DESC(id, "ID string for MotuMTPAV MIDI.");
-module_param(port, long, 0444);
+module_param_hw(port, long, ioport, 0444);
 MODULE_PARM_DESC(port, "Parallel port # for MotuMTPAV MIDI.");
-module_param(irq, int, 0444);
+module_param_hw(irq, int, irq, 0444);
 MODULE_PARM_DESC(irq, "Parallel IRQ # for MotuMTPAV MIDI.");
 module_param(hwports, int, 0444);
 MODULE_PARM_DESC(hwports, "Hardware ports # for MotuMTPAV MIDI.");
diff --git a/sound/drivers/serial-u16550.c b/sound/drivers/serial-u16550.c
index 60d51ac4ccfe..88e66ea0306d 100644
--- a/sound/drivers/serial-u16550.c
+++ b/sound/drivers/serial-u16550.c
@@ -84,9 +84,9 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for Serial MIDI.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable UART16550A chip.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for UART16550A chip.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for UART16550A chip.");
 module_param_array(speed, int, NULL, 0444);
 MODULE_PARM_DESC(speed, "Speed in bauds.");
-- 
cgit v1.2.3


From e992ef5705c1e154acb248869b39e0be4c003a8b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:30 +0100
Subject: Annotate hardware config module parameters in sound/isa/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in sound/isa/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jaroslav Kysela <perex@perex.cz>
cc: Takashi Iwai <tiwai@suse.com>
cc: alsa-devel@alsa-project.org
---
 sound/isa/ad1848/ad1848.c          |  6 +++---
 sound/isa/adlib.c                  |  2 +-
 sound/isa/cmi8328.c                | 12 ++++++------
 sound/isa/cmi8330.c                | 20 ++++++++++----------
 sound/isa/cs423x/cs4231.c          | 12 ++++++------
 sound/isa/cs423x/cs4236.c          | 18 +++++++++---------
 sound/isa/es1688/es1688.c          | 12 ++++++------
 sound/isa/es18xx.c                 | 12 ++++++------
 sound/isa/galaxy/galaxy.c          | 16 ++++++++--------
 sound/isa/gus/gusclassic.c         |  8 ++++----
 sound/isa/gus/gusextreme.c         | 16 ++++++++--------
 sound/isa/gus/gusmax.c             |  8 ++++----
 sound/isa/gus/interwave.c          | 10 +++++-----
 sound/isa/msnd/msnd_pinnacle.c     | 20 ++++++++++----------
 sound/isa/opl3sa2.c                | 16 ++++++++--------
 sound/isa/opti9xx/miro.c           | 14 +++++++-------
 sound/isa/opti9xx/opti92x-ad1848.c | 14 +++++++-------
 sound/isa/sb/jazz16.c              | 12 ++++++------
 sound/isa/sb/sb16.c                | 14 +++++++-------
 sound/isa/sb/sb8.c                 |  6 +++---
 sound/isa/sc6000.c                 | 12 ++++++------
 sound/isa/sscape.c                 | 12 ++++++------
 sound/isa/wavefront/wavefront.c    | 18 +++++++++---------
 23 files changed, 145 insertions(+), 145 deletions(-)

diff --git a/sound/isa/ad1848/ad1848.c b/sound/isa/ad1848/ad1848.c
index a302d1f8d14f..e739b1c85c25 100644
--- a/sound/isa/ad1848/ad1848.c
+++ b/sound/isa/ad1848/ad1848.c
@@ -55,11 +55,11 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for " CRD_NAME " driver.");
 module_param_array(thinkpad, bool, NULL, 0444);
 MODULE_PARM_DESC(thinkpad, "Enable only for the onboard CS4248 of IBM Thinkpad 360/750/755 series.");
diff --git a/sound/isa/adlib.c b/sound/isa/adlib.c
index 8d3060fd7ad7..5fb619eca5c8 100644
--- a/sound/isa/adlib.c
+++ b/sound/isa/adlib.c
@@ -27,7 +27,7 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
 
 static int snd_adlib_match(struct device *dev, unsigned int n)
diff --git a/sound/isa/cmi8328.c b/sound/isa/cmi8328.c
index 787475084f46..8e1756c3b9bb 100644
--- a/sound/isa/cmi8328.c
+++ b/sound/isa/cmi8328.c
@@ -51,18 +51,18 @@ MODULE_PARM_DESC(index, "Index value for CMI8328 soundcard.");
 module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for CMI8328 soundcard.");
 
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for CMI8328 driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for CMI8328 driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 for CMI8328 driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 for CMI8328 driver.");
 
-module_param_array(mpuport, long, NULL, 0444);
+module_param_hw_array(mpuport, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpuport, "MPU-401 port # for CMI8328 driver.");
-module_param_array(mpuirq, int, NULL, 0444);
+module_param_hw_array(mpuirq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpuirq, "IRQ # for CMI8328 MPU-401 port.");
 #ifdef SUPPORT_JOYSTICK
 module_param_array(gameport, bool, NULL, 0444);
diff --git a/sound/isa/cmi8330.c b/sound/isa/cmi8330.c
index dfedfd85f205..f64b29ab5cc7 100644
--- a/sound/isa/cmi8330.c
+++ b/sound/isa/cmi8330.c
@@ -95,27 +95,27 @@ module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard.");
 #endif
 
-module_param_array(sbport, long, NULL, 0444);
+module_param_hw_array(sbport, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(sbport, "Port # for CMI8330/CMI8329 SB driver.");
-module_param_array(sbirq, int, NULL, 0444);
+module_param_hw_array(sbirq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(sbirq, "IRQ # for CMI8330/CMI8329 SB driver.");
-module_param_array(sbdma8, int, NULL, 0444);
+module_param_hw_array(sbdma8, int, dma, NULL, 0444);
 MODULE_PARM_DESC(sbdma8, "DMA8 for CMI8330/CMI8329 SB driver.");
-module_param_array(sbdma16, int, NULL, 0444);
+module_param_hw_array(sbdma16, int, dma, NULL, 0444);
 MODULE_PARM_DESC(sbdma16, "DMA16 for CMI8330/CMI8329 SB driver.");
 
-module_param_array(wssport, long, NULL, 0444);
+module_param_hw_array(wssport, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(wssport, "Port # for CMI8330/CMI8329 WSS driver.");
-module_param_array(wssirq, int, NULL, 0444);
+module_param_hw_array(wssirq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(wssirq, "IRQ # for CMI8330/CMI8329 WSS driver.");
-module_param_array(wssdma, int, NULL, 0444);
+module_param_hw_array(wssdma, int, dma, NULL, 0444);
 MODULE_PARM_DESC(wssdma, "DMA for CMI8330/CMI8329 WSS driver.");
 
-module_param_array(fmport, long, NULL, 0444);
+module_param_hw_array(fmport, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fmport, "FM port # for CMI8330/CMI8329 driver.");
-module_param_array(mpuport, long, NULL, 0444);
+module_param_hw_array(mpuport, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpuport, "MPU-401 port # for CMI8330/CMI8329 driver.");
-module_param_array(mpuirq, int, NULL, 0444);
+module_param_hw_array(mpuirq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpuirq, "IRQ # for CMI8330/CMI8329 MPU-401 port.");
 #ifdef CONFIG_PNP
 static int isa_registered;
diff --git a/sound/isa/cs423x/cs4231.c b/sound/isa/cs423x/cs4231.c
index ef7448e9f813..e8edd9017a2f 100644
--- a/sound/isa/cs423x/cs4231.c
+++ b/sound/isa/cs423x/cs4231.c
@@ -55,17 +55,17 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " CRD_NAME " driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " CRD_NAME " driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for " CRD_NAME " driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for " CRD_NAME " driver.");
 
 static int snd_cs4231_match(struct device *dev, unsigned int n)
diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c
index 9d7582c90a95..1f9a3b2be7a1 100644
--- a/sound/isa/cs423x/cs4236.c
+++ b/sound/isa/cs423x/cs4236.c
@@ -98,23 +98,23 @@ MODULE_PARM_DESC(enable, "Enable " IDENT " soundcard.");
 module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "ISA PnP detection for specified soundcard.");
 #endif
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " IDENT " driver.");
-module_param_array(cport, long, NULL, 0444);
+module_param_hw_array(cport, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(cport, "Control port # for " IDENT " driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " IDENT " driver.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for " IDENT " driver.");
-module_param_array(sb_port, long, NULL, 0444);
+module_param_hw_array(sb_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(sb_port, "SB port # for " IDENT " driver (optional).");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for " IDENT " driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " IDENT " driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for " IDENT " driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for " IDENT " driver.");
 
 #ifdef CONFIG_PNP
diff --git a/sound/isa/es1688/es1688.c b/sound/isa/es1688/es1688.c
index 1901c2bb6c3b..36320e7f2789 100644
--- a/sound/isa/es1688/es1688.c
+++ b/sound/isa/es1688/es1688.c
@@ -71,17 +71,17 @@ module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard.");
 #endif
 MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " CRD_NAME " driver.");
-module_param_array(irq, int, NULL, 0444);
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for ES1688 driver.");
 MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " CRD_NAME " driver.");
-module_param_array(dma8, int, NULL, 0444);
+module_param_hw_array(dma8, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma8, "8-bit DMA # for " CRD_NAME " driver.");
 
 #ifdef CONFIG_PNP
diff --git a/sound/isa/es18xx.c b/sound/isa/es18xx.c
index 5094b62d8f77..0cabe2b8974f 100644
--- a/sound/isa/es18xx.c
+++ b/sound/isa/es18xx.c
@@ -1999,17 +1999,17 @@ MODULE_PARM_DESC(enable, "Enable ES18xx soundcard.");
 module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard.");
 #endif
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for ES18xx driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for ES18xx driver.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for ES18xx driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for ES18xx driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA 1 # for ES18xx driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA 2 # for ES18xx driver.");
 
 #ifdef CONFIG_PNP
diff --git a/sound/isa/galaxy/galaxy.c b/sound/isa/galaxy/galaxy.c
index 379abe2cbeb2..b9994cc9f5fb 100644
--- a/sound/isa/galaxy/galaxy.c
+++ b/sound/isa/galaxy/galaxy.c
@@ -53,21 +53,21 @@ static int mpu_irq[SNDRV_CARDS] = SNDRV_DEFAULT_IRQ;
 static int dma1[SNDRV_CARDS] = SNDRV_DEFAULT_DMA;
 static int dma2[SNDRV_CARDS] = SNDRV_DEFAULT_DMA;
 
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
-module_param_array(wss_port, long, NULL, 0444);
+module_param_hw_array(wss_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(wss_port, "WSS port # for " CRD_NAME " driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " CRD_NAME " driver.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for " CRD_NAME " driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " CRD_NAME " driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "Playback DMA # for " CRD_NAME " driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "Capture DMA # for " CRD_NAME " driver.");
 
 /*
diff --git a/sound/isa/gus/gusclassic.c b/sound/isa/gus/gusclassic.c
index c169be49ed71..92a997ab1229 100644
--- a/sound/isa/gus/gusclassic.c
+++ b/sound/isa/gus/gusclassic.c
@@ -58,13 +58,13 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for " CRD_NAME " driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for " CRD_NAME " driver.");
 module_param_array(joystick_dac, int, NULL, 0444);
 MODULE_PARM_DESC(joystick_dac, "Joystick DAC level 0.59V-4.52V or 0.389V-2.98V for " CRD_NAME " driver.");
diff --git a/sound/isa/gus/gusextreme.c b/sound/isa/gus/gusextreme.c
index 77ac2fd723b4..beb52c0f70ea 100644
--- a/sound/isa/gus/gusextreme.c
+++ b/sound/isa/gus/gusextreme.c
@@ -66,21 +66,21 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver.");
-module_param_array(gf1_port, long, NULL, 0444);
+module_param_hw_array(gf1_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(gf1_port, "GF1 port # for " CRD_NAME " driver (optional).");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " CRD_NAME " driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " CRD_NAME " driver.");
-module_param_array(gf1_irq, int, NULL, 0444);
+module_param_hw_array(gf1_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(gf1_irq, "GF1 IRQ # for " CRD_NAME " driver.");
-module_param_array(dma8, int, NULL, 0444);
+module_param_hw_array(dma8, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma8, "8-bit DMA # for " CRD_NAME " driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "GF1 DMA # for " CRD_NAME " driver.");
 module_param_array(joystick_dac, int, NULL, 0444);
 MODULE_PARM_DESC(joystick_dac, "Joystick DAC level 0.59V-4.52V or 0.389V-2.98V for " CRD_NAME " driver.");
diff --git a/sound/isa/gus/gusmax.c b/sound/isa/gus/gusmax.c
index dd88c9d33492..63309a453140 100644
--- a/sound/isa/gus/gusmax.c
+++ b/sound/isa/gus/gusmax.c
@@ -56,13 +56,13 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for GUS MAX soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable GUS MAX soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for GUS MAX driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for GUS MAX driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for GUS MAX driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for GUS MAX driver.");
 module_param_array(joystick_dac, int, NULL, 0444);
 MODULE_PARM_DESC(joystick_dac, "Joystick DAC level 0.59V-4.52V or 0.389V-2.98V for GUS MAX driver.");
diff --git a/sound/isa/gus/interwave.c b/sound/isa/gus/interwave.c
index 70d0040484c8..0687b7ef3e53 100644
--- a/sound/isa/gus/interwave.c
+++ b/sound/isa/gus/interwave.c
@@ -92,17 +92,17 @@ MODULE_PARM_DESC(enable, "Enable InterWave soundcard.");
 module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "ISA PnP detection for specified soundcard.");
 #endif
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for InterWave driver.");
 #ifdef SNDRV_STB
-module_param_array(port_tc, long, NULL, 0444);
+module_param_hw_array(port_tc, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port_tc, "Tone control (TEA6330T - i2c bus) port # for InterWave driver.");
 #endif
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for InterWave driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for InterWave driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for InterWave driver.");
 module_param_array(joystick_dac, int, NULL, 0444);
 MODULE_PARM_DESC(joystick_dac, "Joystick DAC level 0.59V-4.52V or 0.389V-2.98V for InterWave driver.");
diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c
index 4c072666115d..ad4897337df5 100644
--- a/sound/isa/msnd/msnd_pinnacle.c
+++ b/sound/isa/msnd/msnd_pinnacle.c
@@ -800,22 +800,22 @@ MODULE_LICENSE("GPL");
 MODULE_FIRMWARE(INITCODEFILE);
 MODULE_FIRMWARE(PERMCODEFILE);
 
-module_param_array(io, long, NULL, S_IRUGO);
+module_param_hw_array(io, long, ioport, NULL, S_IRUGO);
 MODULE_PARM_DESC(io, "IO port #");
-module_param_array(irq, int, NULL, S_IRUGO);
-module_param_array(mem, long, NULL, S_IRUGO);
+module_param_hw_array(irq, int, irq, NULL, S_IRUGO);
+module_param_hw_array(mem, long, iomem, NULL, S_IRUGO);
 module_param_array(write_ndelay, int, NULL, S_IRUGO);
 module_param(calibrate_signal, int, S_IRUGO);
 #ifndef MSND_CLASSIC
 module_param_array(digital, int, NULL, S_IRUGO);
-module_param_array(cfg, long, NULL, S_IRUGO);
+module_param_hw_array(cfg, long, ioport, NULL, S_IRUGO);
 module_param_array(reset, int, 0, S_IRUGO);
-module_param_array(mpu_io, long, NULL, S_IRUGO);
-module_param_array(mpu_irq, int, NULL, S_IRUGO);
-module_param_array(ide_io0, long, NULL, S_IRUGO);
-module_param_array(ide_io1, long, NULL, S_IRUGO);
-module_param_array(ide_irq, int, NULL, S_IRUGO);
-module_param_array(joystick_io, long, NULL, S_IRUGO);
+module_param_hw_array(mpu_io, long, ioport, NULL, S_IRUGO);
+module_param_hw_array(mpu_irq, int, irq, NULL, S_IRUGO);
+module_param_hw_array(ide_io0, long, ioport, NULL, S_IRUGO);
+module_param_hw_array(ide_io1, long, ioport, NULL, S_IRUGO);
+module_param_hw_array(ide_irq, int, irq, NULL, S_IRUGO);
+module_param_hw_array(joystick_io, long, ioport, NULL, S_IRUGO);
 #endif
 
 
diff --git a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c
index ae133633a420..4098e3e0353d 100644
--- a/sound/isa/opl3sa2.c
+++ b/sound/isa/opl3sa2.c
@@ -69,21 +69,21 @@ MODULE_PARM_DESC(enable, "Enable OPL3-SA soundcard.");
 module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard.");
 #endif
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for OPL3-SA driver.");
-module_param_array(sb_port, long, NULL, 0444);
+module_param_hw_array(sb_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(sb_port, "SB port # for OPL3-SA driver.");
-module_param_array(wss_port, long, NULL, 0444);
+module_param_hw_array(wss_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(wss_port, "WSS port # for OPL3-SA driver.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for OPL3-SA driver.");
-module_param_array(midi_port, long, NULL, 0444);
+module_param_hw_array(midi_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(midi_port, "MIDI port # for OPL3-SA driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for OPL3-SA driver.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for OPL3-SA driver.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for OPL3-SA driver.");
 module_param_array(opl3sa3_ymode, int, NULL, 0444);
 MODULE_PARM_DESC(opl3sa3_ymode, "Speaker size selection for 3D Enhancement mode: Desktop/Large Notebook/Small Notebook/HiFi.");
diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c
index 3a9067db1a84..bcbff56f060d 100644
--- a/sound/isa/opti9xx/miro.c
+++ b/sound/isa/opti9xx/miro.c
@@ -69,19 +69,19 @@ module_param(index, int, 0444);
 MODULE_PARM_DESC(index, "Index value for miro soundcard.");
 module_param(id, charp, 0444);
 MODULE_PARM_DESC(id, "ID string for miro soundcard.");
-module_param(port, long, 0444);
+module_param_hw(port, long, ioport, 0444);
 MODULE_PARM_DESC(port, "WSS port # for miro driver.");
-module_param(mpu_port, long, 0444);
+module_param_hw(mpu_port, long, ioport, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for miro driver.");
-module_param(fm_port, long, 0444);
+module_param_hw(fm_port, long, ioport, 0444);
 MODULE_PARM_DESC(fm_port, "FM Port # for miro driver.");
-module_param(irq, int, 0444);
+module_param_hw(irq, int, irq, 0444);
 MODULE_PARM_DESC(irq, "WSS irq # for miro driver.");
-module_param(mpu_irq, int, 0444);
+module_param_hw(mpu_irq, int, irq, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 irq # for miro driver.");
-module_param(dma1, int, 0444);
+module_param_hw(dma1, int, dma, 0444);
 MODULE_PARM_DESC(dma1, "1st dma # for miro driver.");
-module_param(dma2, int, 0444);
+module_param_hw(dma2, int, dma, 0444);
 MODULE_PARM_DESC(dma2, "2nd dma # for miro driver.");
 module_param(wss, int, 0444);
 MODULE_PARM_DESC(wss, "wss mode");
diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c
index 0a5266003786..ceddb392b1e3 100644
--- a/sound/isa/opti9xx/opti92x-ad1848.c
+++ b/sound/isa/opti9xx/opti92x-ad1848.c
@@ -88,20 +88,20 @@ MODULE_PARM_DESC(id, "ID string for opti9xx based soundcard.");
 module_param(isapnp, bool, 0444);
 MODULE_PARM_DESC(isapnp, "Enable ISA PnP detection for specified soundcard.");
 #endif
-module_param(port, long, 0444);
+module_param_hw(port, long, ioport, 0444);
 MODULE_PARM_DESC(port, "WSS port # for opti9xx driver.");
-module_param(mpu_port, long, 0444);
+module_param_hw(mpu_port, long, ioport, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for opti9xx driver.");
-module_param(fm_port, long, 0444);
+module_param_hw(fm_port, long, ioport, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for opti9xx driver.");
-module_param(irq, int, 0444);
+module_param_hw(irq, int, irq, 0444);
 MODULE_PARM_DESC(irq, "WSS irq # for opti9xx driver.");
-module_param(mpu_irq, int, 0444);
+module_param_hw(mpu_irq, int, irq, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 irq # for opti9xx driver.");
-module_param(dma1, int, 0444);
+module_param_hw(dma1, int, dma, 0444);
 MODULE_PARM_DESC(dma1, "1st dma # for opti9xx driver.");
 #if defined(CS4231) || defined(OPTi93X)
-module_param(dma2, int, 0444);
+module_param_hw(dma2, int, dma, 0444);
 MODULE_PARM_DESC(dma2, "2nd dma # for opti9xx driver.");
 #endif	/* CS4231 || OPTi93X */
 
diff --git a/sound/isa/sb/jazz16.c b/sound/isa/sb/jazz16.c
index 4d909971eedb..bfa0055e1fd6 100644
--- a/sound/isa/sb/jazz16.c
+++ b/sound/isa/sb/jazz16.c
@@ -50,17 +50,17 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for Media Vision Jazz16 based soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable Media Vision Jazz16 based soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for jazz16 driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for jazz16 driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for jazz16 driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for jazz16 driver.");
-module_param_array(dma8, int, NULL, 0444);
+module_param_hw_array(dma8, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma8, "DMA8 # for jazz16 driver.");
-module_param_array(dma16, int, NULL, 0444);
+module_param_hw_array(dma16, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma16, "DMA16 # for jazz16 driver.");
 
 #define SB_JAZZ16_WAKEUP	0xaf
diff --git a/sound/isa/sb/sb16.c b/sound/isa/sb/sb16.c
index 4a7d7c89808f..3b2e4f405ff2 100644
--- a/sound/isa/sb/sb16.c
+++ b/sound/isa/sb/sb16.c
@@ -99,21 +99,21 @@ MODULE_PARM_DESC(enable, "Enable SoundBlaster 16 soundcard.");
 module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard.");
 #endif
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for SB16 driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for SB16 driver.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port # for SB16 PnP driver.");
 #ifdef SNDRV_SBAWE_EMU8000
-module_param_array(awe_port, long, NULL, 0444);
+module_param_hw_array(awe_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(awe_port, "AWE port # for SB16 PnP driver.");
 #endif
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for SB16 driver.");
-module_param_array(dma8, int, NULL, 0444);
+module_param_hw_array(dma8, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma8, "8-bit DMA # for SB16 driver.");
-module_param_array(dma16, int, NULL, 0444);
+module_param_hw_array(dma16, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma16, "16-bit DMA # for SB16 driver.");
 module_param_array(mic_agc, int, NULL, 0444);
 MODULE_PARM_DESC(mic_agc, "Mic Auto-Gain-Control switch.");
diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c
index ad42d2364199..d77dcba276b5 100644
--- a/sound/isa/sb/sb8.c
+++ b/sound/isa/sb/sb8.c
@@ -47,11 +47,11 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for Sound Blaster soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable Sound Blaster soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for SB8 driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for SB8 driver.");
-module_param_array(dma8, int, NULL, 0444);
+module_param_hw_array(dma8, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma8, "8-bit DMA # for SB8 driver.");
 
 struct snd_sb8 {
diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c
index b61a6633d8f2..c09d9b914efe 100644
--- a/sound/isa/sc6000.c
+++ b/sound/isa/sc6000.c
@@ -64,17 +64,17 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for sc-6000 based soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable sc-6000 based soundcard.");
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for sc-6000 driver.");
-module_param_array(mss_port, long, NULL, 0444);
+module_param_hw_array(mss_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mss_port, "MSS Port # for sc-6000 driver.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port # for sc-6000 driver.");
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for sc-6000 driver.");
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for sc-6000 driver.");
-module_param_array(dma, int, NULL, 0444);
+module_param_hw_array(dma, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma, "DMA # for sc-6000 driver.");
 module_param_array(joystick, bool, NULL, 0444);
 MODULE_PARM_DESC(joystick, "Enable gameport.");
diff --git a/sound/isa/sscape.c b/sound/isa/sscape.c
index fdcfa29e2205..54f5758a1bb3 100644
--- a/sound/isa/sscape.c
+++ b/sound/isa/sscape.c
@@ -63,22 +63,22 @@ MODULE_PARM_DESC(index, "Index number for SoundScape soundcard");
 module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "Description for SoundScape card");
 
-module_param_array(port, long, NULL, 0444);
+module_param_hw_array(port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "Port # for SoundScape driver.");
 
-module_param_array(wss_port, long, NULL, 0444);
+module_param_hw_array(wss_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(wss_port, "WSS Port # for SoundScape driver.");
 
-module_param_array(irq, int, NULL, 0444);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for SoundScape driver.");
 
-module_param_array(mpu_irq, int, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU401 IRQ # for SoundScape driver.");
 
-module_param_array(dma, int, NULL, 0444);
+module_param_hw_array(dma, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma, "DMA # for SoundScape driver.");
 
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for SoundScape driver.");
 
 module_param_array(joystick, bool, NULL, 0444);
diff --git a/sound/isa/wavefront/wavefront.c b/sound/isa/wavefront/wavefront.c
index a0987a57c8a9..da4e9a85f0af 100644
--- a/sound/isa/wavefront/wavefront.c
+++ b/sound/isa/wavefront/wavefront.c
@@ -63,23 +63,23 @@ MODULE_PARM_DESC(enable, "Enable WaveFront soundcard.");
 module_param_array(isapnp, bool, NULL, 0444);
 MODULE_PARM_DESC(isapnp, "ISA PnP detection for WaveFront soundcards.");
 #endif
-module_param_array(cs4232_pcm_port, long, NULL, 0444);
+module_param_hw_array(cs4232_pcm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(cs4232_pcm_port, "Port # for CS4232 PCM interface.");
-module_param_array(cs4232_pcm_irq, int, NULL, 0444);
+module_param_hw_array(cs4232_pcm_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(cs4232_pcm_irq, "IRQ # for CS4232 PCM interface.");
-module_param_array(dma1, int, NULL, 0444);
+module_param_hw_array(dma1, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma1, "DMA1 # for CS4232 PCM interface.");
-module_param_array(dma2, int, NULL, 0444);
+module_param_hw_array(dma2, int, dma, NULL, 0444);
 MODULE_PARM_DESC(dma2, "DMA2 # for CS4232 PCM interface.");
-module_param_array(cs4232_mpu_port, long, NULL, 0444);
+module_param_hw_array(cs4232_mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(cs4232_mpu_port, "port # for CS4232 MPU-401 interface.");
-module_param_array(cs4232_mpu_irq, int, NULL, 0444);
+module_param_hw_array(cs4232_mpu_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(cs4232_mpu_irq, "IRQ # for CS4232 MPU-401 interface.");
-module_param_array(ics2115_irq, int, NULL, 0444);
+module_param_hw_array(ics2115_irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(ics2115_irq, "IRQ # for ICS2115.");
-module_param_array(ics2115_port, long, NULL, 0444);
+module_param_hw_array(ics2115_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(ics2115_port, "Port # for ICS2115.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port #.");
 module_param_array(use_cs4232_midi, bool, NULL, 0444);
 MODULE_PARM_DESC(use_cs4232_midi, "Use CS4232 MPU-401 interface (inaccessibly located inside your computer)");
-- 
cgit v1.2.3


From 232b0b0829fa4f22172750a6e2a36867583da285 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:30 +0100
Subject: Annotate hardware config module parameters in sound/oss/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in sound/oss/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jaroslav Kysela <perex@perex.cz>
cc: Takashi Iwai <tiwai@suse.com>
cc: Andrew Veliath <andrewtv@usa.net>
cc: alsa-devel@alsa-project.org
---
 sound/oss/ad1848.c        |  8 ++++----
 sound/oss/aedsp16.c       | 12 ++++++------
 sound/oss/mpu401.c        |  4 ++--
 sound/oss/msnd_pinnacle.c | 20 ++++++++++----------
 sound/oss/opl3.c          |  2 +-
 sound/oss/pas2_card.c     | 18 +++++++++---------
 sound/oss/pss.c           | 14 +++++++-------
 sound/oss/sb_card.c       | 10 +++++-----
 sound/oss/trix.c          | 18 +++++++++---------
 sound/oss/uart401.c       |  4 ++--
 sound/oss/uart6850.c      |  4 ++--
 sound/oss/waveartist.c    |  8 ++++----
 12 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/sound/oss/ad1848.c b/sound/oss/ad1848.c
index f6156d8169d0..2421f59cf279 100644
--- a/sound/oss/ad1848.c
+++ b/sound/oss/ad1848.c
@@ -2805,10 +2805,10 @@ static int __initdata dma = -1;
 static int __initdata dma2 = -1;
 static int __initdata type = 0;
 
-module_param(io, int, 0);		/* I/O for a raw AD1848 card */
-module_param(irq, int, 0);		/* IRQ to use */
-module_param(dma, int, 0);		/* First DMA channel */
-module_param(dma2, int, 0);		/* Second DMA channel */
+module_param_hw(io, int, ioport, 0);	/* I/O for a raw AD1848 card */
+module_param_hw(irq, int, irq, 0);	/* IRQ to use */
+module_param_hw(dma, int, dma, 0);	/* First DMA channel */
+module_param_hw(dma2, int, dma, 0);	/* Second DMA channel */
 module_param(type, int, 0);		/* Card type */
 module_param(deskpro_xl, bool, 0);	/* Special magic for Deskpro XL boxen */
 module_param(deskpro_m, bool, 0);	/* Special magic for Deskpro M box */
diff --git a/sound/oss/aedsp16.c b/sound/oss/aedsp16.c
index bb477d5c8528..f058ed6bdb69 100644
--- a/sound/oss/aedsp16.c
+++ b/sound/oss/aedsp16.c
@@ -1303,17 +1303,17 @@ static int __initdata mpu_irq = -1;
 static int __initdata mss_base = -1;
 static int __initdata mpu_base = -1;
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "I/O base address (0x220 0x240)");
-module_param(irq, int, 0);
+module_param_hw(irq, int, irq, 0);
 MODULE_PARM_DESC(irq, "IRQ line (5 7 9 10 11)");
-module_param(dma, int, 0);
+module_param_hw(dma, int, dma, 0);
 MODULE_PARM_DESC(dma, "dma line (0 1 3)");
-module_param(mpu_irq, int, 0);
+module_param_hw(mpu_irq, int, irq, 0);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ line (5 7 9 10 0)");
-module_param(mss_base, int, 0);
+module_param_hw(mss_base, int, ioport, 0);
 MODULE_PARM_DESC(mss_base, "MSS emulation I/O base address (0x530 0xE80)");
-module_param(mpu_base, int, 0);
+module_param_hw(mpu_base, int, ioport, 0);
 MODULE_PARM_DESC(mpu_base,"MPU-401 I/O base address (0x300 0x310 0x320 0x330)");
 MODULE_AUTHOR("Riccardo Facchetti <fizban@tin.it>");
 MODULE_DESCRIPTION("Audio Excel DSP 16 Driver Version " VERSION);
diff --git a/sound/oss/mpu401.c b/sound/oss/mpu401.c
index 862735005b43..20e8fa46f647 100644
--- a/sound/oss/mpu401.c
+++ b/sound/oss/mpu401.c
@@ -1748,8 +1748,8 @@ static struct address_info cfg;
 static int io = -1;
 static int irq = -1;
 
-module_param(irq, int, 0);
-module_param(io, int, 0);
+module_param_hw(irq, int, irq, 0);
+module_param_hw(io, int, ioport, 0);
 
 static int __init init_mpu401(void)
 {
diff --git a/sound/oss/msnd_pinnacle.c b/sound/oss/msnd_pinnacle.c
index f34ec01d2239..d2abc2cf3213 100644
--- a/sound/oss/msnd_pinnacle.c
+++ b/sound/oss/msnd_pinnacle.c
@@ -1727,22 +1727,22 @@ static int
 calibrate_signal __initdata =		CONFIG_MSND_CALSIGNAL;
 #endif /* MODULE */
 
-module_param				(io, int, 0);
-module_param				(irq, int, 0);
-module_param				(mem, int, 0);
+module_param_hw				(io, int, ioport, 0);
+module_param_hw				(irq, int, irq, 0);
+module_param_hw				(mem, int, iomem, 0);
 module_param				(write_ndelay, int, 0);
 module_param				(fifosize, int, 0);
 module_param				(calibrate_signal, int, 0);
 #ifndef MSND_CLASSIC
 module_param				(digital, bool, 0);
-module_param				(cfg, int, 0);
+module_param_hw				(cfg, int, ioport, 0);
 module_param				(reset, int, 0);
-module_param				(mpu_io, int, 0);
-module_param				(mpu_irq, int, 0);
-module_param				(ide_io0, int, 0);
-module_param				(ide_io1, int, 0);
-module_param				(ide_irq, int, 0);
-module_param				(joystick_io, int, 0);
+module_param_hw				(mpu_io, int, ioport, 0);
+module_param_hw				(mpu_irq, int, irq, 0);
+module_param_hw				(ide_io0, int, ioport, 0);
+module_param_hw				(ide_io1, int, ioport, 0);
+module_param_hw				(ide_irq, int, irq, 0);
+module_param_hw				(joystick_io, int, ioport, 0);
 #endif
 
 static int __init msnd_init(void)
diff --git a/sound/oss/opl3.c b/sound/oss/opl3.c
index b6d19adf8f41..f0f5b5be6314 100644
--- a/sound/oss/opl3.c
+++ b/sound/oss/opl3.c
@@ -1200,7 +1200,7 @@ static int me;
 
 static int io = -1;
 
-module_param(io, int, 0);
+module_param_hw(io, int, ioport, 0);
 
 static int __init init_opl3 (void)
 {
diff --git a/sound/oss/pas2_card.c b/sound/oss/pas2_card.c
index b07954a79536..769fca692d2a 100644
--- a/sound/oss/pas2_card.c
+++ b/sound/oss/pas2_card.c
@@ -383,15 +383,15 @@ static int __initdata sb_irq	= -1;
 static int __initdata sb_dma	= -1;
 static int __initdata sb_dma16	= -1;
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
-module_param(dma, int, 0);
-module_param(dma16, int, 0);
-
-module_param(sb_io, int, 0);
-module_param(sb_irq, int, 0);
-module_param(sb_dma, int, 0);
-module_param(sb_dma16, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
+module_param_hw(dma, int, dma, 0);
+module_param_hw(dma16, int, dma, 0);
+
+module_param_hw(sb_io, int, ioport, 0);
+module_param_hw(sb_irq, int, irq, 0);
+module_param_hw(sb_dma, int, dma, 0);
+module_param_hw(sb_dma16, int, dma, 0);
 
 module_param(joystick, bool, 0);
 module_param(symphony, bool, 0);
diff --git a/sound/oss/pss.c b/sound/oss/pss.c
index 81314f9e2ccb..33c3a442e162 100644
--- a/sound/oss/pss.c
+++ b/sound/oss/pss.c
@@ -1139,19 +1139,19 @@ static bool pss_no_sound = 0;	/* Just configure non-sound components */
 static bool pss_keep_settings  = 1;	/* Keep hardware settings at module exit */
 static char *pss_firmware = "/etc/sound/pss_synth";
 
-module_param(pss_io, int, 0);
+module_param_hw(pss_io, int, ioport, 0);
 MODULE_PARM_DESC(pss_io, "Set i/o base of PSS card (probably 0x220 or 0x240)");
-module_param(mss_io, int, 0);
+module_param_hw(mss_io, int, ioport, 0);
 MODULE_PARM_DESC(mss_io, "Set WSS (audio) i/o base (0x530, 0x604, 0xE80, 0xF40, or other. Address must end in 0 or 4 and must be from 0x100 to 0xFF4)");
-module_param(mss_irq, int, 0);
+module_param_hw(mss_irq, int, irq, 0);
 MODULE_PARM_DESC(mss_irq, "Set WSS (audio) IRQ (3, 5, 7, 9, 10, 11, 12)");
-module_param(mss_dma, int, 0);
+module_param_hw(mss_dma, int, dma, 0);
 MODULE_PARM_DESC(mss_dma, "Set WSS (audio) DMA (0, 1, 3)");
-module_param(mpu_io, int, 0);
+module_param_hw(mpu_io, int, ioport, 0);
 MODULE_PARM_DESC(mpu_io, "Set MIDI i/o base (0x330 or other. Address must be on 4 location boundaries and must be from 0x100 to 0xFFC)");
-module_param(mpu_irq, int, 0);
+module_param_hw(mpu_irq, int, irq, 0);
 MODULE_PARM_DESC(mpu_irq, "Set MIDI IRQ (3, 5, 7, 9, 10, 11, 12)");
-module_param(pss_cdrom_port, int, 0);
+module_param_hw(pss_cdrom_port, int, ioport, 0);
 MODULE_PARM_DESC(pss_cdrom_port, "Set the PSS CDROM port i/o base (0x340 or other)");
 module_param(pss_enable_joystick, bool, 0);
 MODULE_PARM_DESC(pss_enable_joystick, "Enables the PSS joystick port (1 to enable, 0 to disable)");
diff --git a/sound/oss/sb_card.c b/sound/oss/sb_card.c
index fb5d7250de38..2a92cfe6cfe9 100644
--- a/sound/oss/sb_card.c
+++ b/sound/oss/sb_card.c
@@ -61,15 +61,15 @@ static int __initdata uart401	= 0;
 static int __initdata pnp       = 0;
 #endif
 
-module_param(io, int, 000);
+module_param_hw(io, int, ioport, 000);
 MODULE_PARM_DESC(io,       "Soundblaster i/o base address (0x220,0x240,0x260,0x280)");
-module_param(irq, int, 000);
+module_param_hw(irq, int, irq, 000);
 MODULE_PARM_DESC(irq,	   "IRQ (5,7,9,10)");
-module_param(dma, int, 000);
+module_param_hw(dma, int, dma, 000);
 MODULE_PARM_DESC(dma,	   "8-bit DMA channel (0,1,3)");
-module_param(dma16, int, 000);
+module_param_hw(dma16, int, dma, 000);
 MODULE_PARM_DESC(dma16,	   "16-bit DMA channel (5,6,7)");
-module_param(mpu_io, int, 000);
+module_param_hw(mpu_io, int, ioport, 000);
 MODULE_PARM_DESC(mpu_io,   "MPU base address");
 module_param(type, int, 000);
 MODULE_PARM_DESC(type,	   "You can set this to specific card type (doesn't " \
diff --git a/sound/oss/trix.c b/sound/oss/trix.c
index 3c494dc93b93..a57bc635d758 100644
--- a/sound/oss/trix.c
+++ b/sound/oss/trix.c
@@ -413,15 +413,15 @@ static int __initdata sb_irq	= -1;
 static int __initdata mpu_io	= -1;
 static int __initdata mpu_irq	= -1;
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
-module_param(dma, int, 0);
-module_param(dma2, int, 0);
-module_param(sb_io, int, 0);
-module_param(sb_dma, int, 0);
-module_param(sb_irq, int, 0);
-module_param(mpu_io, int, 0);
-module_param(mpu_irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
+module_param_hw(dma, int, dma, 0);
+module_param_hw(dma2, int, dma, 0);
+module_param_hw(sb_io, int, ioport, 0);
+module_param_hw(sb_dma, int, dma, 0);
+module_param_hw(sb_irq, int, irq, 0);
+module_param_hw(mpu_io, int, ioport, 0);
+module_param_hw(mpu_irq, int, irq, 0);
 module_param(joystick, bool, 0);
 
 static int __init init_trix(void)
diff --git a/sound/oss/uart401.c b/sound/oss/uart401.c
index dae4d4344407..83dcc85b8688 100644
--- a/sound/oss/uart401.c
+++ b/sound/oss/uart401.c
@@ -429,8 +429,8 @@ static struct address_info cfg_mpu;
 static int io = -1;
 static int irq = -1;
 
-module_param(io, int, 0444);
-module_param(irq, int, 0444);
+module_param_hw(io, int, ioport, 0444);
+module_param_hw(irq, int, irq, 0444);
 
 
 static int __init init_uart401(void)
diff --git a/sound/oss/uart6850.c b/sound/oss/uart6850.c
index 1079133dd6ab..eda32d7eddbd 100644
--- a/sound/oss/uart6850.c
+++ b/sound/oss/uart6850.c
@@ -315,8 +315,8 @@ static struct address_info cfg_mpu;
 static int __initdata io = -1;
 static int __initdata irq = -1;
 
-module_param(io, int, 0);
-module_param(irq, int, 0);
+module_param_hw(io, int, ioport, 0);
+module_param_hw(irq, int, irq, 0);
 
 static int __init init_uart6850(void)
 {
diff --git a/sound/oss/waveartist.c b/sound/oss/waveartist.c
index 0b8d0de87273..4f0c3a232e41 100644
--- a/sound/oss/waveartist.c
+++ b/sound/oss/waveartist.c
@@ -2036,8 +2036,8 @@ __setup("waveartist=", setup_waveartist);
 #endif
 
 MODULE_DESCRIPTION("Rockwell WaveArtist RWA-010 sound driver");
-module_param(io, int, 0);		/* IO base */
-module_param(irq, int, 0);		/* IRQ */
-module_param(dma, int, 0);		/* DMA */
-module_param(dma2, int, 0);		/* DMA2 */
+module_param_hw(io, int, ioport, 0);		/* IO base */
+module_param_hw(irq, int, irq, 0);		/* IRQ */
+module_param_hw(dma, int, dma, 0);		/* DMA */
+module_param_hw(dma2, int, dma, 0);		/* DMA2 */
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 6192c41fc608b0a58d5540b015aa1672c266f3c5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Apr 2017 16:54:30 +0100
Subject: Annotate hardware config module parameters in sound/pci/

When the kernel is running in secure boot mode, we lock down the kernel to
prevent userspace from modifying the running kernel image.  Whilst this
includes prohibiting access to things like /dev/mem, it must also prevent
access by means of configuring driver modules in such a way as to cause a
device to access or modify the kernel image.

To this end, annotate module_param* statements that refer to hardware
configuration and indicate for future reference what type of parameter they
specify.  The parameter parser in the core sees this information and can
skip such parameters with an error message if the kernel is locked down.
The module initialisation then runs as normal, but just sees whatever the
default values for those parameters is.

Note that we do still need to do the module initialisation because some
drivers have viable defaults set in case parameters aren't specified and
some drivers support automatic configuration (e.g. PNP or PCI) in addition
to manually coded parameters.

This patch annotates drivers in sound/pci/.

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jaroslav Kysela <perex@perex.cz>
cc: Takashi Iwai <tiwai@suse.com>
cc: alsa-devel@alsa-project.org
---
 sound/pci/als4000.c         | 2 +-
 sound/pci/cmipci.c          | 6 +++---
 sound/pci/ens1370.c         | 2 +-
 sound/pci/riptide/riptide.c | 6 +++---
 sound/pci/sonicvibes.c      | 2 +-
 sound/pci/via82xx.c         | 2 +-
 sound/pci/ymfpci/ymfpci.c   | 6 +++---
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/sound/pci/als4000.c b/sound/pci/als4000.c
index 92bc06d01288..7844a75d8ed9 100644
--- a/sound/pci/als4000.c
+++ b/sound/pci/als4000.c
@@ -102,7 +102,7 @@ MODULE_PARM_DESC(id, "ID string for ALS4000 soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable ALS4000 soundcard.");
 #ifdef SUPPORT_JOYSTICK
-module_param_array(joystick_port, int, NULL, 0444);
+module_param_hw_array(joystick_port, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(joystick_port, "Joystick port address for ALS4000 soundcard. (0 = disabled)");
 #endif
 
diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c
index aeedc270ed9b..430f064c64da 100644
--- a/sound/pci/cmipci.c
+++ b/sound/pci/cmipci.c
@@ -68,14 +68,14 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for C-Media PCI soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable C-Media PCI soundcard.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM port.");
 module_param_array(soft_ac3, bool, NULL, 0444);
 MODULE_PARM_DESC(soft_ac3, "Software-conversion of raw SPDIF packets (model 033 only).");
 #ifdef SUPPORT_JOYSTICK
-module_param_array(joystick_port, int, NULL, 0444);
+module_param_hw_array(joystick_port, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(joystick_port, "Joystick port address.");
 #endif
 
diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c
index 164adad91650..90376739c5e1 100644
--- a/sound/pci/ens1370.c
+++ b/sound/pci/ens1370.c
@@ -106,7 +106,7 @@ module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable Ensoniq AudioPCI soundcard.");
 #ifdef SUPPORT_JOYSTICK
 #ifdef CHIP1371
-module_param_array(joystick_port, int, NULL, 0444);
+module_param_hw_array(joystick_port, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(joystick_port, "Joystick port address.");
 #else
 module_param_array(joystick, bool, NULL, 0444);
diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c
index 19c9df6b0f3d..f067c76d77f8 100644
--- a/sound/pci/riptide/riptide.c
+++ b/sound/pci/riptide/riptide.c
@@ -137,12 +137,12 @@ MODULE_PARM_DESC(id, "ID string for Riptide soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable Riptide soundcard.");
 #ifdef SUPPORT_JOYSTICK
-module_param_array(joystick_port, int, NULL, 0444);
+module_param_hw_array(joystick_port, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(joystick_port, "Joystick port # for Riptide soundcard.");
 #endif
-module_param_array(mpu_port, int, NULL, 0444);
+module_param_hw_array(mpu_port, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU401 port # for Riptide driver.");
-module_param_array(opl3_port, int, NULL, 0444);
+module_param_hw_array(opl3_port, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(opl3_port, "OPL3 port # for Riptide driver.");
 
 /*
diff --git a/sound/pci/sonicvibes.c b/sound/pci/sonicvibes.c
index a6aa48c5b969..8e3d4ec39c35 100644
--- a/sound/pci/sonicvibes.c
+++ b/sound/pci/sonicvibes.c
@@ -66,7 +66,7 @@ module_param_array(reverb, bool, NULL, 0444);
 MODULE_PARM_DESC(reverb, "Enable reverb (SRAM is present) for S3 SonicVibes soundcard.");
 module_param_array(mge, bool, NULL, 0444);
 MODULE_PARM_DESC(mge, "MIC Gain Enable for S3 SonicVibes soundcard.");
-module_param(dmaio, uint, 0444);
+module_param_hw(dmaio, uint, ioport, 0444);
 MODULE_PARM_DESC(dmaio, "DDMA i/o base address for S3 SonicVibes soundcard.");
 
 /*
diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c
index 2d8c14e3f8d2..127834021175 100644
--- a/sound/pci/via82xx.c
+++ b/sound/pci/via82xx.c
@@ -92,7 +92,7 @@ module_param(index, int, 0444);
 MODULE_PARM_DESC(index, "Index value for VIA 82xx bridge.");
 module_param(id, charp, 0444);
 MODULE_PARM_DESC(id, "ID string for VIA 82xx bridge.");
-module_param(mpu_port, long, 0444);
+module_param_hw(mpu_port, long, ioport, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 port. (VT82C686x only)");
 #ifdef SUPPORT_JOYSTICK
 module_param(joystick, bool, 0444);
diff --git a/sound/pci/ymfpci/ymfpci.c b/sound/pci/ymfpci/ymfpci.c
index 812e27a1bcbc..4faf3e1ed06a 100644
--- a/sound/pci/ymfpci/ymfpci.c
+++ b/sound/pci/ymfpci/ymfpci.c
@@ -55,12 +55,12 @@ module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for the Yamaha DS-1 PCI soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable Yamaha DS-1 soundcard.");
-module_param_array(mpu_port, long, NULL, 0444);
+module_param_hw_array(mpu_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(mpu_port, "MPU-401 Port.");
-module_param_array(fm_port, long, NULL, 0444);
+module_param_hw_array(fm_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(fm_port, "FM OPL-3 Port.");
 #ifdef SUPPORT_JOYSTICK
-module_param_array(joystick_port, long, NULL, 0444);
+module_param_hw_array(joystick_port, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(joystick_port, "Joystick port address");
 #endif
 module_param_array(rear_switch, bool, NULL, 0444);
-- 
cgit v1.2.3


From 2a0c57545a291f257cd231b1c4b18285b84608d8 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 10 Apr 2017 16:50:56 +0530
Subject: iommu/of: Refactor of_iommu_configure() for error handling

In preparation for some upcoming cleverness, rework the control flow in
of_iommu_configure() to minimise duplication and improve the propogation
of errors. It's also as good a time as any to switch over from the
now-just-a-compatibility-wrapper of_iommu_get_ops() to using the generic
IOMMU instance interface directly.

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/of_iommu.c | 83 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 53 insertions(+), 30 deletions(-)

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 2683e9fc0dcf..8f4e59985f6e 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -96,6 +96,28 @@ int of_get_dma_window(struct device_node *dn, const char *prefix, int index,
 }
 EXPORT_SYMBOL_GPL(of_get_dma_window);
 
+static const struct iommu_ops
+*of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec)
+{
+	const struct iommu_ops *ops;
+	struct fwnode_handle *fwnode = &iommu_spec->np->fwnode;
+	int err;
+
+	ops = iommu_ops_from_fwnode(fwnode);
+	if (!ops || !ops->of_xlate)
+		return NULL;
+
+	err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops);
+	if (err)
+		return ERR_PTR(err);
+
+	err = ops->of_xlate(dev, iommu_spec);
+	if (err)
+		return ERR_PTR(err);
+
+	return ops;
+}
+
 static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
 {
 	struct of_phandle_args *iommu_spec = data;
@@ -105,10 +127,11 @@ static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
 }
 
 static const struct iommu_ops
-*of_pci_iommu_configure(struct pci_dev *pdev, struct device_node *bridge_np)
+*of_pci_iommu_init(struct pci_dev *pdev, struct device_node *bridge_np)
 {
 	const struct iommu_ops *ops;
 	struct of_phandle_args iommu_spec;
+	int err;
 
 	/*
 	 * Start by tracing the RID alias down the PCI topology as
@@ -123,56 +146,56 @@ static const struct iommu_ops
 	 * bus into the system beyond, and which IOMMU it ends up at.
 	 */
 	iommu_spec.np = NULL;
-	if (of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map",
-			   "iommu-map-mask", &iommu_spec.np, iommu_spec.args))
-		return NULL;
+	err = of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map",
+			     "iommu-map-mask", &iommu_spec.np,
+			     iommu_spec.args);
+	if (err)
+		return err == -ENODEV ? NULL : ERR_PTR(err);
 
-	ops = iommu_ops_from_fwnode(&iommu_spec.np->fwnode);
-	if (!ops || !ops->of_xlate ||
-	    iommu_fwspec_init(&pdev->dev, &iommu_spec.np->fwnode, ops) ||
-	    ops->of_xlate(&pdev->dev, &iommu_spec))
-		ops = NULL;
+	ops = of_iommu_xlate(&pdev->dev, &iommu_spec);
 
 	of_node_put(iommu_spec.np);
 	return ops;
 }
 
-const struct iommu_ops *of_iommu_configure(struct device *dev,
-					   struct device_node *master_np)
+static const struct iommu_ops
+*of_platform_iommu_init(struct device *dev, struct device_node *np)
 {
 	struct of_phandle_args iommu_spec;
-	struct device_node *np;
 	const struct iommu_ops *ops = NULL;
 	int idx = 0;
 
-	if (dev_is_pci(dev))
-		return of_pci_iommu_configure(to_pci_dev(dev), master_np);
-
 	/*
 	 * We don't currently walk up the tree looking for a parent IOMMU.
 	 * See the `Notes:' section of
 	 * Documentation/devicetree/bindings/iommu/iommu.txt
 	 */
-	while (!of_parse_phandle_with_args(master_np, "iommus",
-					   "#iommu-cells", idx,
-					   &iommu_spec)) {
-		np = iommu_spec.np;
-		ops = iommu_ops_from_fwnode(&np->fwnode);
-
-		if (!ops || !ops->of_xlate ||
-		    iommu_fwspec_init(dev, &np->fwnode, ops) ||
-		    ops->of_xlate(dev, &iommu_spec))
-			goto err_put_node;
-
-		of_node_put(np);
+	while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells",
+					   idx, &iommu_spec)) {
+		ops = of_iommu_xlate(dev, &iommu_spec);
+		of_node_put(iommu_spec.np);
 		idx++;
+		if (IS_ERR_OR_NULL(ops))
+			break;
 	}
 
 	return ops;
+}
+
+const struct iommu_ops *of_iommu_configure(struct device *dev,
+					   struct device_node *master_np)
+{
+	const struct iommu_ops *ops;
+
+	if (!master_np)
+		return NULL;
+
+	if (dev_is_pci(dev))
+		ops = of_pci_iommu_init(to_pci_dev(dev), master_np);
+	else
+		ops = of_platform_iommu_init(dev, master_np);
 
-err_put_node:
-	of_node_put(np);
-	return NULL;
+	return IS_ERR(ops) ? NULL : ops;
 }
 
 static int __init of_iommu_init(void)
-- 
cgit v1.2.3


From d7b0558230e444f29488fcee0b0b561015d16f8a Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 10 Apr 2017 16:50:57 +0530
Subject: iommu/of: Prepare for deferred IOMMU configuration

IOMMU configuration represents unchanging properties of the hardware,
and as such should only need happen once in a device's lifetime, but
the necessary interaction with the IOMMU device and driver complicates
exactly when that point should be.

Since the only reasonable tool available for handling the inter-device
dependency is probe deferral, we need to prepare of_iommu_configure()
to run later than it is currently called (i.e. at driver probe rather
than device creation), to handle being retried, and to tell whether a
not-yet present IOMMU should be waited for or skipped (by virtue of
having declared a built-in driver or not).

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/of_iommu.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 8f4e59985f6e..c8be889f5206 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -96,6 +96,19 @@ int of_get_dma_window(struct device_node *dn, const char *prefix, int index,
 }
 EXPORT_SYMBOL_GPL(of_get_dma_window);
 
+static bool of_iommu_driver_present(struct device_node *np)
+{
+	/*
+	 * If the IOMMU still isn't ready by the time we reach init, assume
+	 * it never will be. We don't want to defer indefinitely, nor attempt
+	 * to dereference __iommu_of_table after it's been freed.
+	 */
+	if (system_state > SYSTEM_BOOTING)
+		return false;
+
+	return of_match_node(&__iommu_of_table, np);
+}
+
 static const struct iommu_ops
 *of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec)
 {
@@ -104,12 +117,20 @@ static const struct iommu_ops
 	int err;
 
 	ops = iommu_ops_from_fwnode(fwnode);
-	if (!ops || !ops->of_xlate)
+	if ((ops && !ops->of_xlate) ||
+	    (!ops && !of_iommu_driver_present(iommu_spec->np)))
 		return NULL;
 
 	err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops);
 	if (err)
 		return ERR_PTR(err);
+	/*
+	 * The otherwise-empty fwspec handily serves to indicate the specific
+	 * IOMMU device we're waiting for, which will be useful if we ever get
+	 * a proper probe-ordering dependency mechanism in future.
+	 */
+	if (!ops)
+		return ERR_PTR(-EPROBE_DEFER);
 
 	err = ops->of_xlate(dev, iommu_spec);
 	if (err)
@@ -186,14 +207,34 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 					   struct device_node *master_np)
 {
 	const struct iommu_ops *ops;
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
 
 	if (!master_np)
 		return NULL;
 
+	if (fwspec) {
+		if (fwspec->ops)
+			return fwspec->ops;
+
+		/* In the deferred case, start again from scratch */
+		iommu_fwspec_free(dev);
+	}
+
 	if (dev_is_pci(dev))
 		ops = of_pci_iommu_init(to_pci_dev(dev), master_np);
 	else
 		ops = of_platform_iommu_init(dev, master_np);
+	/*
+	 * If we have reason to believe the IOMMU driver missed the initial
+	 * add_device callback for dev, replay it to get things in order.
+	 */
+	if (!IS_ERR_OR_NULL(ops) && ops->add_device &&
+	    dev->bus && !dev->iommu_group) {
+		int err = ops->add_device(dev);
+
+		if (err)
+			ops = ERR_PTR(err);
+	}
 
 	return IS_ERR(ops) ? NULL : ops;
 }
-- 
cgit v1.2.3


From 3f1866779cf8338e1c8bd32e5f6f5424795ef191 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Mon, 10 Apr 2017 16:50:58 +0530
Subject: of: dma: Make of_dma_deconfigure() public

As part of moving DMA initializing to probe time the
of_dma_deconfigure() function will need to be called from different
source files. Make it public and move it to drivers/of/device.c where
the of_dma_configure() function is.

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/of/device.c       | 12 ++++++++++++
 drivers/of/platform.c     |  5 -----
 include/linux/of_device.h |  3 +++
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/of/device.c b/drivers/of/device.c
index b1e6bebda3f3..0d378c03e1a4 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -151,6 +151,18 @@ void of_dma_configure(struct device *dev, struct device_node *np)
 }
 EXPORT_SYMBOL_GPL(of_dma_configure);
 
+/**
+ * of_dma_deconfigure - Clean up DMA configuration
+ * @dev:	Device for which to clean up DMA configuration
+ *
+ * Clean up all configuration performed by of_dma_configure_ops() and free all
+ * resources that have been allocated.
+ */
+void of_dma_deconfigure(struct device *dev)
+{
+	arch_teardown_dma_ops(dev);
+}
+
 int of_device_register(struct platform_device *pdev)
 {
 	device_initialize(&pdev->dev);
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 5dfcc967dd05..5344db50aa65 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -158,11 +158,6 @@ struct platform_device *of_device_alloc(struct device_node *np,
 }
 EXPORT_SYMBOL(of_device_alloc);
 
-static void of_dma_deconfigure(struct device *dev)
-{
-	arch_teardown_dma_ops(dev);
-}
-
 /**
  * of_platform_device_create_pdata - Alloc, initialize and register an of_device
  * @np: pointer to node to create device for
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index c12dace043f3..af984551cc2b 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -56,6 +56,7 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
 }
 
 void of_dma_configure(struct device *dev, struct device_node *np);
+void of_dma_deconfigure(struct device *dev);
 #else /* CONFIG_OF */
 
 static inline int of_driver_match_device(struct device *dev,
@@ -105,6 +106,8 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
 }
 static inline void of_dma_configure(struct device *dev, struct device_node *np)
 {}
+static inline void of_dma_deconfigure(struct device *dev)
+{}
 #endif /* CONFIG_OF */
 
 #endif /* _LINUX_OF_DEVICE_H */
-- 
cgit v1.2.3


From 1d9029d440e40b276c0691caed1de10c42d96bef Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 10 Apr 2017 16:50:59 +0530
Subject: ACPI/IORT: Add function to check SMMUs drivers presence

The IOMMU probe deferral implementation requires a mechanism to detect
if drivers for SMMU components are built-in in the kernel to detect
whether IOMMU configuration for a given device should be deferred (ie
SMMU drivers present but still not probed) or not (drivers not present).

Add a simple function to IORT to detect if SMMU drivers for SMMU
components managed by IORT are built-in in the kernel.

Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Cc: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/acpi/arm64/iort.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 4a5bb967250b..3dd9ec372dae 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -523,6 +523,19 @@ static int arm_smmu_iort_xlate(struct device *dev, u32 streamid,
 	return ret;
 }
 
+static inline bool iort_iommu_driver_enabled(u8 type)
+{
+	switch (type) {
+	case ACPI_IORT_NODE_SMMU_V3:
+		return IS_BUILTIN(CONFIG_ARM_SMMU_V3);
+	case ACPI_IORT_NODE_SMMU:
+		return IS_BUILTIN(CONFIG_ARM_SMMU);
+	default:
+		pr_warn("IORT node type %u does not describe an SMMU\n", type);
+		return false;
+	}
+}
+
 static const struct iommu_ops *iort_iommu_xlate(struct device *dev,
 					struct acpi_iort_node *node,
 					u32 streamid)
-- 
cgit v1.2.3


From efc8551a276faab19d85079da02c5fb602b0dcbe Mon Sep 17 00:00:00 2001
From: Sricharan R <sricharan@codeaurora.org>
Date: Mon, 10 Apr 2017 16:51:00 +0530
Subject: of: device: Fix overflow of coherent_dma_mask

Size of the dma-range is calculated as coherent_dma_mask + 1
and passed to arch_setup_dma_ops further. It overflows when
the coherent_dma_mask is set for full 64 bits 0xFFFFFFFFFFFFFFFF,
resulting in size getting passed as 0 wrongly. Fix this by
passsing in max(mask, mask + 1). Note that in this case
when the mask is set to full 64bits, we will be passing the mask
itself to arch_setup_dma_ops instead of the size. The real fix
for this should be to make arch_setup_dma_ops receive the
mask and handle it, to be done in the future.

Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/of/device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/of/device.c b/drivers/of/device.c
index 0d378c03e1a4..e1ae9e7104a1 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -107,7 +107,7 @@ void of_dma_configure(struct device *dev, struct device_node *np)
 	ret = of_dma_get_range(np, &dma_addr, &paddr, &size);
 	if (ret < 0) {
 		dma_addr = offset = 0;
-		size = dev->coherent_dma_mask + 1;
+		size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
 	} else {
 		offset = PFN_DOWN(paddr - dma_addr);
 
-- 
cgit v1.2.3


From 09515ef5ddad71c7820e5e428da418b709feeb26 Mon Sep 17 00:00:00 2001
From: Sricharan R <sricharan@codeaurora.org>
Date: Mon, 10 Apr 2017 16:51:01 +0530
Subject: of/acpi: Configure dma operations at probe time for platform/amba/pci
 bus devices

Configuring DMA ops at probe time will allow deferring device probe when
the IOMMU isn't available yet. The dma_configure for the device is
now called from the generic device_attach callback just before the
bus/driver probe is called. This way, configuring the DMA ops for the
device would be called at the same place for all bus_types, hence the
deferred probing mechanism should work for all buses as well.

pci_bus_add_devices    (platform/amba)(_device_create/driver_register)
       |                         |
pci_bus_add_device     (device_add/driver_register)
       |                         |
device_attach           device_initial_probe
       |                         |
__device_attach_driver    __device_attach_driver
       |
driver_probe_device
       |
really_probe
       |
dma_configure

Similarly on the device/driver_unregister path __device_release_driver is
called which inturn calls dma_deconfigure.

This patch changes the dma ops configuration to probe time for
both OF and ACPI based platform/amba/pci bus devices.

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com> (drivers/pci part)
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/acpi/glue.c         |  5 -----
 drivers/base/dd.c           |  9 +++++++++
 drivers/base/dma-mapping.c  | 40 ++++++++++++++++++++++++++++++++++++++++
 drivers/of/platform.c       |  5 +----
 drivers/pci/probe.c         | 28 ----------------------------
 include/linux/dma-mapping.h | 12 ++++++++++++
 6 files changed, 62 insertions(+), 37 deletions(-)

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index fb19e1cdb641..c05f24107bfc 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -176,7 +176,6 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev)
 	struct list_head *physnode_list;
 	unsigned int node_id;
 	int retval = -EINVAL;
-	enum dev_dma_attr attr;
 
 	if (has_acpi_companion(dev)) {
 		if (acpi_dev) {
@@ -233,10 +232,6 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev)
 	if (!has_acpi_companion(dev))
 		ACPI_COMPANION_SET(dev, acpi_dev);
 
-	attr = acpi_get_dma_attr(acpi_dev);
-	if (attr != DEV_DMA_NOT_SUPPORTED)
-		acpi_dma_configure(dev, attr);
-
 	acpi_physnode_link_name(physical_node_name, node_id);
 	retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj,
 				   physical_node_name);
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index a1fbf55c4d3a..4882f06d12df 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -19,6 +19,7 @@
 
 #include <linux/device.h>
 #include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/module.h>
 #include <linux/kthread.h>
 #include <linux/wait.h>
@@ -356,6 +357,10 @@ re_probe:
 	if (ret)
 		goto pinctrl_bind_failed;
 
+	ret = dma_configure(dev);
+	if (ret)
+		goto dma_failed;
+
 	if (driver_sysfs_add(dev)) {
 		printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n",
 			__func__, dev_name(dev));
@@ -417,6 +422,8 @@ re_probe:
 	goto done;
 
 probe_failed:
+	dma_deconfigure(dev);
+dma_failed:
 	if (dev->bus)
 		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
 					     BUS_NOTIFY_DRIVER_NOT_BOUND, dev);
@@ -826,6 +833,8 @@ static void __device_release_driver(struct device *dev, struct device *parent)
 			drv->remove(dev);
 
 		device_links_driver_cleanup(dev);
+		dma_deconfigure(dev);
+
 		devres_release_all(dev);
 		dev->driver = NULL;
 		dev_set_drvdata(dev, NULL);
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index efd71cf4fdea..449b948c7427 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -7,9 +7,11 @@
  * This file is released under the GPLv2.
  */
 
+#include <linux/acpi.h>
 #include <linux/dma-mapping.h>
 #include <linux/export.h>
 #include <linux/gfp.h>
+#include <linux/of_device.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
@@ -341,3 +343,41 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
 	vunmap(cpu_addr);
 }
 #endif
+
+/*
+ * Common configuration to enable DMA API use for a device
+ */
+#include <linux/pci.h>
+
+int dma_configure(struct device *dev)
+{
+	struct device *bridge = NULL, *dma_dev = dev;
+	enum dev_dma_attr attr;
+
+	if (dev_is_pci(dev)) {
+		bridge = pci_get_host_bridge_device(to_pci_dev(dev));
+		dma_dev = bridge;
+		if (IS_ENABLED(CONFIG_OF) && dma_dev->parent &&
+		    dma_dev->parent->of_node)
+			dma_dev = dma_dev->parent;
+	}
+
+	if (dma_dev->of_node) {
+		of_dma_configure(dev, dma_dev->of_node);
+	} else if (has_acpi_companion(dma_dev)) {
+		attr = acpi_get_dma_attr(to_acpi_device_node(dma_dev->fwnode));
+		if (attr != DEV_DMA_NOT_SUPPORTED)
+			acpi_dma_configure(dev, attr);
+	}
+
+	if (bridge)
+		pci_put_host_bridge_device(bridge);
+
+	return 0;
+}
+
+void dma_deconfigure(struct device *dev)
+{
+	of_dma_deconfigure(dev);
+	acpi_dma_deconfigure(dev);
+}
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 5344db50aa65..2aa4ebbde9cd 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -22,6 +22,7 @@
 #include <linux/slab.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
+#include <linux/of_iommu.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
@@ -186,11 +187,9 @@ static struct platform_device *of_platform_device_create_pdata(
 
 	dev->dev.bus = &platform_bus_type;
 	dev->dev.platform_data = platform_data;
-	of_dma_configure(&dev->dev, dev->dev.of_node);
 	of_msi_configure(&dev->dev, dev->dev.of_node);
 
 	if (of_device_add(dev) != 0) {
-		of_dma_deconfigure(&dev->dev);
 		platform_device_put(dev);
 		goto err_clear_flag;
 	}
@@ -248,7 +247,6 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
 		dev_set_name(&dev->dev, "%s", bus_id);
 	else
 		of_device_make_bus_id(&dev->dev);
-	of_dma_configure(&dev->dev, dev->dev.of_node);
 
 	/* Allow the HW Peripheral ID to be overridden */
 	prop = of_get_property(node, "arm,primecell-periphid", NULL);
@@ -542,7 +540,6 @@ static int of_platform_device_destroy(struct device *dev, void *data)
 		amba_device_unregister(to_amba_device(dev));
 #endif
 
-	of_dma_deconfigure(dev);
 	of_node_clear_flag(dev->of_node, OF_POPULATED);
 	of_node_clear_flag(dev->of_node, OF_POPULATED_BUS);
 	return 0;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index dfc9a2794141..5a8dd43db336 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1893,33 +1893,6 @@ static void pci_set_msi_domain(struct pci_dev *dev)
 	dev_set_msi_domain(&dev->dev, d);
 }
 
-/**
- * pci_dma_configure - Setup DMA configuration
- * @dev: ptr to pci_dev struct of the PCI device
- *
- * Function to update PCI devices's DMA configuration using the same
- * info from the OF node or ACPI node of host bridge's parent (if any).
- */
-static void pci_dma_configure(struct pci_dev *dev)
-{
-	struct device *bridge = pci_get_host_bridge_device(dev);
-
-	if (IS_ENABLED(CONFIG_OF) &&
-		bridge->parent && bridge->parent->of_node) {
-			of_dma_configure(&dev->dev, bridge->parent->of_node);
-	} else if (has_acpi_companion(bridge)) {
-		struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
-		enum dev_dma_attr attr = acpi_get_dma_attr(adev);
-
-		if (attr == DEV_DMA_NOT_SUPPORTED)
-			dev_warn(&dev->dev, "DMA not supported.\n");
-		else
-			acpi_dma_configure(&dev->dev, attr);
-	}
-
-	pci_put_host_bridge_device(bridge);
-}
-
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 {
 	int ret;
@@ -1933,7 +1906,6 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	dev->dev.dma_mask = &dev->dma_mask;
 	dev->dev.dma_parms = &dev->dma_parms;
 	dev->dev.coherent_dma_mask = 0xffffffffull;
-	pci_dma_configure(dev);
 
 	pci_set_dma_max_seg_size(dev, 65536);
 	pci_set_dma_seg_boundary(dev, 0xffffffff);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 0977317c6835..4f3eecedca2d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -728,6 +728,18 @@ dma_mark_declared_memory_occupied(struct device *dev,
 }
 #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */
 
+#ifdef CONFIG_HAS_DMA
+int dma_configure(struct device *dev);
+void dma_deconfigure(struct device *dev);
+#else
+static inline int dma_configure(struct device *dev)
+{
+	return 0;
+}
+
+static inline void dma_deconfigure(struct device *dev) {}
+#endif
+
 /*
  * Managed DMA API
  */
-- 
cgit v1.2.3


From 7b07cbefb68d486febf47e13b570fed53d9296b4 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Mon, 10 Apr 2017 16:51:02 +0530
Subject: iommu: of: Handle IOMMU lookup failure with deferred probing or error

Failures to look up an IOMMU when parsing the DT iommus property need to
be handled separately from the .of_xlate() failures to support deferred
probing.

The lack of a registered IOMMU can be caused by the lack of a driver for
the IOMMU, the IOMMU device probe not having been performed yet, having
been deferred, or having failed.

The first case occurs when the device tree describes the bus master and
IOMMU topology correctly but no device driver exists for the IOMMU yet
or the device driver has not been compiled in. Return NULL, the caller
will configure the device without an IOMMU.

The second and third cases are handled by deferring the probe of the bus
master device which will eventually get reprobed after the IOMMU.

The last case is currently handled by deferring the probe of the bus
master device as well. A mechanism to either configure the bus master
device without an IOMMU or to fail the bus master device probe depending
on whether the IOMMU is optional or mandatory would be a good
enhancement.

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Laurent Pichart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/base/dma-mapping.c | 5 +++--
 drivers/iommu/of_iommu.c   | 4 ++--
 drivers/of/device.c        | 9 +++++++--
 include/linux/of_device.h  | 9 ++++++---
 4 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index 449b948c7427..82bd45ced7ff 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -353,6 +353,7 @@ int dma_configure(struct device *dev)
 {
 	struct device *bridge = NULL, *dma_dev = dev;
 	enum dev_dma_attr attr;
+	int ret = 0;
 
 	if (dev_is_pci(dev)) {
 		bridge = pci_get_host_bridge_device(to_pci_dev(dev));
@@ -363,7 +364,7 @@ int dma_configure(struct device *dev)
 	}
 
 	if (dma_dev->of_node) {
-		of_dma_configure(dev, dma_dev->of_node);
+		ret = of_dma_configure(dev, dma_dev->of_node);
 	} else if (has_acpi_companion(dma_dev)) {
 		attr = acpi_get_dma_attr(to_acpi_device_node(dma_dev->fwnode));
 		if (attr != DEV_DMA_NOT_SUPPORTED)
@@ -373,7 +374,7 @@ int dma_configure(struct device *dev)
 	if (bridge)
 		pci_put_host_bridge_device(bridge);
 
-	return 0;
+	return ret;
 }
 
 void dma_deconfigure(struct device *dev)
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index c8be889f5206..9f44ee8ea1bc 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -236,7 +236,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 			ops = ERR_PTR(err);
 	}
 
-	return IS_ERR(ops) ? NULL : ops;
+	return ops;
 }
 
 static int __init of_iommu_init(void)
@@ -247,7 +247,7 @@ static int __init of_iommu_init(void)
 	for_each_matching_node_and_match(np, matches, &match) {
 		const of_iommu_init_fn init_fn = match->data;
 
-		if (init_fn(np))
+		if (init_fn && init_fn(np))
 			pr_err("Failed to initialise IOMMU %s\n",
 				of_node_full_name(np));
 	}
diff --git a/drivers/of/device.c b/drivers/of/device.c
index e1ae9e7104a1..8bd3d8c09435 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -82,7 +82,7 @@ int of_device_add(struct platform_device *ofdev)
  * can use a platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE events
  * to fix up DMA configuration.
  */
-void of_dma_configure(struct device *dev, struct device_node *np)
+int of_dma_configure(struct device *dev, struct device_node *np)
 {
 	u64 dma_addr, paddr, size;
 	int ret;
@@ -123,7 +123,7 @@ void of_dma_configure(struct device *dev, struct device_node *np)
 
 		if (!size) {
 			dev_err(dev, "Adjusted size 0x%llx invalid\n", size);
-			return;
+			return -EINVAL;
 		}
 		dev_dbg(dev, "dma_pfn_offset(%#08lx)\n", offset);
 	}
@@ -144,10 +144,15 @@ void of_dma_configure(struct device *dev, struct device_node *np)
 		coherent ? " " : " not ");
 
 	iommu = of_iommu_configure(dev, np);
+	if (IS_ERR(iommu))
+		return PTR_ERR(iommu);
+
 	dev_dbg(dev, "device is%sbehind an iommu\n",
 		iommu ? " " : " not ");
 
 	arch_setup_dma_ops(dev, dma_addr, size, iommu, coherent);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(of_dma_configure);
 
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index af984551cc2b..2cacdd81062e 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -55,7 +55,7 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
 	return of_node_get(cpu_dev->of_node);
 }
 
-void of_dma_configure(struct device *dev, struct device_node *np);
+int of_dma_configure(struct device *dev, struct device_node *np);
 void of_dma_deconfigure(struct device *dev);
 #else /* CONFIG_OF */
 
@@ -104,8 +104,11 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
 {
 	return NULL;
 }
-static inline void of_dma_configure(struct device *dev, struct device_node *np)
-{}
+
+static inline int of_dma_configure(struct device *dev, struct device_node *np)
+{
+	return 0;
+}
 static inline void of_dma_deconfigure(struct device *dev)
 {}
 #endif /* CONFIG_OF */
-- 
cgit v1.2.3


From 5a1bb638d5677053c7addcb228b56da6fccb5d68 Mon Sep 17 00:00:00 2001
From: Sricharan R <sricharan@codeaurora.org>
Date: Mon, 10 Apr 2017 16:51:03 +0530
Subject: drivers: acpi: Handle IOMMU lookup failure with deferred probing or
 error

This is an equivalent to the DT's handling of the iommu master's probe
with deferred probing when the corrsponding iommu is not probed yet.
The lack of a registered IOMMU can be caused by the lack of a driver for
the IOMMU, the IOMMU device probe not having been performed yet, having
been deferred, or having failed.

The first case occurs when the firmware describes the bus master and
IOMMU topology correctly but no device driver exists for the IOMMU yet
or the device driver has not been compiled in. Return NULL, the caller
will configure the device without an IOMMU.

The second and third cases are handled by deferring the probe of the bus
master device which will eventually get reprobed after the IOMMU.

The last case is currently handled by deferring the probe of the bus
master device as well. A mechanism to either configure the bus master
device without an IOMMU or to fail the bus master device probe depending
on whether the IOMMU is optional or mandatory would be a good
enhancement.

Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
[Lorenzo: Added fixes for dma_coherent_mask overflow, acpi_dma_configure
          called multiple times for same device]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/acpi/arm64/iort.c  | 33 ++++++++++++++++++++++++++++++++-
 drivers/acpi/scan.c        | 11 ++++++++---
 drivers/base/dma-mapping.c |  2 +-
 include/acpi/acpi_bus.h    |  2 +-
 include/linux/acpi.h       |  7 +++++--
 5 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 3dd9ec372dae..e323ece0314d 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -543,6 +543,14 @@ static const struct iommu_ops *iort_iommu_xlate(struct device *dev,
 	const struct iommu_ops *ops = NULL;
 	int ret = -ENODEV;
 	struct fwnode_handle *iort_fwnode;
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+	/*
+	 * If we already translated the fwspec there
+	 * is nothing left to do, return the iommu_ops.
+	 */
+	if (fwspec && fwspec->ops)
+		return fwspec->ops;
 
 	if (node) {
 		iort_fwnode = iort_get_fwnode(node);
@@ -550,8 +558,17 @@ static const struct iommu_ops *iort_iommu_xlate(struct device *dev,
 			return NULL;
 
 		ops = iommu_ops_from_fwnode(iort_fwnode);
+		/*
+		 * If the ops look-up fails, this means that either
+		 * the SMMU drivers have not been probed yet or that
+		 * the SMMU drivers are not built in the kernel;
+		 * Depending on whether the SMMU drivers are built-in
+		 * in the kernel or not, defer the IOMMU configuration
+		 * or just abort it.
+		 */
 		if (!ops)
-			return NULL;
+			return iort_iommu_driver_enabled(node->type) ?
+			       ERR_PTR(-EPROBE_DEFER) : NULL;
 
 		ret = arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops);
 	}
@@ -625,12 +642,26 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
 
 		while (parent) {
 			ops = iort_iommu_xlate(dev, parent, streamid);
+			if (IS_ERR_OR_NULL(ops))
+				return ops;
 
 			parent = iort_node_get_id(node, &streamid,
 						  IORT_IOMMU_TYPE, i++);
 		}
 	}
 
+	/*
+	 * If we have reason to believe the IOMMU driver missed the initial
+	 * add_device callback for dev, replay it to get things in order.
+	 */
+	if (!IS_ERR_OR_NULL(ops) && ops->add_device &&
+	    dev->bus && !dev->iommu_group) {
+		int err = ops->add_device(dev);
+
+		if (err)
+			ops = ERR_PTR(err);
+	}
+
 	return ops;
 }
 
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 192691880d55..2a513cce332e 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1373,20 +1373,25 @@ enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev)
  * @dev: The pointer to the device
  * @attr: device dma attributes
  */
-void acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
+int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
 {
 	const struct iommu_ops *iommu;
+	u64 size;
 
 	iort_set_dma_mask(dev);
 
 	iommu = iort_iommu_configure(dev);
+	if (IS_ERR(iommu))
+		return PTR_ERR(iommu);
 
+	size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
 	/*
 	 * Assume dma valid range starts at 0 and covers the whole
 	 * coherent_dma_mask.
 	 */
-	arch_setup_dma_ops(dev, 0, dev->coherent_dma_mask + 1, iommu,
-			   attr == DEV_DMA_COHERENT);
+	arch_setup_dma_ops(dev, 0, size, iommu, attr == DEV_DMA_COHERENT);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(acpi_dma_configure);
 
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index 82bd45ced7ff..755a2b5354c5 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -368,7 +368,7 @@ int dma_configure(struct device *dev)
 	} else if (has_acpi_companion(dma_dev)) {
 		attr = acpi_get_dma_attr(to_acpi_device_node(dma_dev->fwnode));
 		if (attr != DEV_DMA_NOT_SUPPORTED)
-			acpi_dma_configure(dev, attr);
+			ret = acpi_dma_configure(dev, attr);
 	}
 
 	if (bridge)
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index ef0ae8aaa567..2a9a5de0fb00 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -575,7 +575,7 @@ struct acpi_pci_root {
 
 bool acpi_dma_supported(struct acpi_device *adev);
 enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev);
-void acpi_dma_configure(struct device *dev, enum dev_dma_attr attr);
+int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr);
 void acpi_dma_deconfigure(struct device *dev);
 
 struct acpi_device *acpi_find_child_device(struct acpi_device *parent,
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 9b05886f9773..79d06ef654c9 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -762,8 +762,11 @@ static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev)
 	return DEV_DMA_NOT_SUPPORTED;
 }
 
-static inline void acpi_dma_configure(struct device *dev,
-				      enum dev_dma_attr attr) { }
+static inline int acpi_dma_configure(struct device *dev,
+				     enum dev_dma_attr attr)
+{
+	return 0;
+}
 
 static inline void acpi_dma_deconfigure(struct device *dev) { }
 
-- 
cgit v1.2.3


From b913efe78a7ce1b2e64af7e5dc3a03748b997c61 Mon Sep 17 00:00:00 2001
From: Sricharan R <sricharan@codeaurora.org>
Date: Mon, 10 Apr 2017 16:51:04 +0530
Subject: arm64: dma-mapping: Remove the notifier trick to handle early setting
 of dma_ops

With arch_setup_dma_ops now being called late during device's probe after
the device's iommu is probed, the notifier trick required to handle the
early setup of dma_ops before the iommu group gets created is not
required. So removing the notifier's here.

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Sricharan R <sricharan@codeaurora.org>
[rm: clean up even more]
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/arm64/mm/dma-mapping.c | 142 ++++++--------------------------------------
 1 file changed, 18 insertions(+), 124 deletions(-)

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 81cdb2e844ed..b46575954032 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -813,34 +813,26 @@ static const struct dma_map_ops iommu_dma_ops = {
 	.mapping_error = iommu_dma_mapping_error,
 };
 
-/*
- * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
- * everything it needs to - the device is only partially created and the
- * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
- * need this delayed attachment dance. Once IOMMU probe ordering is sorted
- * to move the arch_setup_dma_ops() call later, all the notifier bits below
- * become unnecessary, and will go away.
- */
-struct iommu_dma_notifier_data {
-	struct list_head list;
-	struct device *dev;
-	const struct iommu_ops *ops;
-	u64 dma_base;
-	u64 size;
-};
-static LIST_HEAD(iommu_dma_masters);
-static DEFINE_MUTEX(iommu_dma_notifier_lock);
+static int __init __iommu_dma_init(void)
+{
+	return iommu_dma_init();
+}
+arch_initcall(__iommu_dma_init);
 
-static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
-			   u64 dma_base, u64 size)
+static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+				  const struct iommu_ops *ops)
 {
-	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+	struct iommu_domain *domain;
+
+	if (!ops)
+		return;
 
 	/*
-	 * If the IOMMU driver has the DMA domain support that we require,
-	 * then the IOMMU core will have already configured a group for this
-	 * device, and allocated the default domain for that group.
+	 * The IOMMU core code allocates the default DMA domain, which the
+	 * underlying IOMMU driver needs to support via the dma-iommu layer.
 	 */
+	domain = iommu_get_domain_for_dev(dev);
+
 	if (!domain)
 		goto out_err;
 
@@ -851,109 +843,11 @@ static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
 		dev->dma_ops = &iommu_dma_ops;
 	}
 
-	return true;
+	return;
+
 out_err:
-	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
+	 pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
 		 dev_name(dev));
-	return false;
-}
-
-static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
-			      u64 dma_base, u64 size)
-{
-	struct iommu_dma_notifier_data *iommudata;
-
-	iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL);
-	if (!iommudata)
-		return;
-
-	iommudata->dev = dev;
-	iommudata->ops = ops;
-	iommudata->dma_base = dma_base;
-	iommudata->size = size;
-
-	mutex_lock(&iommu_dma_notifier_lock);
-	list_add(&iommudata->list, &iommu_dma_masters);
-	mutex_unlock(&iommu_dma_notifier_lock);
-}
-
-static int __iommu_attach_notifier(struct notifier_block *nb,
-				   unsigned long action, void *data)
-{
-	struct iommu_dma_notifier_data *master, *tmp;
-
-	if (action != BUS_NOTIFY_BIND_DRIVER)
-		return 0;
-
-	mutex_lock(&iommu_dma_notifier_lock);
-	list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
-		if (data == master->dev && do_iommu_attach(master->dev,
-				master->ops, master->dma_base, master->size)) {
-			list_del(&master->list);
-			kfree(master);
-			break;
-		}
-	}
-	mutex_unlock(&iommu_dma_notifier_lock);
-	return 0;
-}
-
-static int __init register_iommu_dma_ops_notifier(struct bus_type *bus)
-{
-	struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
-	int ret;
-
-	if (!nb)
-		return -ENOMEM;
-
-	nb->notifier_call = __iommu_attach_notifier;
-
-	ret = bus_register_notifier(bus, nb);
-	if (ret) {
-		pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n",
-			bus->name);
-		kfree(nb);
-	}
-	return ret;
-}
-
-static int __init __iommu_dma_init(void)
-{
-	int ret;
-
-	ret = iommu_dma_init();
-	if (!ret)
-		ret = register_iommu_dma_ops_notifier(&platform_bus_type);
-	if (!ret)
-		ret = register_iommu_dma_ops_notifier(&amba_bustype);
-#ifdef CONFIG_PCI
-	if (!ret)
-		ret = register_iommu_dma_ops_notifier(&pci_bus_type);
-#endif
-	return ret;
-}
-arch_initcall(__iommu_dma_init);
-
-static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
-				  const struct iommu_ops *ops)
-{
-	struct iommu_group *group;
-
-	if (!ops)
-		return;
-	/*
-	 * TODO: As a concession to the future, we're ready to handle being
-	 * called both early and late (i.e. after bus_add_device). Once all
-	 * the platform bus code is reworked to call us late and the notifier
-	 * junk above goes away, move the body of do_iommu_attach here.
-	 */
-	group = iommu_group_get(dev);
-	if (group) {
-		do_iommu_attach(dev, ops, dma_base, size);
-		iommu_group_put(group);
-	} else {
-		queue_iommu_attach(dev, ops, dma_base, size);
-	}
 }
 
 void arch_teardown_dma_ops(struct device *dev)
-- 
cgit v1.2.3


From f6810c15cf973fc640ac8029951ff59f547b8a5e Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 10 Apr 2017 16:51:05 +0530
Subject: iommu/arm-smmu: Clean up early-probing workarounds

Now that the appropriate ordering is enforced via probe-deferral of
masters in core code, rip it all out and bask in the simplicity.

Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
[Sricharan: Rebased on top of ACPI IORT SMMU series]
Signed-off-by: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/arm-smmu-v3.c |  46 +-----------------
 drivers/iommu/arm-smmu.c    | 110 +++++++++++++++++++-------------------------
 2 files changed, 49 insertions(+), 107 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index bbd46efbe075..56401e6d2e68 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2763,51 +2763,9 @@ static struct platform_driver arm_smmu_driver = {
 	.probe	= arm_smmu_device_probe,
 	.remove	= arm_smmu_device_remove,
 };
+module_platform_driver(arm_smmu_driver);
 
-static int __init arm_smmu_init(void)
-{
-	static bool registered;
-	int ret = 0;
-
-	if (!registered) {
-		ret = platform_driver_register(&arm_smmu_driver);
-		registered = !ret;
-	}
-	return ret;
-}
-
-static void __exit arm_smmu_exit(void)
-{
-	return platform_driver_unregister(&arm_smmu_driver);
-}
-
-subsys_initcall(arm_smmu_init);
-module_exit(arm_smmu_exit);
-
-static int __init arm_smmu_of_init(struct device_node *np)
-{
-	int ret = arm_smmu_init();
-
-	if (ret)
-		return ret;
-
-	if (!of_platform_device_create(np, NULL, platform_bus_type.dev_root))
-		return -ENODEV;
-
-	return 0;
-}
-IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", arm_smmu_of_init);
-
-#ifdef CONFIG_ACPI
-static int __init acpi_smmu_v3_init(struct acpi_table_header *table)
-{
-	if (iort_node_match(ACPI_IORT_NODE_SMMU_V3))
-		return arm_smmu_init();
-
-	return 0;
-}
-IORT_ACPI_DECLARE(arm_smmu_v3, ACPI_SIG_IORT, acpi_smmu_v3_init);
-#endif
+IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", NULL);
 
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 9b33700b7c69..7f522504a876 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -2077,6 +2077,23 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
 	return 0;
 }
 
+static void arm_smmu_bus_init(void)
+{
+	/* Oh, for a proper bus abstraction */
+	if (!iommu_present(&platform_bus_type))
+		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
+#ifdef CONFIG_ARM_AMBA
+	if (!iommu_present(&amba_bustype))
+		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
+#endif
+#ifdef CONFIG_PCI
+	if (!iommu_present(&pci_bus_type)) {
+		pci_request_acs();
+		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
+	}
+#endif
+}
+
 static int arm_smmu_device_probe(struct platform_device *pdev)
 {
 	struct resource *res;
@@ -2182,21 +2199,30 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	arm_smmu_device_reset(smmu);
 	arm_smmu_test_smr_masks(smmu);
 
-	/* Oh, for a proper bus abstraction */
-	if (!iommu_present(&platform_bus_type))
-		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
-#ifdef CONFIG_ARM_AMBA
-	if (!iommu_present(&amba_bustype))
-		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
-#endif
-#ifdef CONFIG_PCI
-	if (!iommu_present(&pci_bus_type)) {
-		pci_request_acs();
-		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
-	}
-#endif
+	/*
+	 * For ACPI and generic DT bindings, an SMMU will be probed before
+	 * any device which might need it, so we want the bus ops in place
+	 * ready to handle default domain setup as soon as any SMMU exists.
+	 */
+	if (!using_legacy_binding)
+		arm_smmu_bus_init();
+
+	return 0;
+}
+
+/*
+ * With the legacy DT binding in play, though, we have no guarantees about
+ * probe order, but then we're also not doing default domains, so we can
+ * delay setting bus ops until we're sure every possible SMMU is ready,
+ * and that way ensure that no add_device() calls get missed.
+ */
+static int arm_smmu_legacy_bus_init(void)
+{
+	if (using_legacy_binding)
+		arm_smmu_bus_init();
 	return 0;
 }
+device_initcall_sync(arm_smmu_legacy_bus_init);
 
 static int arm_smmu_device_remove(struct platform_device *pdev)
 {
@@ -2221,56 +2247,14 @@ static struct platform_driver arm_smmu_driver = {
 	.probe	= arm_smmu_device_probe,
 	.remove	= arm_smmu_device_remove,
 };
-
-static int __init arm_smmu_init(void)
-{
-	static bool registered;
-	int ret = 0;
-
-	if (!registered) {
-		ret = platform_driver_register(&arm_smmu_driver);
-		registered = !ret;
-	}
-	return ret;
-}
-
-static void __exit arm_smmu_exit(void)
-{
-	return platform_driver_unregister(&arm_smmu_driver);
-}
-
-subsys_initcall(arm_smmu_init);
-module_exit(arm_smmu_exit);
-
-static int __init arm_smmu_of_init(struct device_node *np)
-{
-	int ret = arm_smmu_init();
-
-	if (ret)
-		return ret;
-
-	if (!of_platform_device_create(np, NULL, platform_bus_type.dev_root))
-		return -ENODEV;
-
-	return 0;
-}
-IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", arm_smmu_of_init);
-IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", arm_smmu_of_init);
-IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", arm_smmu_of_init);
-IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", arm_smmu_of_init);
-IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", arm_smmu_of_init);
-IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", arm_smmu_of_init);
-
-#ifdef CONFIG_ACPI
-static int __init arm_smmu_acpi_init(struct acpi_table_header *table)
-{
-	if (iort_node_match(ACPI_IORT_NODE_SMMU))
-		return arm_smmu_init();
-
-	return 0;
-}
-IORT_ACPI_DECLARE(arm_smmu, ACPI_SIG_IORT, arm_smmu_acpi_init);
-#endif
+module_platform_driver(arm_smmu_driver);
+
+IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL);
+IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL);
+IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL);
+IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL);
+IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL);
+IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL);
 
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
-- 
cgit v1.2.3


From 316ca8804ea84a782d5ba2163711ebb22116ff5a Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 10 Apr 2017 16:51:06 +0530
Subject: ACPI/IORT: Remove linker section for IORT entries probing

The IORT linker section introduced by commit 34ceea275f62
("ACPI/IORT: Introduce linker section for IORT entries probing")
was needed to make sure SMMU drivers are registered (and therefore
probed) in the kernel before devices using the SMMU have a chance
to probe in turn.

Through the introduction of deferred IOMMU configuration the linker
section based IORT probing infrastructure is not needed any longer, in
that device/SMMU probe dependencies are managed through the probe
deferral mechanism, making the IORT linker section infrastructure
unused, so that it can be removed.

Remove the unused IORT linker section probing infrastructure
from the kernel to complete the ACPI IORT IOMMU configure probe
deferral mechanism implementation.

Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/acpi/arm64/iort.c         | 2 --
 include/asm-generic/vmlinux.lds.h | 1 -
 include/linux/acpi_iort.h         | 3 ---
 3 files changed, 6 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index e323ece0314d..e7b1940ff13b 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1000,6 +1000,4 @@ void __init acpi_iort_init(void)
 	}
 
 	iort_init_platform_devices();
-
-	acpi_probe_device_table(iort);
 }
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 0968d13b3885..9faa26c41c14 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -566,7 +566,6 @@
 	IRQCHIP_OF_MATCH_TABLE()					\
 	ACPI_PROBE_TABLE(irqchip)					\
 	ACPI_PROBE_TABLE(clksrc)					\
-	ACPI_PROBE_TABLE(iort)						\
 	EARLYCON_TABLE()
 
 #define INIT_TEXT							\
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 77e08099e554..f167e1d045ff 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -52,7 +52,4 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
 { return NULL; }
 #endif
 
-#define IORT_ACPI_DECLARE(name, table_id, fn)		\
-	ACPI_DECLARE_PROBE_ENTRY(iort, name, table_id, 0, NULL, 0, fn)
-
 #endif /* __ACPI_IORT_H__ */
-- 
cgit v1.2.3


From abaa7e5b054aae567861628b74dbc7fbf8ed79e8 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Wed, 12 Apr 2017 00:21:26 -0500
Subject: iommu/omap: Register driver before setting IOMMU ops

Move the registration of the OMAP IOMMU platform driver before
setting the IOMMU callbacks on the platform bus. This causes
the IOMMU devices to be probed first before the .add_device()
callback is invoked for all registered devices, and allows
the iommu_group support to be added to the OMAP IOMMU driver.

While at this, also check for the return status from bus_set_iommu.

Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/omap-iommu.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index e2583cce2cc1..54556713c8d1 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1299,6 +1299,7 @@ static int __init omap_iommu_init(void)
 	const unsigned long flags = SLAB_HWCACHE_ALIGN;
 	size_t align = 1 << 10; /* L2 pagetable alignement */
 	struct device_node *np;
+	int ret;
 
 	np = of_find_matching_node(NULL, omap_iommu_of_match);
 	if (!np)
@@ -1312,11 +1313,25 @@ static int __init omap_iommu_init(void)
 		return -ENOMEM;
 	iopte_cachep = p;
 
-	bus_set_iommu(&platform_bus_type, &omap_iommu_ops);
-
 	omap_iommu_debugfs_init();
 
-	return platform_driver_register(&omap_iommu_driver);
+	ret = platform_driver_register(&omap_iommu_driver);
+	if (ret) {
+		pr_err("%s: failed to register driver\n", __func__);
+		goto fail_driver;
+	}
+
+	ret = bus_set_iommu(&platform_bus_type, &omap_iommu_ops);
+	if (ret)
+		goto fail_bus;
+
+	return 0;
+
+fail_bus:
+	platform_driver_unregister(&omap_iommu_driver);
+fail_driver:
+	kmem_cache_destroy(iopte_cachep);
+	return ret;
 }
 subsys_initcall(omap_iommu_init);
 /* must be ready before omap3isp is probed */
-- 
cgit v1.2.3


From 49a57ef7f8492ef985ee1ecdb927ca78a6b2f308 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Wed, 12 Apr 2017 00:21:27 -0500
Subject: iommu/omap: Drop legacy-style device support

All the supported boards that have OMAP IOMMU devices do support
DT boot only now. So, drop the support for the non-DT legacy-style
devices from the OMAP IOMMU driver. Couple of the fields from the
iommu platform data would no longer be required, so they have also
been cleaned up. The IOMMU platform data is still needed though for
performing reset management properly in a multi-arch environment.

Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/omap-iommu.c               | 30 ++++++++++++++----------------
 include/linux/platform_data/iommu-omap.h |  3 ---
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 54556713c8d1..febd4fbe3445 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -928,28 +928,26 @@ static int omap_iommu_probe(struct platform_device *pdev)
 	int irq;
 	struct omap_iommu *obj;
 	struct resource *res;
-	struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct device_node *of = pdev->dev.of_node;
 
+	if (!of) {
+		pr_err("%s: only DT-based devices are supported\n", __func__);
+		return -ENODEV;
+	}
+
 	obj = devm_kzalloc(&pdev->dev, sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL);
 	if (!obj)
 		return -ENOMEM;
 
-	if (of) {
-		obj->name = dev_name(&pdev->dev);
-		obj->nr_tlb_entries = 32;
-		err = of_property_read_u32(of, "ti,#tlb-entries",
-					   &obj->nr_tlb_entries);
-		if (err && err != -EINVAL)
-			return err;
-		if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8)
-			return -EINVAL;
-		if (of_find_property(of, "ti,iommu-bus-err-back", NULL))
-			obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN;
-	} else {
-		obj->nr_tlb_entries = pdata->nr_tlb_entries;
-		obj->name = pdata->name;
-	}
+	obj->name = dev_name(&pdev->dev);
+	obj->nr_tlb_entries = 32;
+	err = of_property_read_u32(of, "ti,#tlb-entries", &obj->nr_tlb_entries);
+	if (err && err != -EINVAL)
+		return err;
+	if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8)
+		return -EINVAL;
+	if (of_find_property(of, "ti,iommu-bus-err-back", NULL))
+		obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN;
 
 	obj->dev = &pdev->dev;
 	obj->ctx = (void *)obj + sizeof(*obj);
diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h
index 0496d171700a..a40fc0f4f9de 100644
--- a/include/linux/platform_data/iommu-omap.h
+++ b/include/linux/platform_data/iommu-omap.h
@@ -30,10 +30,7 @@ struct omap_iommu_arch_data {
 };
 
 struct iommu_platform_data {
-	const char *name;
 	const char *reset_name;
-	int nr_tlb_entries;
-
 	int (*assert_reset)(struct platform_device *pdev, const char *name);
 	int (*deassert_reset)(struct platform_device *pdev, const char *name);
 };
-- 
cgit v1.2.3


From e73b7afe4e8ca5ec4304a9e1d5009755a85fff91 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 12 Apr 2017 00:21:28 -0500
Subject: iommu/omap: Move data structures to omap-iommu.h

The internal data-structures are scattered over various
header and C files. Consolidate them in omap-iommu.h.

While at this, add the kerneldoc comment for the missing
iommu domain variable and revise the iommu_arch_data name.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
[s-anna@ti.com: revise kerneldoc comments]
Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/omap-iommu.c               | 16 ----------------
 drivers/iommu/omap-iommu.h               | 33 ++++++++++++++++++++++++++++++++
 include/linux/platform_data/iommu-omap.h | 17 ----------------
 3 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index febd4fbe3445..c1739a650654 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -42,22 +42,6 @@
 /* bitmap of the page sizes currently supported */
 #define OMAP_IOMMU_PGSIZES	(SZ_4K | SZ_64K | SZ_1M | SZ_16M)
 
-/**
- * struct omap_iommu_domain - omap iommu domain
- * @pgtable:	the page table
- * @iommu_dev:	an omap iommu device attached to this domain. only a single
- *		iommu device can be attached for now.
- * @dev:	Device using this domain.
- * @lock:	domain lock, should be taken when attaching/detaching
- */
-struct omap_iommu_domain {
-	u32 *pgtable;
-	struct omap_iommu *iommu_dev;
-	struct device *dev;
-	spinlock_t lock;
-	struct iommu_domain domain;
-};
-
 #define MMU_LOCK_BASE_SHIFT	10
 #define MMU_LOCK_BASE_MASK	(0x1f << MMU_LOCK_BASE_SHIFT)
 #define MMU_LOCK_BASE(x)	\
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index 59628e5017b4..3c33608f48ca 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -14,6 +14,7 @@
 #define _OMAP_IOMMU_H
 
 #include <linux/bitops.h>
+#include <linux/iommu.h>
 
 #define for_each_iotlb_cr(obj, n, __i, cr)				\
 	for (__i = 0;							\
@@ -27,6 +28,23 @@ struct iotlb_entry {
 	u32 endian, elsz, mixed;
 };
 
+/**
+ * struct omap_iommu_domain - omap iommu domain
+ * @pgtable:	the page table
+ * @iommu_dev:	an omap iommu device attached to this domain. only a single
+ *		iommu device can be attached for now.
+ * @dev:	Device using this domain.
+ * @lock:	domain lock, should be taken when attaching/detaching
+ * @domain:	generic domain handle used by iommu core code
+ */
+struct omap_iommu_domain {
+	u32 *pgtable;
+	struct omap_iommu *iommu_dev;
+	struct device *dev;
+	spinlock_t lock;
+	struct iommu_domain domain;
+};
+
 struct omap_iommu {
 	const char	*name;
 	void __iomem	*regbase;
@@ -52,6 +70,21 @@ struct omap_iommu {
 	u32 id;
 };
 
+/**
+ * struct omap_iommu_arch_data - omap iommu private data
+ * @name: name of the iommu device
+ * @iommu_dev: handle of the iommu device
+ *
+ * This is an omap iommu private data object, which binds an iommu user
+ * to its iommu device. This object should be placed at the iommu user's
+ * dev_archdata so generic IOMMU API can be used without having to
+ * utilize omap-specific plumbing anymore.
+ */
+struct omap_iommu_arch_data {
+	const char *name;
+	struct omap_iommu *iommu_dev;
+};
+
 struct cr_regs {
 	u32 cam;
 	u32 ram;
diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h
index a40fc0f4f9de..e8b12dbf6170 100644
--- a/include/linux/platform_data/iommu-omap.h
+++ b/include/linux/platform_data/iommu-omap.h
@@ -12,23 +12,6 @@
 
 #include <linux/platform_device.h>
 
-#define MMU_REG_SIZE		256
-
-/**
- * struct iommu_arch_data - omap iommu private data
- * @name: name of the iommu device
- * @iommu_dev: handle of the iommu device
- *
- * This is an omap iommu private data object, which binds an iommu user
- * to its iommu device. This object should be placed at the iommu user's
- * dev_archdata so generic IOMMU API can be used without having to
- * utilize omap-specific plumbing anymore.
- */
-struct omap_iommu_arch_data {
-	const char *name;
-	struct omap_iommu *iommu_dev;
-};
-
 struct iommu_platform_data {
 	const char *reset_name;
 	int (*assert_reset)(struct platform_device *pdev, const char *name);
-- 
cgit v1.2.3


From ede1c2e7d4dc49fb1591e1754db9d53fabbd4b8b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 12 Apr 2017 00:21:29 -0500
Subject: iommu/omap: Store iommu_dev pointer in arch_data

Instead of finding the matching IOMMU for a device using
string comparision functions, store the pointer to the
iommu_dev in arch_data during the omap_iommu_add_device
callback and reset it during the omap_iommu_remove_device
callback functions.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
[s-anna@ti.com: few minor cleanups]
Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/omap-iommu.c | 56 +++++++++++++++++++---------------------------
 drivers/iommu/omap-iommu.h |  2 --
 2 files changed, 23 insertions(+), 35 deletions(-)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index c1739a650654..0553b0381e2a 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -802,33 +802,14 @@ static irqreturn_t iommu_fault_handler(int irq, void *data)
 	return IRQ_NONE;
 }
 
-static int device_match_by_alias(struct device *dev, void *data)
-{
-	struct omap_iommu *obj = to_iommu(dev);
-	const char *name = data;
-
-	pr_debug("%s: %s %s\n", __func__, obj->name, name);
-
-	return strcmp(obj->name, name) == 0;
-}
-
 /**
  * omap_iommu_attach() - attach iommu device to an iommu domain
- * @name:	name of target omap iommu device
+ * @obj:	target omap iommu device
  * @iopgd:	page table
  **/
-static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd)
+static int omap_iommu_attach(struct omap_iommu *obj, u32 *iopgd)
 {
 	int err;
-	struct device *dev;
-	struct omap_iommu *obj;
-
-	dev = driver_find_device(&omap_iommu_driver.driver, NULL, (void *)name,
-				 device_match_by_alias);
-	if (!dev)
-		return ERR_PTR(-ENODEV);
-
-	obj = to_iommu(dev);
 
 	spin_lock(&obj->iommu_lock);
 
@@ -841,11 +822,13 @@ static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd)
 	spin_unlock(&obj->iommu_lock);
 
 	dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
-	return obj;
+
+	return 0;
 
 err_enable:
 	spin_unlock(&obj->iommu_lock);
-	return ERR_PTR(err);
+
+	return err;
 }
 
 /**
@@ -1059,11 +1042,11 @@ static int
 omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
 	struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
-	struct omap_iommu *oiommu;
 	struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+	struct omap_iommu *oiommu;
 	int ret = 0;
 
-	if (!arch_data || !arch_data->name) {
+	if (!arch_data || !arch_data->iommu_dev) {
 		dev_err(dev, "device doesn't have an associated iommu\n");
 		return -EINVAL;
 	}
@@ -1077,15 +1060,16 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		goto out;
 	}
 
+	oiommu = arch_data->iommu_dev;
+
 	/* get a handle to and enable the omap iommu */
-	oiommu = omap_iommu_attach(arch_data->name, omap_domain->pgtable);
-	if (IS_ERR(oiommu)) {
-		ret = PTR_ERR(oiommu);
+	ret = omap_iommu_attach(oiommu, omap_domain->pgtable);
+	if (ret) {
 		dev_err(dev, "can't get omap iommu: %d\n", ret);
 		goto out;
 	}
 
-	omap_domain->iommu_dev = arch_data->iommu_dev = oiommu;
+	omap_domain->iommu_dev = oiommu;
 	omap_domain->dev = dev;
 	oiommu->domain = domain;
 
@@ -1098,7 +1082,6 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
 				   struct device *dev)
 {
 	struct omap_iommu *oiommu = dev_to_omap_iommu(dev);
-	struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
 
 	/* only a single device is supported per domain for now */
 	if (omap_domain->iommu_dev != oiommu) {
@@ -1110,7 +1093,7 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
 
 	omap_iommu_detach(oiommu);
 
-	omap_domain->iommu_dev = arch_data->iommu_dev = NULL;
+	omap_domain->iommu_dev = NULL;
 	omap_domain->dev = NULL;
 	oiommu->domain = NULL;
 }
@@ -1214,6 +1197,7 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
 static int omap_iommu_add_device(struct device *dev)
 {
 	struct omap_iommu_arch_data *arch_data;
+	struct omap_iommu *oiommu;
 	struct device_node *np;
 	struct platform_device *pdev;
 
@@ -1236,13 +1220,19 @@ static int omap_iommu_add_device(struct device *dev)
 		return -EINVAL;
 	}
 
+	oiommu = platform_get_drvdata(pdev);
+	if (!oiommu) {
+		of_node_put(np);
+		return -EINVAL;
+	}
+
 	arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL);
 	if (!arch_data) {
 		of_node_put(np);
 		return -ENOMEM;
 	}
 
-	arch_data->name = kstrdup(dev_name(&pdev->dev), GFP_KERNEL);
+	arch_data->iommu_dev = oiommu;
 	dev->archdata.iommu = arch_data;
 
 	of_node_put(np);
@@ -1257,7 +1247,7 @@ static void omap_iommu_remove_device(struct device *dev)
 	if (!dev->of_node || !arch_data)
 		return;
 
-	kfree(arch_data->name);
+	dev->archdata.iommu = NULL;
 	kfree(arch_data);
 }
 
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index 3c33608f48ca..f81184b549ec 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -72,7 +72,6 @@ struct omap_iommu {
 
 /**
  * struct omap_iommu_arch_data - omap iommu private data
- * @name: name of the iommu device
  * @iommu_dev: handle of the iommu device
  *
  * This is an omap iommu private data object, which binds an iommu user
@@ -81,7 +80,6 @@ struct omap_iommu {
  * utilize omap-specific plumbing anymore.
  */
 struct omap_iommu_arch_data {
-	const char *name;
 	struct omap_iommu *iommu_dev;
 };
 
-- 
cgit v1.2.3


From 01611fe8478bf582af0c33d5853137dd25b72f2f Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 12 Apr 2017 00:21:30 -0500
Subject: iommu/omap: Make use of 'struct iommu_device'

Modify the driver to register individual iommus and
establish links between devices and iommus in sysfs.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
[s-anna@ti.com: fix some cleanup issues during failures]
Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/omap-iommu.c | 30 ++++++++++++++++++++++++++++++
 drivers/iommu/omap-iommu.h |  2 ++
 2 files changed, 32 insertions(+)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 0553b0381e2a..ef44fc740da7 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -36,6 +36,8 @@
 #include "omap-iopgtable.h"
 #include "omap-iommu.h"
 
+static const struct iommu_ops omap_iommu_ops;
+
 #define to_iommu(dev)							\
 	((struct omap_iommu *)platform_get_drvdata(to_platform_device(dev)))
 
@@ -941,6 +943,16 @@ static int omap_iommu_probe(struct platform_device *pdev)
 		return err;
 	platform_set_drvdata(pdev, obj);
 
+	err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name);
+	if (err)
+		return err;
+
+	iommu_device_set_ops(&obj->iommu, &omap_iommu_ops);
+
+	err = iommu_device_register(&obj->iommu);
+	if (err)
+		goto out_sysfs;
+
 	pm_runtime_irq_safe(obj->dev);
 	pm_runtime_enable(obj->dev);
 
@@ -948,12 +960,19 @@ static int omap_iommu_probe(struct platform_device *pdev)
 
 	dev_info(&pdev->dev, "%s registered\n", obj->name);
 	return 0;
+
+out_sysfs:
+	iommu_device_sysfs_remove(&obj->iommu);
+	return err;
 }
 
 static int omap_iommu_remove(struct platform_device *pdev)
 {
 	struct omap_iommu *obj = platform_get_drvdata(pdev);
 
+	iommu_device_sysfs_remove(&obj->iommu);
+	iommu_device_unregister(&obj->iommu);
+
 	omap_iommu_debugfs_remove(obj);
 
 	pm_runtime_disable(obj->dev);
@@ -1200,6 +1219,7 @@ static int omap_iommu_add_device(struct device *dev)
 	struct omap_iommu *oiommu;
 	struct device_node *np;
 	struct platform_device *pdev;
+	int ret;
 
 	/*
 	 * Allocate the archdata iommu structure for DT-based devices.
@@ -1232,6 +1252,13 @@ static int omap_iommu_add_device(struct device *dev)
 		return -ENOMEM;
 	}
 
+	ret = iommu_device_link(&oiommu->iommu, dev);
+	if (ret) {
+		kfree(arch_data);
+		of_node_put(np);
+		return ret;
+	}
+
 	arch_data->iommu_dev = oiommu;
 	dev->archdata.iommu = arch_data;
 
@@ -1247,8 +1274,11 @@ static void omap_iommu_remove_device(struct device *dev)
 	if (!dev->of_node || !arch_data)
 		return;
 
+	iommu_device_unlink(&arch_data->iommu_dev->iommu, dev);
+
 	dev->archdata.iommu = NULL;
 	kfree(arch_data);
+
 }
 
 static const struct iommu_ops omap_iommu_ops = {
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index f81184b549ec..758958a4712c 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -68,6 +68,8 @@ struct omap_iommu {
 
 	int has_bus_err_back;
 	u32 id;
+
+	struct iommu_device iommu;
 };
 
 /**
-- 
cgit v1.2.3


From 28ae1e3e14f32c70913e4ebf4aeff7959ed03f84 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 12 Apr 2017 00:21:31 -0500
Subject: iommu/omap: Add iommu-group support

Support for IOMMU groups will become mandatory for drivers,
so add it to the omap iommu driver.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
[s-anna@ti.com: minor error cleanups]
Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/omap-iommu.c | 39 ++++++++++++++++++++++++++++++++++++++-
 drivers/iommu/omap-iommu.h |  1 +
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index ef44fc740da7..95dfca36ccb9 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -943,9 +943,13 @@ static int omap_iommu_probe(struct platform_device *pdev)
 		return err;
 	platform_set_drvdata(pdev, obj);
 
+	obj->group = iommu_group_alloc();
+	if (IS_ERR(obj->group))
+		return PTR_ERR(obj->group);
+
 	err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name);
 	if (err)
-		return err;
+		goto out_group;
 
 	iommu_device_set_ops(&obj->iommu, &omap_iommu_ops);
 
@@ -959,10 +963,13 @@ static int omap_iommu_probe(struct platform_device *pdev)
 	omap_iommu_debugfs_add(obj);
 
 	dev_info(&pdev->dev, "%s registered\n", obj->name);
+
 	return 0;
 
 out_sysfs:
 	iommu_device_sysfs_remove(&obj->iommu);
+out_group:
+	iommu_group_put(obj->group);
 	return err;
 }
 
@@ -970,6 +977,9 @@ static int omap_iommu_remove(struct platform_device *pdev)
 {
 	struct omap_iommu *obj = platform_get_drvdata(pdev);
 
+	iommu_group_put(obj->group);
+	obj->group = NULL;
+
 	iommu_device_sysfs_remove(&obj->iommu);
 	iommu_device_unregister(&obj->iommu);
 
@@ -1217,6 +1227,7 @@ static int omap_iommu_add_device(struct device *dev)
 {
 	struct omap_iommu_arch_data *arch_data;
 	struct omap_iommu *oiommu;
+	struct iommu_group *group;
 	struct device_node *np;
 	struct platform_device *pdev;
 	int ret;
@@ -1262,6 +1273,19 @@ static int omap_iommu_add_device(struct device *dev)
 	arch_data->iommu_dev = oiommu;
 	dev->archdata.iommu = arch_data;
 
+	/*
+	 * IOMMU group initialization calls into omap_iommu_device_group, which
+	 * needs a valid dev->archdata.iommu pointer
+	 */
+	group = iommu_group_get_for_dev(dev);
+	if (IS_ERR(group)) {
+		iommu_device_unlink(&oiommu->iommu, dev);
+		dev->archdata.iommu = NULL;
+		kfree(arch_data);
+		return PTR_ERR(group);
+	}
+	iommu_group_put(group);
+
 	of_node_put(np);
 
 	return 0;
@@ -1275,12 +1299,24 @@ static void omap_iommu_remove_device(struct device *dev)
 		return;
 
 	iommu_device_unlink(&arch_data->iommu_dev->iommu, dev);
+	iommu_group_remove_device(dev);
 
 	dev->archdata.iommu = NULL;
 	kfree(arch_data);
 
 }
 
+static struct iommu_group *omap_iommu_device_group(struct device *dev)
+{
+	struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+	struct iommu_group *group = NULL;
+
+	if (arch_data->iommu_dev)
+		group = arch_data->iommu_dev->group;
+
+	return group;
+}
+
 static const struct iommu_ops omap_iommu_ops = {
 	.domain_alloc	= omap_iommu_domain_alloc,
 	.domain_free	= omap_iommu_domain_free,
@@ -1292,6 +1328,7 @@ static const struct iommu_ops omap_iommu_ops = {
 	.iova_to_phys	= omap_iommu_iova_to_phys,
 	.add_device	= omap_iommu_add_device,
 	.remove_device	= omap_iommu_remove_device,
+	.device_group	= omap_iommu_device_group,
 	.pgsize_bitmap	= OMAP_IOMMU_PGSIZES,
 };
 
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index 758958a4712c..6e70515e6038 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -70,6 +70,7 @@ struct omap_iommu {
 	u32 id;
 
 	struct iommu_device iommu;
+	struct iommu_group *group;
 };
 
 /**
-- 
cgit v1.2.3


From fd8e2d4b393252505783656471465c7f85f3c0a9 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Wed, 12 Apr 2017 00:21:32 -0500
Subject: omap3isp: Remove iommu_group related code

The OMAP IOMMU driver has added the support for IOMMU groups internally,
and the ISP device is automatically linked to the appropriate IOMMU group.
So, remove the explicit function calls that creates/deletes an iommu_group
and adds the ISP device to this group.

Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/media/platform/omap3isp/isp.c | 17 -----------------
 drivers/media/platform/omap3isp/isp.h |  1 -
 2 files changed, 18 deletions(-)

diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c
index 084ecf4aa9a4..0d984a28a003 100644
--- a/drivers/media/platform/omap3isp/isp.c
+++ b/drivers/media/platform/omap3isp/isp.c
@@ -1943,30 +1943,13 @@ static void isp_detach_iommu(struct isp_device *isp)
 {
 	arm_iommu_release_mapping(isp->mapping);
 	isp->mapping = NULL;
-	iommu_group_remove_device(isp->dev);
 }
 
 static int isp_attach_iommu(struct isp_device *isp)
 {
 	struct dma_iommu_mapping *mapping;
-	struct iommu_group *group;
 	int ret;
 
-	/* Create a device group and add the device to it. */
-	group = iommu_group_alloc();
-	if (IS_ERR(group)) {
-		dev_err(isp->dev, "failed to allocate IOMMU group\n");
-		return PTR_ERR(group);
-	}
-
-	ret = iommu_group_add_device(group, isp->dev);
-	iommu_group_put(group);
-
-	if (ret < 0) {
-		dev_err(isp->dev, "failed to add device to IPMMU group\n");
-		return ret;
-	}
-
 	/*
 	 * Create the ARM mapping, used by the ARM DMA mapping core to allocate
 	 * VAs. This will allocate a corresponding IOMMU domain.
diff --git a/drivers/media/platform/omap3isp/isp.h b/drivers/media/platform/omap3isp/isp.h
index 7e6f6638433b..2f2ae609c548 100644
--- a/drivers/media/platform/omap3isp/isp.h
+++ b/drivers/media/platform/omap3isp/isp.h
@@ -23,7 +23,6 @@
 #include <linux/clk-provider.h>
 #include <linux/device.h>
 #include <linux/io.h>
-#include <linux/iommu.h>
 #include <linux/platform_device.h>
 #include <linux/wait.h>
 
-- 
cgit v1.2.3


From 33006cdf9c038dca9b3d4c148a509de851a66ea9 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 29 Mar 2017 14:02:19 -0700
Subject: ovl: Use designated initializers

Prepare to mark sensitive kernel structures for randomization by making
sure they're using designated initializers. These were identified during
allyesconfig builds of x86, arm, and arm64, with most initializer fixes
extracted from grsecurity.

For these cases, use { }, which will be zero-filled, instead of
undesignated NULLs.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 4 ++--
 fs/overlayfs/util.c  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index c9e70d39c1ea..07c8793efb1d 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -709,8 +709,8 @@ static const struct xattr_handler *ovl_xattr_handlers[] = {
 
 static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 {
-	struct path upperpath = { NULL, NULL };
-	struct path workpath = { NULL, NULL };
+	struct path upperpath = { };
+	struct path workpath = { };
 	struct dentry *root_dentry;
 	struct inode *realinode;
 	struct ovl_entry *oe;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 1953986ee6bc..35b8959db5ee 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -101,7 +101,7 @@ void ovl_path_lower(struct dentry *dentry, struct path *path)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;
 
-	*path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL };
+	*path = oe->numlower ? oe->lowerstack[0] : (struct path) { };
 }
 
 enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
-- 
cgit v1.2.3


From 78757af6518a35bdc22b4e7f660ff9dbbeba35d7 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 8 Apr 2017 14:49:06 +0300
Subject: vfs: ftruncate check IS_APPEND() on real upper inode

ftruncate an overlayfs inode was checking IS_APPEND() on
overlay inode, but overlay inode does not have the S_APPEND flag.

Check IS_APPEND() on real upper inode instead.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/open.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/open.c b/fs/open.c
index 949cef29c3bb..993a91d20cc7 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -193,7 +193,8 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 		goto out_putf;
 
 	error = -EPERM;
-	if (IS_APPEND(inode))
+	/* Check IS_APPEND on real upper inode */
+	if (IS_APPEND(file_inode(f.file)))
 		goto out_putf;
 
 	sb_start_write(inode->i_sb);
-- 
cgit v1.2.3


From b0990fbbbd147da2096b24f2d462abe02ca251fd Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 8 Apr 2017 14:49:07 +0300
Subject: ovl: check IS_APPEND() on real upper inode

For overlay file open, check IS_APPEND() on the real upper inode
inside d_real(), because the overlay inode does not have the
S_APPEND flag and IS_APPEND() can only be checked at open time.

Note that because overlayfs does not copy up the chattr inode flags
(i.e. S_APPEND, S_IMMUTABLE), the IS_APPEND() check is only relevant
for upper inodes that were set with chattr +a and not to lower
inodes that had chattr +a before copy up.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 07c8793efb1d..6faefa54cb5e 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -49,11 +49,28 @@ static void ovl_dentry_release(struct dentry *dentry)
 	}
 }
 
+static int ovl_check_append_only(struct inode *inode, int flag)
+{
+	/*
+	 * This test was moot in vfs may_open() because overlay inode does
+	 * not have the S_APPEND flag, so re-check on real upper inode
+	 */
+	if (IS_APPEND(inode)) {
+		if  ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
+			return -EPERM;
+		if (flag & O_TRUNC)
+			return -EPERM;
+	}
+
+	return 0;
+}
+
 static struct dentry *ovl_d_real(struct dentry *dentry,
 				 const struct inode *inode,
 				 unsigned int open_flags)
 {
 	struct dentry *real;
+	int err;
 
 	if (!d_is_reg(dentry)) {
 		if (!inode || inode == d_inode(dentry))
@@ -65,15 +82,20 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
 		return dentry;
 
 	if (open_flags) {
-		int err = ovl_open_maybe_copy_up(dentry, open_flags);
-
+		err = ovl_open_maybe_copy_up(dentry, open_flags);
 		if (err)
 			return ERR_PTR(err);
 	}
 
 	real = ovl_dentry_upper(dentry);
-	if (real && (!inode || inode == d_inode(real)))
+	if (real && (!inode || inode == d_inode(real))) {
+		if (!inode) {
+			err = ovl_check_append_only(d_inode(real), open_flags);
+			if (err)
+				return ERR_PTR(err);
+		}
 		return real;
+	}
 
 	real = ovl_dentry_lower(dentry);
 	if (!real)
-- 
cgit v1.2.3


From 3ba8775f64484a2b56bf3c88d09a186d819fa348 Mon Sep 17 00:00:00 2001
From: "zhichang.yuan" <yuanzhichang@hisilicon.com>
Date: Tue, 18 Apr 2017 20:51:48 +0800
Subject: iommu: Make iommu_bus_notifier return NOTIFY_DONE rather than error
 code

In iommu_bus_notifier(), when action is
BUS_NOTIFY_ADD_DEVICE, it will return 'ops->add_device(dev)'
directly. But ops->add_device will return ERR_VAL, such as
-ENODEV. These value will make notifier_call_chain() not to
traverse the remain nodes in struct notifier_block list.

This patch revises iommu_bus_notifier() to return
NOTIFY_DONE when some errors happened in ops->add_device().

Signed-off-by: zhichang.yuan <yuanzhichang@hisilicon.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8ea14f41a979..9170fd498f46 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1082,8 +1082,12 @@ static int iommu_bus_notifier(struct notifier_block *nb,
 	 * result in ADD/DEL notifiers to group->notifier
 	 */
 	if (action == BUS_NOTIFY_ADD_DEVICE) {
-		if (ops->add_device)
-			return ops->add_device(dev);
+		if (ops->add_device) {
+			int ret;
+
+			ret = ops->add_device(dev);
+			return (ret) ? NOTIFY_DONE : NOTIFY_OK;
+		}
 	} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
 		if (ops->remove_device && dev->iommu_group) {
 			ops->remove_device(dev);
-- 
cgit v1.2.3


From b044f64513843e960f4b8d8e2e042abca1b7c029 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 10 Mar 2017 17:07:46 -0500
Subject: NFS: switch back to to ->iterate()

NFS has some optimizations for readdir to choose between using READDIR or
READDIRPLUS based on workload, and which NFS operation to use is determined
by subsequent interactions with lookup, d_revalidate, and getattr.

Concurrent use of nfs_readdir() via ->iterate_shared() can cause those
optimizations to repeatedly invalidate the pagecache used to store
directory entries during readdir(), which causes some very bad performance
for directories with many entries (more than about 10000).

There's a couple ways to fix this in NFS, but no fix would be as simple as
going back to ->iterate() to serialize nfs_readdir(), and neither fix I
tested performed as well as going back to ->iterate().

The first required taking the directory's i_lock for each entry, with the
result of terrible contention.

The second way adds another flag to the nfs_inode, and so keeps the
optimizations working for large directories.  The difference from using
->iterate() here is that much more memory is consumed for a given workload
without any performance gain.

The workings of nfs_readdir() are such that concurrent users are serialized
within read_cache_page() waiting to retrieve pages of entries from the
server.  By serializing this work in iterate_dir() instead, contention for
cache pages is reduced.  Waiting processes can have an uncontended pass at
the entirety of the directory's pagecache once previous processes have
completed filling it.

v2 - Keep the bits needed for parallel lookup

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/dir.c | 37 ++++++++++++-------------------------
 1 file changed, 12 insertions(+), 25 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f92ba8d6c556..3a188cb5ebce 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -57,7 +57,7 @@ static void nfs_readdir_clear_array(struct page*);
 const struct file_operations nfs_dir_operations = {
 	.llseek		= nfs_llseek_dir,
 	.read		= generic_read_dir,
-	.iterate_shared	= nfs_readdir,
+	.iterate	= nfs_readdir,
 	.open		= nfs_opendir,
 	.release	= nfs_closedir,
 	.fsync		= nfs_fsync_dir,
@@ -145,7 +145,6 @@ struct nfs_cache_array_entry {
 };
 
 struct nfs_cache_array {
-	atomic_t refcount;
 	int size;
 	int eof_index;
 	u64 last_cookie;
@@ -201,20 +200,11 @@ void nfs_readdir_clear_array(struct page *page)
 	int i;
 
 	array = kmap_atomic(page);
-	if (atomic_dec_and_test(&array->refcount))
-		for (i = 0; i < array->size; i++)
-			kfree(array->array[i].string.name);
+	for (i = 0; i < array->size; i++)
+		kfree(array->array[i].string.name);
 	kunmap_atomic(array);
 }
 
-static bool grab_page(struct page *page)
-{
-	struct nfs_cache_array *array = kmap_atomic(page);
-	bool res = atomic_inc_not_zero(&array->refcount);
-	kunmap_atomic(array);
-	return res;
-}
-
 /*
  * the caller is responsible for freeing qstr.name
  * when called by nfs_readdir_add_to_array, the strings will be freed in
@@ -680,7 +670,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 		goto out_label_free;
 	}
 	memset(array, 0, sizeof(struct nfs_cache_array));
-	atomic_set(&array->refcount, 1);
 	array->eof_index = -1;
 
 	status = nfs_readdir_alloc_pages(pages, array_size);
@@ -743,7 +732,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
 static
 void cache_page_release(nfs_readdir_descriptor_t *desc)
 {
-	nfs_readdir_clear_array(desc->page);
+	if (!desc->page->mapping)
+		nfs_readdir_clear_array(desc->page);
 	put_page(desc->page);
 	desc->page = NULL;
 }
@@ -751,16 +741,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
 static
 struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
 {
-	struct page *page;
-
-	for (;;) {
-		page = read_cache_page(desc->file->f_mapping,
+	return read_cache_page(desc->file->f_mapping,
 			desc->page_index, (filler_t *)nfs_readdir_filler, desc);
-		if (IS_ERR(page) || grab_page(page))
-			break;
-		put_page(page);
-	}
-	return page;
 }
 
 /*
@@ -966,11 +948,13 @@ out:
 
 static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
 {
+	struct inode *inode = file_inode(filp);
 	struct nfs_open_dir_context *dir_ctx = filp->private_data;
 
 	dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
 			filp, offset, whence);
 
+	inode_lock(inode);
 	switch (whence) {
 		case 1:
 			offset += filp->f_pos;
@@ -978,13 +962,16 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
 			if (offset >= 0)
 				break;
 		default:
-			return -EINVAL;
+			offset = -EINVAL;
+			goto out;
 	}
 	if (offset != filp->f_pos) {
 		filp->f_pos = offset;
 		dir_ctx->dir_cookie = 0;
 		dir_ctx->duped = 0;
 	}
+out:
+	inode_unlock(inode);
 	return offset;
 }
 
-- 
cgit v1.2.3


From a7878ca140084ec267ede48fce64d7e60f6d873e Mon Sep 17 00:00:00 2001
From: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Date: Tue, 4 Apr 2017 15:12:51 +0200
Subject: nfs: flexfilelayout: remove v3-only data server limitation

Flexfilelayout supports data servers which talk NFS v3 and v4.{0,1,2}.
However, this code path is disabled and v3 only servers are accepted.
This change removes this limitation.
Signed-off-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/flexfilelayout/flexfilelayoutdev.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 457cfeb1d5c1..8ca9cc665561 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -119,7 +119,13 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
 		if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE)
 			ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE;
 
-		if (ds_versions[i].version != 3 || ds_versions[i].minor_version != 0) {
+		/*
+		 * check for valid major/minor combination.
+		 * currently we support dataserver which talk:
+		 *   v3, v4.0, v4.1, v4.2
+		 */
+		if (!((ds_versions[i].version == 3 && ds_versions[i].minor_version == 0) ||
+			(ds_versions[i].version == 4 && ds_versions[i].minor_version < 3))) {
 			dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__,
 				i, ds_versions[i].version,
 				ds_versions[i].minor_version);
-- 
cgit v1.2.3


From 1a916ce0497847ce4a1b21c63383a3653c613055 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:14:55 -0400
Subject: NFS: Clean up do_callback_layoutrecall()

Removing the dprintk()s lets us simplify the function by removing the
else condition entirely and returning the status of
initiate_{file,bulk}_draining() directly.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback_proc.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index f073a6d2c6a5..c52408055721 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -317,16 +317,9 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
 static u32 do_callback_layoutrecall(struct nfs_client *clp,
 				    struct cb_layoutrecallargs *args)
 {
-	u32 res;
-
-	dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
 	if (args->cbl_recall_type == RETURN_FILE)
-		res = initiate_file_draining(clp, args);
-	else
-		res = initiate_bulk_draining(clp, args);
-	dprintk("%s returning %i\n", __func__, res);
-	return res;
-
+		return initiate_file_draining(clp, args);
+	return initiate_bulk_draining(clp, args);
 }
 
 __be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
-- 
cgit v1.2.3


From 5694a4f848be23e3df758fc2317880c7f9f823d4 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:14:56 -0400
Subject: NFS: Clean up nfs4_callback_layoutrecall()

In addition to removing the dprintk(), this patch also initializes "res"
to the default return value instead of doing this through an else
condition.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback_proc.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c52408055721..e61e4ccc181a 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -325,16 +325,10 @@ static u32 do_callback_layoutrecall(struct nfs_client *clp,
 __be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
 				  void *dummy, struct cb_process_state *cps)
 {
-	u32 res;
-
-	dprintk("%s: -->\n", __func__);
+	u32 res = NFS4ERR_OP_NOT_IN_SESSION;
 
 	if (cps->clp)
 		res = do_callback_layoutrecall(cps->clp, args);
-	else
-		res = NFS4ERR_OP_NOT_IN_SESSION;
-
-	dprintk("%s: exit with status = %d\n", __func__, res);
 	return cpu_to_be32(res);
 }
 
-- 
cgit v1.2.3


From be55f1bca7cf1021a1f4417e2d5f48bdc32e0bd2 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:14:57 -0400
Subject: NFS: Remove extra dprintk()s from callback_proc.c

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback_proc.c | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index e61e4ccc181a..e7f041447afd 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -351,8 +351,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
 	struct nfs_client *clp = cps->clp;
 	struct nfs_server *server = NULL;
 
-	dprintk("%s: -->\n", __func__);
-
 	if (!clp) {
 		res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
 		goto out;
@@ -371,8 +369,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
 					goto found;
 				}
 			rcu_read_unlock();
-			dprintk("%s: layout type %u not found\n",
-				__func__, dev->cbd_layout_type);
 			continue;
 		}
 
@@ -382,8 +378,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
 
 out:
 	kfree(args->devs);
-	dprintk("%s: exit with status = %u\n",
-		__func__, be32_to_cpu(res));
 	return res;
 }
 
@@ -404,16 +398,11 @@ static __be32
 validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
 		const struct cb_sequenceargs * args)
 {
-	dprintk("%s enter. slotid %u seqid %u, slot table seqid: %u\n",
-		__func__, args->csa_slotid, args->csa_sequenceid, slot->seq_nr);
-
 	if (args->csa_slotid > tbl->server_highest_slotid)
 		return htonl(NFS4ERR_BADSLOT);
 
 	/* Replay */
 	if (args->csa_sequenceid == slot->seq_nr) {
-		dprintk("%s seqid %u is a replay\n",
-			__func__, args->csa_sequenceid);
 		if (nfs4_test_locked_slot(tbl, slot->slot_nr))
 			return htonl(NFS4ERR_DELAY);
 		/* Signal process_op to set this error on next op */
@@ -467,15 +456,6 @@ static bool referring_call_exists(struct nfs_client *clp,
 
 		for (j = 0; j < rclist->rcl_nrefcalls; j++) {
 			ref = &rclist->rcl_refcalls[j];
-
-			dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u "
-				"slotid %u\n", __func__,
-				((u32 *)&rclist->rcl_sessionid.data)[0],
-				((u32 *)&rclist->rcl_sessionid.data)[1],
-				((u32 *)&rclist->rcl_sessionid.data)[2],
-				((u32 *)&rclist->rcl_sessionid.data)[3],
-				ref->rc_sequenceid, ref->rc_slotid);
-
 			status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid,
 					ref->rc_sequenceid, HZ >> 1) < 0;
 			if (status)
@@ -580,8 +560,6 @@ out:
 		res->csr_status = status;
 
 	trace_nfs4_cb_sequence(args, res, status);
-	dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
-		ntohl(status), ntohl(res->csr_status));
 	return status;
 }
 
-- 
cgit v1.2.3


From 56938bb77a5f2cfa8c5d99029022ed2bbc0aef4a Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:14:58 -0400
Subject: NFS: Clean up decode_getattr_args()

Removing the dprintk() lets us return the status value directly, rather
than jumping to a label if an error occurs.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback_xdr.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index d051fc3583a9..c782261d86d5 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -192,11 +192,8 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr
 
 	status = decode_fh(xdr, &args->fh);
 	if (unlikely(status != 0))
-		goto out;
-	status = decode_bitmap(xdr, args->bitmap);
-out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	return status;
+		return status;
+	return decode_bitmap(xdr, args->bitmap);
 }
 
 static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
-- 
cgit v1.2.3


From 135a4ea0d9b8a3fe538c309e0073c17893e47298 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:14:59 -0400
Subject: NFS: Clean up decode_recall_args()

Removing the dprintk() lets us simplify the function by returning status
codes directly, rather than using a goto.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback_xdr.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c782261d86d5..0319230fe1a8 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -203,17 +203,12 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 
 	status = decode_delegation_stateid(xdr, &args->stateid);
 	if (unlikely(status != 0))
-		goto out;
+		return status;
 	p = read_buf(xdr, 4);
-	if (unlikely(p == NULL)) {
-		status = htonl(NFS4ERR_RESOURCE);
-		goto out;
-	}
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
 	args->truncate = ntohl(*p);
-	status = decode_fh(xdr, &args->fh);
-out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	return status;
+	return decode_fh(xdr, &args->fh);
 }
 
 #if defined(CONFIG_NFS_V4_1)
-- 
cgit v1.2.3


From c79d56d2148d94e8158f242560d0d7253c22b5f5 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:00 -0400
Subject: NFS: Clean up decode_layoutrecall_args()

Additionally, this change lets us cut out the goto by returning errors
immediately.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback_xdr.c | 38 +++++++++++---------------------------
 1 file changed, 11 insertions(+), 27 deletions(-)

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 0319230fe1a8..d5ddceb91a9a 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -227,10 +227,8 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
 	uint32_t iomode;
 
 	p = read_buf(xdr, 4 * sizeof(uint32_t));
-	if (unlikely(p == NULL)) {
-		status = htonl(NFS4ERR_BADXDR);
-		goto out;
-	}
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_BADXDR);
 
 	args->cbl_layout_type = ntohl(*p++);
 	/* Depite the spec's xdr, iomode really belongs in the FILE switch,
@@ -244,37 +242,23 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
 		args->cbl_range.iomode = iomode;
 		status = decode_fh(xdr, &args->cbl_fh);
 		if (unlikely(status != 0))
-			goto out;
+			return status;
 
 		p = read_buf(xdr, 2 * sizeof(uint64_t));
-		if (unlikely(p == NULL)) {
-			status = htonl(NFS4ERR_BADXDR);
-			goto out;
-		}
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_BADXDR);
 		p = xdr_decode_hyper(p, &args->cbl_range.offset);
 		p = xdr_decode_hyper(p, &args->cbl_range.length);
-		status = decode_layout_stateid(xdr, &args->cbl_stateid);
-		if (unlikely(status != 0))
-			goto out;
+		return decode_layout_stateid(xdr, &args->cbl_stateid);
 	} else if (args->cbl_recall_type == RETURN_FSID) {
 		p = read_buf(xdr, 2 * sizeof(uint64_t));
-		if (unlikely(p == NULL)) {
-			status = htonl(NFS4ERR_BADXDR);
-			goto out;
-		}
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_BADXDR);
 		p = xdr_decode_hyper(p, &args->cbl_fsid.major);
 		p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
-	} else if (args->cbl_recall_type != RETURN_ALL) {
-		status = htonl(NFS4ERR_BADXDR);
-		goto out;
-	}
-	dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d\n",
-		__func__,
-		args->cbl_layout_type, iomode,
-		args->cbl_layoutchanged, args->cbl_recall_type);
-out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	return status;
+	} else if (args->cbl_recall_type != RETURN_ALL)
+		return htonl(NFS4ERR_BADXDR);
+	return 0;
 }
 
 static
-- 
cgit v1.2.3


From 1796549ad48c43dbe40408de39c7afcacbd43f0b Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:01 -0400
Subject: NFS: Clean up decode_cb_sequence_args()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback_xdr.c | 25 +++++--------------------
 1 file changed, 5 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index d5ddceb91a9a..ce1e293b8a18 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -413,12 +413,11 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 
 	status = decode_sessionid(xdr, &args->csa_sessionid);
 	if (status)
-		goto out;
+		return status;
 
-	status = htonl(NFS4ERR_RESOURCE);
 	p = read_buf(xdr, 5 * sizeof(uint32_t));
 	if (unlikely(p == NULL))
-		goto out;
+		return htonl(NFS4ERR_RESOURCE);
 
 	args->csa_addr = svc_addr(rqstp);
 	args->csa_sequenceid = ntohl(*p++);
@@ -432,7 +431,7 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 						  sizeof(*args->csa_rclists),
 						  GFP_KERNEL);
 		if (unlikely(args->csa_rclists == NULL))
-			goto out;
+			return htonl(NFS4ERR_RESOURCE);
 
 		for (i = 0; i < args->csa_nrclists; i++) {
 			status = decode_rc_list(xdr, &args->csa_rclists[i]);
@@ -442,27 +441,13 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 			}
 		}
 	}
-	status = 0;
-
-	dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u slotid %u "
-		"highestslotid %u cachethis %d nrclists %u\n",
-		__func__,
-		((u32 *)&args->csa_sessionid)[0],
-		((u32 *)&args->csa_sessionid)[1],
-		((u32 *)&args->csa_sessionid)[2],
-		((u32 *)&args->csa_sessionid)[3],
-		args->csa_sequenceid, args->csa_slotid,
-		args->csa_highestslotid, args->csa_cachethis,
-		args->csa_nrclists);
-out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	return status;
+	return 0;
 
 out_free:
 	for (i = 0; i < args->csa_nrclists; i++)
 		kfree(args->csa_rclists[i].rcl_refcalls);
 	kfree(args->csa_rclists);
-	goto out;
+	return status;
 }
 
 static __be32 decode_recallany_args(struct svc_rqst *rqstp,
-- 
cgit v1.2.3


From 535ece2b8e4b0edf9f18be2d103d19f833fa8b7f Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:02 -0400
Subject: NFS: Clean up decode_notify_lock_args()

Let's cut out the goto and return any errors immedately

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback_xdr.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index ce1e293b8a18..4cf70dc59933 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -518,11 +518,8 @@ static __be32 decode_notify_lock_args(struct svc_rqst *rqstp, struct xdr_stream
 
 	status = decode_fh(xdr, &args->cbnl_fh);
 	if (unlikely(status != 0))
-		goto out;
-	status = decode_lockowner(xdr, args);
-out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	return status;
+		return status;
+	return decode_lockowner(xdr, args);
 }
 
 #endif /* CONFIG_NFS_V4_1 */
-- 
cgit v1.2.3


From 3d0bfaa60dead8e47ede8d08c82cdf393214955d Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:03 -0400
Subject: NFS: Clean up encode_cb_sequence_res()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback_xdr.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 4cf70dc59933..e54e697340a4 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -692,11 +692,11 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
 	__be32 status = res->csr_status;
 
 	if (unlikely(status != 0))
-		goto out;
+		return status;
 
 	status = encode_sessionid(xdr, &res->csr_sessionid);
 	if (status)
-		goto out;
+		return status;
 
 	p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t));
 	if (unlikely(p == NULL))
@@ -706,9 +706,7 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
 	*p++ = htonl(res->csr_slotid);
 	*p++ = htonl(res->csr_highestslotid);
 	*p++ = htonl(res->csr_target_highestslotid);
-out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	return status;
+	return 0;
 }
 
 static __be32
-- 
cgit v1.2.3


From 36718a669e0507d2590af2a3730d58402d1f03f7 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:04 -0400
Subject: NFS: Remove extra dprintk()s from callback_xdr.c

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback_xdr.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index e54e697340a4..c14758e08d73 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -171,8 +171,6 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
 		return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
 	}
 	hdr->nops = ntohl(*p);
-	dprintk("%s: minorversion %d nops %d\n", __func__,
-		hdr->minorversion, hdr->nops);
 	return 0;
 }
 
@@ -665,7 +663,6 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 	status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
 	*savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
 out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	return status;
 }
 
@@ -827,14 +824,10 @@ static __be32 process_op(int nop, struct svc_rqst *rqstp,
 	long maxlen;
 	__be32 res;
 
-	dprintk("%s: start\n", __func__);
 	status = decode_op_hdr(xdr_in, &op_nr);
 	if (unlikely(status))
 		return status;
 
-	dprintk("%s: minorversion=%d nop=%d op_nr=%u\n",
-		__func__, cps->minorversion, nop, op_nr);
-
 	switch (cps->minorversion) {
 	case 0:
 		status = preprocess_nfs4_op(op_nr, &op);
@@ -873,7 +866,6 @@ encode_hdr:
 		return res;
 	if (op->encode_res != NULL && status == 0)
 		status = op->encode_res(rqstp, xdr_out, resp);
-	dprintk("%s: done, status = %d\n", __func__, ntohl(status));
 	return status;
 }
 
@@ -893,8 +885,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	};
 	unsigned int nops = 0;
 
-	dprintk("%s: start\n", __func__);
-
 	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
 
 	p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
@@ -933,7 +923,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	*hdr_res.nops = htonl(nops);
 	nfs4_cb_free_slot(&cps);
 	nfs_put_client(cps.clp);
-	dprintk("%s: done, status = %u\n", __func__, ntohl(status));
 	return rpc_success;
 
 out_invalidcred:
-- 
cgit v1.2.3


From 2844b6aecf9ec797fde6787a59bca258d06377c3 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:05 -0400
Subject: NFS: Clean up nfs_init_client()

We always call nfs_mark_client_ready() even if nfs_create_rpc_client()
returns an error, so we can rearrange nfs_init_client() to mark the
client ready from a single place.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/client.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 390ada8741bc..675142189181 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -624,27 +624,21 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp,
 {
 	int error;
 
-	if (clp->cl_cons_state == NFS_CS_READY) {
-		/* the client is already initialised */
-		dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
+	/* the client is already initialised */
+	if (clp->cl_cons_state == NFS_CS_READY)
 		return clp;
-	}
 
 	/*
 	 * Create a client RPC handle for doing FSSTAT with UNIX auth only
 	 * - RFC 2623, sec 2.3.2
 	 */
 	error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX);
-	if (error < 0)
-		goto error;
-	nfs_mark_client_ready(clp, NFS_CS_READY);
+	nfs_mark_client_ready(clp, error == 0 ? NFS_CS_READY : error);
+	if (error < 0) {
+		nfs_put_client(clp);
+		clp = ERR_PTR(error);
+	}
 	return clp;
-
-error:
-	nfs_mark_client_ready(clp, error);
-	nfs_put_client(clp);
-	dprintk("<-- nfs_init_client() = xerror %d\n", error);
-	return ERR_PTR(error);
 }
 EXPORT_SYMBOL_GPL(nfs_init_client);
 
-- 
cgit v1.2.3


From 4cbb976821a1e79dea9e0c69d2bafd078a3cd345 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:06 -0400
Subject: NFS: Clean up extra dprintk()s in client.c

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/client.c | 45 +++------------------------------------------
 1 file changed, 3 insertions(+), 42 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 675142189181..3ffbffe8f39f 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -240,8 +240,6 @@ static void pnfs_init_server(struct nfs_server *server)
  */
 void nfs_free_client(struct nfs_client *clp)
 {
-	dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
-
 	nfs_fscache_release_client_cookie(clp);
 
 	/* -EIO all pending I/O */
@@ -256,8 +254,6 @@ void nfs_free_client(struct nfs_client *clp)
 	kfree(clp->cl_hostname);
 	kfree(clp->cl_acceptor);
 	kfree(clp);
-
-	dprintk("<-- nfs_free_client()\n");
 }
 EXPORT_SYMBOL_GPL(nfs_free_client);
 
@@ -271,7 +267,6 @@ void nfs_put_client(struct nfs_client *clp)
 	if (!clp)
 		return;
 
-	dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
 	nn = net_generic(clp->cl_net, nfs_net_id);
 
 	if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
@@ -382,9 +377,6 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
 	}
 
 	smp_rmb();
-
-	dprintk("<-- %s found nfs_client %p for %s\n",
-		__func__, clp, cl_init->hostname ?: "");
 	return clp;
 }
 
@@ -403,9 +395,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
 		return NULL;
 	}
 
-	dprintk("--> nfs_get_client(%s,v%u)\n",
-		cl_init->hostname, rpc_ops->version);
-
 	/* see if the client already exists */
 	do {
 		spin_lock(&nn->nfs_client_lock);
@@ -430,8 +419,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
 		new = rpc_ops->alloc_client(cl_init);
 	} while (!IS_ERR(new));
 
-	dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
-		cl_init->hostname, PTR_ERR(new));
 	return new;
 }
 EXPORT_SYMBOL_GPL(nfs_get_client);
@@ -662,8 +649,6 @@ static int nfs_init_server(struct nfs_server *server,
 	struct nfs_client *clp;
 	int error;
 
-	dprintk("--> nfs_init_server()\n");
-
 	nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
 			data->timeo, data->retrans);
 	if (data->flags & NFS_MOUNT_NORESVPORT)
@@ -671,10 +656,8 @@ static int nfs_init_server(struct nfs_server *server,
 
 	/* Allocate or find a client reference we can use */
 	clp = nfs_get_client(&cl_init);
-	if (IS_ERR(clp)) {
-		dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
+	if (IS_ERR(clp))
 		return PTR_ERR(clp);
-	}
 
 	server->nfs_client = clp;
 
@@ -719,13 +702,11 @@ static int nfs_init_server(struct nfs_server *server,
 	server->mountd_protocol = data->mount_server.protocol;
 
 	server->namelen  = data->namlen;
-	dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
 	return 0;
 
 error:
 	server->nfs_client = NULL;
 	nfs_put_client(clp);
-	dprintk("<-- nfs_init_server() = xerror %d\n", error);
 	return error;
 }
 
@@ -795,12 +776,10 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
 	struct nfs_client *clp = server->nfs_client;
 	int error;
 
-	dprintk("--> nfs_probe_fsinfo()\n");
-
 	if (clp->rpc_ops->set_capabilities != NULL) {
 		error = clp->rpc_ops->set_capabilities(server, mntfh);
 		if (error < 0)
-			goto out_error;
+			return error;
 	}
 
 	fsinfo.fattr = fattr;
@@ -808,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
 	memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype));
 	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
 	if (error < 0)
-		goto out_error;
+		return error;
 
 	nfs_server_set_fsinfo(server, &fsinfo);
 
@@ -823,12 +802,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
 			server->namelen = pathinfo.max_namelen;
 	}
 
-	dprintk("<-- nfs_probe_fsinfo() = 0\n");
 	return 0;
-
-out_error:
-	dprintk("nfs_probe_fsinfo: error = %d\n", -error);
-	return error;
 }
 EXPORT_SYMBOL_GPL(nfs_probe_fsinfo);
 
@@ -930,8 +904,6 @@ EXPORT_SYMBOL_GPL(nfs_alloc_server);
  */
 void nfs_free_server(struct nfs_server *server)
 {
-	dprintk("--> nfs_free_server()\n");
-
 	nfs_server_remove_lists(server);
 
 	if (server->destroy != NULL)
@@ -950,7 +922,6 @@ void nfs_free_server(struct nfs_server *server)
 	bdi_destroy(&server->backing_dev_info);
 	kfree(server);
 	nfs_release_automount_timer();
-	dprintk("<-- nfs_free_server()\n");
 }
 EXPORT_SYMBOL_GPL(nfs_free_server);
 
@@ -1030,10 +1001,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	struct nfs_fattr *fattr_fsinfo;
 	int error;
 
-	dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
-		(unsigned long long) fattr->fsid.major,
-		(unsigned long long) fattr->fsid.minor);
-
 	server = nfs_alloc_server();
 	if (!server)
 		return ERR_PTR(-ENOMEM);
@@ -1065,10 +1032,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
 		server->namelen = NFS4_MAXNAMLEN;
 
-	dprintk("Cloned FSID: %llx:%llx\n",
-		(unsigned long long) server->fsid.major,
-		(unsigned long long) server->fsid.minor);
-
 	error = nfs_start_lockd(server);
 	if (error < 0)
 		goto out_free_server;
@@ -1077,13 +1040,11 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	server->mount_time = jiffies;
 
 	nfs_free_fattr(fattr_fsinfo);
-	dprintk("<-- nfs_clone_server() = %p\n", server);
 	return server;
 
 out_free_server:
 	nfs_free_fattr(fattr_fsinfo);
 	nfs_free_server(server);
-	dprintk("<-- nfs_clone_server() = error %d\n", error);
 	return ERR_PTR(error);
 }
 EXPORT_SYMBOL_GPL(nfs_clone_server);
-- 
cgit v1.2.3


From beeb53380138231eacdf4fcbf78275d94db26f7f Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:07 -0400
Subject: NFS: Remove nfs_direct_readpage_release()

Just remove the function and have the caller use nfs_release_request()
instead.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index aab32fc3d6a8..0b65e25b71ac 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -392,16 +392,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 	nfs_direct_req_release(dreq);
 }
 
-static void nfs_direct_readpage_release(struct nfs_page *req)
-{
-	dprintk("NFS: direct read done (%s/%llu %d@%lld)\n",
-		req->wb_context->dentry->d_sb->s_id,
-		(unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
-		req->wb_bytes,
-		(long long)req_offset(req));
-	nfs_release_request(req);
-}
-
 static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 {
 	unsigned long bytes = 0;
@@ -426,7 +416,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 			set_page_dirty(page);
 		bytes += req->wb_bytes;
 		nfs_list_remove_request(req);
-		nfs_direct_readpage_release(req);
+		nfs_release_request(req);
 	}
 out_put:
 	if (put_dreq(dreq))
-- 
cgit v1.2.3


From fe4f844d49768909150d9c91e283106adcd9a1ea Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:08 -0400
Subject: NFS: Clean up nfs_direct_commit_complete()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0b65e25b71ac..18d0868ee274 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -685,16 +685,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 	int status = data->task.tk_status;
 
 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
-	if (status < 0) {
-		dprintk("NFS: %5u commit failed with error %d.\n",
-			data->task.tk_pid, status);
+	if (status < 0 || nfs_direct_cmp_commit_data_verf(dreq, data))
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-	} else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
-		dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
-		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-	}
 
-	dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
 		nfs_list_remove_request(req);
-- 
cgit v1.2.3


From e36d48e9e2c38e7d31c41e41b21f1f47b61250a9 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:09 -0400
Subject: NFS: Remove extra dprintk()s from namespace.c

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/namespace.c | 34 ++++++++--------------------------
 1 file changed, 8 insertions(+), 26 deletions(-)

diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 786f17580582..1a224a33a6c2 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -143,11 +143,8 @@ struct vfsmount *nfs_d_automount(struct path *path)
 	struct nfs_fh *fh = NULL;
 	struct nfs_fattr *fattr = NULL;
 
-	dprintk("--> nfs_d_automount()\n");
-
-	mnt = ERR_PTR(-ESTALE);
 	if (IS_ROOT(path->dentry))
-		goto out_nofree;
+		return ERR_PTR(-ESTALE);
 
 	mnt = ERR_PTR(-ENOMEM);
 	fh = nfs_alloc_fhandle();
@@ -155,13 +152,10 @@ struct vfsmount *nfs_d_automount(struct path *path)
 	if (fh == NULL || fattr == NULL)
 		goto out;
 
-	dprintk("%s: enter\n", __func__);
-
 	mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
 	if (IS_ERR(mnt))
 		goto out;
 
-	dprintk("%s: done, success\n", __func__);
 	mntget(mnt); /* prevent immediate expiration */
 	mnt_set_expiry(mnt, &nfs_automount_list);
 	schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
@@ -169,11 +163,6 @@ struct vfsmount *nfs_d_automount(struct path *path)
 out:
 	nfs_free_fattr(fattr);
 	nfs_free_fhandle(fh);
-out_nofree:
-	if (IS_ERR(mnt))
-		dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt));
-	else
-		dprintk("<-- %s() = %p\n", __func__, mnt);
 	return mnt;
 }
 
@@ -248,27 +237,20 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
 		.fattr = fattr,
 		.authflavor = authflavor,
 	};
-	struct vfsmount *mnt = ERR_PTR(-ENOMEM);
+	struct vfsmount *mnt;
 	char *page = (char *) __get_free_page(GFP_USER);
 	char *devname;
 
-	dprintk("--> nfs_do_submount()\n");
-
-	dprintk("%s: submounting on %pd2\n", __func__,
-			dentry);
 	if (page == NULL)
-		goto out;
+		return ERR_PTR(-ENOMEM);
+
 	devname = nfs_devname(dentry, page, PAGE_SIZE);
-	mnt = (struct vfsmount *)devname;
 	if (IS_ERR(devname))
-		goto free_page;
-	mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
-free_page:
-	free_page((unsigned long)page);
-out:
-	dprintk("%s: done\n", __func__);
+		mnt = (struct vfsmount *)devname;
+	else
+		mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
 
-	dprintk("<-- nfs_do_submount() = %p\n", mnt);
+	free_page((unsigned long)page);
 	return mnt;
 }
 EXPORT_SYMBOL_GPL(nfs_do_submount);
-- 
cgit v1.2.3


From 5be1810a8dc12573b89602867bc0dc1057e7f79f Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:10 -0400
Subject: NFS: Clean up nfs42_layoutstat_done()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs42proc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 1e486c73ec94..c81c61971625 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -400,8 +400,6 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
 	case -EOPNOTSUPP:
 		NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
 	}
-
-	dprintk("%s server returns %d\n", __func__, task->tk_status);
 }
 
 static void
-- 
cgit v1.2.3


From f251fd9e71890bcfdc838f36c10b1b57a87356ec Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:11 -0400
Subject: NFS: Clean up nfs4_match_clientids()

If we cut out the dprintk()s, then we don't even need this to be a
separate function.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4client.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8346ccbf2d52..39b8c38b6a68 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -582,21 +582,6 @@ out:
 }
 
 #ifdef CONFIG_NFS_V4_1
-/*
- * Returns true if the client IDs match
- */
-static bool nfs4_match_clientids(u64 a, u64 b)
-{
-	if (a != b) {
-		dprintk("NFS: --> %s client ID %llx does not match %llx\n",
-			__func__, a, b);
-		return false;
-	}
-	dprintk("NFS: --> %s client ID %llx matches %llx\n",
-		__func__, a, b);
-	return true;
-}
-
 /*
  * Returns true if the server major ids match
  */
@@ -680,7 +665,7 @@ int nfs4_detect_session_trunking(struct nfs_client *clp,
 				 struct rpc_xprt *xprt)
 {
 	/* Check eir_clientid */
-	if (!nfs4_match_clientids(clp->cl_clientid, res->clientid))
+	if (clp->cl_clientid != res->clientid)
 		goto out_err;
 
 	/* Check eir_server_owner so_major_id */
@@ -765,7 +750,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
 		if (pos->cl_cons_state != NFS_CS_READY)
 			continue;
 
-		if (!nfs4_match_clientids(pos->cl_clientid, new->cl_clientid))
+		if (pos->cl_clientid != new->cl_clientid)
 			continue;
 
 		/*
-- 
cgit v1.2.3


From 5b6d3ff605d1b00ce8138da4091c527d9ad48729 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:12 -0400
Subject: NFS: Clean up nfs4_check_serverowner_minor_id()

Once again, we can remove the function and compare integer values
directly.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4client.c | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 39b8c38b6a68..4f4f179cb849 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -603,25 +603,6 @@ out_major_mismatch:
 	return false;
 }
 
-/*
- * Returns true if server minor ids match
- */
-static bool
-nfs4_check_serverowner_minor_id(struct nfs41_server_owner *o1,
-				struct nfs41_server_owner *o2)
-{
-	/* Check eir_server_owner so_minor_id */
-	if (o1->minor_id != o2->minor_id)
-		goto out_minor_mismatch;
-
-	dprintk("NFS: --> %s server owner minor IDs match\n", __func__);
-	return true;
-
-out_minor_mismatch:
-	dprintk("NFS: --> %s server owner minor IDs do not match\n", __func__);
-	return false;
-}
-
 /*
  * Returns true if the server scopes match
  */
@@ -674,8 +655,7 @@ int nfs4_detect_session_trunking(struct nfs_client *clp,
 		goto out_err;
 
 	/* Check eir_server_owner so_minor_id */
-	if (!nfs4_check_serverowner_minor_id(clp->cl_serverowner,
-					     res->server_owner))
+	if (clp->cl_serverowner->minor_id != res->server_owner->minor_id)
 		goto out_err;
 
 	/* Check eir_server_scope */
-- 
cgit v1.2.3


From 14d1bbb0ca425c4fdd9a38eaac346c028661b752 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:13 -0400
Subject: NFS: Create a common nfs4_match_client() function

This puts all the common code in a single place for the
walk_client_list() functions.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4client.c | 119 ++++++++++++++++++++++++----------------------------
 1 file changed, 55 insertions(+), 64 deletions(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 4f4f179cb849..8853c32eedf5 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -469,6 +469,50 @@ static bool nfs4_same_verifier(nfs4_verifier *v1, nfs4_verifier *v2)
 	return memcmp(v1->data, v2->data, sizeof(v1->data)) == 0;
 }
 
+static int nfs4_match_client(struct nfs_client  *pos,  struct nfs_client *new,
+			     struct nfs_client **prev, struct nfs_net *nn)
+{
+	int status;
+
+	if (pos->rpc_ops != new->rpc_ops)
+		return 1;
+
+	if (pos->cl_minorversion != new->cl_minorversion)
+		return 1;
+
+	/* If "pos" isn't marked ready, we can't trust the
+	 * remaining fields in "pos", especially the client
+	 * ID and serverowner fields.  Wait for CREATE_SESSION
+	 * to finish. */
+	if (pos->cl_cons_state > NFS_CS_READY) {
+		atomic_inc(&pos->cl_count);
+		spin_unlock(&nn->nfs_client_lock);
+
+		nfs_put_client(*prev);
+		*prev = pos;
+
+		status = nfs_wait_client_init_complete(pos);
+		spin_lock(&nn->nfs_client_lock);
+
+		if (status < 0)
+			return status;
+	}
+
+	if (pos->cl_cons_state != NFS_CS_READY)
+		return 1;
+
+	if (pos->cl_clientid != new->cl_clientid)
+		return 1;
+
+	/* NFSv4.1 always uses the uniform string, however someone
+	 * might switch the uniquifier string on us.
+	 */
+	if (!nfs4_match_client_owner_id(pos, new))
+		return 1;
+
+	return 0;
+}
+
 /**
  * nfs40_walk_client_list - Find server that recognizes a client ID
  *
@@ -497,34 +541,10 @@ int nfs40_walk_client_list(struct nfs_client *new,
 	spin_lock(&nn->nfs_client_lock);
 	list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
 
-		if (pos->rpc_ops != new->rpc_ops)
-			continue;
-
-		if (pos->cl_minorversion != new->cl_minorversion)
-			continue;
-
-		/* If "pos" isn't marked ready, we can't trust the
-		 * remaining fields in "pos" */
-		if (pos->cl_cons_state > NFS_CS_READY) {
-			atomic_inc(&pos->cl_count);
-			spin_unlock(&nn->nfs_client_lock);
-
-			nfs_put_client(prev);
-			prev = pos;
-
-			status = nfs_wait_client_init_complete(pos);
-			if (status < 0)
-				goto out;
-			status = -NFS4ERR_STALE_CLIENTID;
-			spin_lock(&nn->nfs_client_lock);
-		}
-		if (pos->cl_cons_state != NFS_CS_READY)
-			continue;
-
-		if (pos->cl_clientid != new->cl_clientid)
-			continue;
-
-		if (!nfs4_match_client_owner_id(pos, new))
+		status = nfs4_match_client(pos, new, &prev, nn);
+		if (status < 0)
+			goto out_unlock;
+		if (status != 0)
 			continue;
 		/*
 		 * We just sent a new SETCLIENTID, which should have
@@ -567,11 +587,13 @@ int nfs40_walk_client_list(struct nfs_client *new,
 			 */
 			nfs4_schedule_path_down_recovery(pos);
 		default:
+			spin_lock(&nn->nfs_client_lock);
 			goto out;
 		}
 
 		spin_lock(&nn->nfs_client_lock);
 	}
+out_unlock:
 	spin_unlock(&nn->nfs_client_lock);
 
 	/* No match found. The server lost our clientid */
@@ -704,33 +726,10 @@ int nfs41_walk_client_list(struct nfs_client *new,
 		if (pos == new)
 			goto found;
 
-		if (pos->rpc_ops != new->rpc_ops)
-			continue;
-
-		if (pos->cl_minorversion != new->cl_minorversion)
-			continue;
-
-		/* If "pos" isn't marked ready, we can't trust the
-		 * remaining fields in "pos", especially the client
-		 * ID and serverowner fields.  Wait for CREATE_SESSION
-		 * to finish. */
-		if (pos->cl_cons_state > NFS_CS_READY) {
-			atomic_inc(&pos->cl_count);
-			spin_unlock(&nn->nfs_client_lock);
-
-			nfs_put_client(prev);
-			prev = pos;
-
-			status = nfs_wait_client_init_complete(pos);
-			spin_lock(&nn->nfs_client_lock);
-			if (status < 0)
-				break;
-			status = -NFS4ERR_STALE_CLIENTID;
-		}
-		if (pos->cl_cons_state != NFS_CS_READY)
-			continue;
-
-		if (pos->cl_clientid != new->cl_clientid)
+		status = nfs4_match_client(pos, new, &prev, nn);
+		if (status < 0)
+			goto out;
+		if (status != 0)
 			continue;
 
 		/*
@@ -742,23 +741,15 @@ int nfs41_walk_client_list(struct nfs_client *new,
 						     new->cl_serverowner))
 			continue;
 
-		/* Unlike NFSv4.0, we know that NFSv4.1 always uses the
-		 * uniform string, however someone might switch the
-		 * uniquifier string on us.
-		 */
-		if (!nfs4_match_client_owner_id(pos, new))
-			continue;
 found:
 		atomic_inc(&pos->cl_count);
 		*result = pos;
 		status = 0;
-		dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
-			__func__, pos, atomic_read(&pos->cl_count));
 		break;
 	}
 
+out:
 	spin_unlock(&nn->nfs_client_lock);
-	dprintk("NFS: <-- %s status = %d\n", __func__, status);
 	nfs_put_client(prev);
 	return status;
 }
-- 
cgit v1.2.3


From ddfa0d4860347bfc491929af2750088bb75c5462 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:14 -0400
Subject: NFS: Clean up nfs4_check_serverowner_major_id()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4client.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8853c32eedf5..ce6f2ef62595 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -612,17 +612,8 @@ nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1,
 				struct nfs41_server_owner *o2)
 {
 	if (o1->major_id_sz != o2->major_id_sz)
-		goto out_major_mismatch;
-	if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0)
-		goto out_major_mismatch;
-
-	dprintk("NFS: --> %s server owner major IDs match\n", __func__);
-	return true;
-
-out_major_mismatch:
-	dprintk("NFS: --> %s server owner major IDs do not match\n",
-		__func__);
-	return false;
+		return false;
+	return memcmp(o1->major_id, o2->major_id, o1->major_id_sz) == 0;
 }
 
 /*
-- 
cgit v1.2.3


From 8da0f93438ca4a03f23d8f3275b253b0897c8833 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:15 -0400
Subject: NFS: Clean up nfs4_check_server_scope()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4client.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index ce6f2ef62595..c9016de3e5bd 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -624,18 +624,9 @@ nfs4_check_server_scope(struct nfs41_server_scope *s1,
 			struct nfs41_server_scope *s2)
 {
 	if (s1->server_scope_sz != s2->server_scope_sz)
-		goto out_scope_mismatch;
-	if (memcmp(s1->server_scope, s2->server_scope,
-		   s1->server_scope_sz) != 0)
-		goto out_scope_mismatch;
-
-	dprintk("NFS: --> %s server scopes match\n", __func__);
-	return true;
-
-out_scope_mismatch:
-	dprintk("NFS: --> %s server scopes do not match\n",
-		__func__);
-	return false;
+		return false;
+	return memcmp(s1->server_scope, s2->server_scope,
+					s1->server_scope_sz) == 0;
 }
 
 /**
-- 
cgit v1.2.3


From 2dc42c0d60e0104f7cd8beee3871f953565392ff Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:16 -0400
Subject: NFS: Clean up nfs4_set_client()

If we cut out the dprintk()s, then we can return error codes directly
and cut out the goto.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4client.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index c9016de3e5bd..a380255e85ed 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -854,9 +854,6 @@ static int nfs4_set_client(struct nfs_server *server,
 		.timeparms = timeparms,
 	};
 	struct nfs_client *clp;
-	int error;
-
-	dprintk("--> nfs4_set_client()\n");
 
 	if (server->flags & NFS_MOUNT_NORESVPORT)
 		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -865,15 +862,11 @@ static int nfs4_set_client(struct nfs_server *server,
 
 	/* Allocate or find a client reference we can use */
 	clp = nfs_get_client(&cl_init);
-	if (IS_ERR(clp)) {
-		error = PTR_ERR(clp);
-		goto error;
-	}
+	if (IS_ERR(clp))
+		return PTR_ERR(clp);
 
-	if (server->nfs_client == clp) {
-		error = -ELOOP;
-		goto error;
-	}
+	if (server->nfs_client == clp)
+		return -ELOOP;
 
 	/*
 	 * Query for the lease time on clientid setup or renewal
@@ -885,11 +878,7 @@ static int nfs4_set_client(struct nfs_server *server,
 	set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
 
 	server->nfs_client = clp;
-	dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
 	return 0;
-error:
-	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
-	return error;
 }
 
 /*
-- 
cgit v1.2.3


From 1073d9b49ae4ede7682320168a209d46f70508bf Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:17 -0400
Subject: NFS: Clean up nfs4_init_server()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4client.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index a380255e85ed..2e7fc76cbd4b 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1025,8 +1025,6 @@ static int nfs4_init_server(struct nfs_server *server,
 	struct rpc_timeout timeparms;
 	int error;
 
-	dprintk("--> nfs4_init_server()\n");
-
 	nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
 			data->timeo, data->retrans);
 
@@ -1054,7 +1052,7 @@ static int nfs4_init_server(struct nfs_server *server,
 			data->minorversion,
 			data->net);
 	if (error < 0)
-		goto error;
+		return error;
 
 	if (data->rsize)
 		server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1065,16 +1063,10 @@ static int nfs4_init_server(struct nfs_server *server,
 	server->acregmax = data->acregmax * HZ;
 	server->acdirmin = data->acdirmin * HZ;
 	server->acdirmax = data->acdirmax * HZ;
+	server->port     = data->nfs_server.port;
 
-	server->port = data->nfs_server.port;
-
-	error = nfs_init_server_rpcclient(server, &timeparms,
-					  data->selected_flavor);
-
-error:
-	/* Done */
-	dprintk("<-- nfs4_init_server() = %d\n", error);
-	return error;
+	return nfs_init_server_rpcclient(server, &timeparms,
+					 data->selected_flavor);
 }
 
 /*
-- 
cgit v1.2.3


From 4fe6b366d9a2694970f26dcda7026390c5d0d7c7 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:18 -0400
Subject: NFS: Remove extra dprintk()s from nfs4client.c

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4client.c | 62 +++++++++--------------------------------------------
 1 file changed, 10 insertions(+), 52 deletions(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 2e7fc76cbd4b..692a7a8bfc7a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -359,11 +359,9 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 	struct nfs_client *old;
 	int error;
 
-	if (clp->cl_cons_state == NFS_CS_READY) {
+	if (clp->cl_cons_state == NFS_CS_READY)
 		/* the client is initialised already */
-		dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
 		return clp;
-	}
 
 	/* Check NFS protocol revision and initialize RPC op vector */
 	clp->rpc_ops = &nfs_v4_clientops;
@@ -421,7 +419,6 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 error:
 	nfs_mark_client_ready(clp, error);
 	nfs_put_client(clp);
-	dprintk("<-- nfs4_init_client() = xerror %d\n", error);
 	return ERR_PTR(error);
 }
 
@@ -577,8 +574,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
 
 			prev = NULL;
 			*result = pos;
-			dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
-				__func__, pos, atomic_read(&pos->cl_count));
 			goto out;
 		case -ERESTARTSYS:
 		case -ETIMEDOUT:
@@ -599,7 +594,6 @@ out_unlock:
 	/* No match found. The server lost our clientid */
 out:
 	nfs_put_client(prev);
-	dprintk("NFS: <-- %s status = %d\n", __func__, status);
 	return status;
 }
 
@@ -909,7 +903,6 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
 		.net = mds_clp->cl_net,
 		.timeparms = &ds_timeout,
 	};
-	struct nfs_client *clp;
 	char buf[INET6_ADDRSTRLEN + 1];
 
 	if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
@@ -925,10 +918,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
 	 * (section 13.1 RFC 5661).
 	 */
 	nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
-	clp = nfs_get_client(&cl_init);
-
-	dprintk("<-- %s %p\n", __func__, clp);
-	return clp;
+	return nfs_get_client(&cl_init);
 }
 EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
 
@@ -1082,8 +1072,6 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
 	bool auth_probe;
 	int error;
 
-	dprintk("--> nfs4_create_server()\n");
-
 	server = nfs_alloc_server();
 	if (!server)
 		return ERR_PTR(-ENOMEM);
@@ -1099,12 +1087,10 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
 	if (error < 0)
 		goto error;
 
-	dprintk("<-- nfs4_create_server() = %p\n", server);
 	return server;
 
 error:
 	nfs_free_server(server);
-	dprintk("<-- nfs4_create_server() = error %d\n", error);
 	return ERR_PTR(error);
 }
 
@@ -1119,8 +1105,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 	bool auth_probe;
 	int error;
 
-	dprintk("--> nfs4_create_referral_server()\n");
-
 	server = nfs_alloc_server();
 	if (!server)
 		return ERR_PTR(-ENOMEM);
@@ -1154,12 +1138,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 	if (error < 0)
 		goto error;
 
-	dprintk("<-- nfs_create_referral_server() = %p\n", server);
 	return server;
 
 error:
 	nfs_free_server(server);
-	dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
 	return ERR_PTR(error);
 }
 
@@ -1219,31 +1201,16 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
 	struct sockaddr *localaddr = (struct sockaddr *)&address;
 	int error;
 
-	dprintk("--> %s: move FSID %llx:%llx to \"%s\")\n", __func__,
-			(unsigned long long)server->fsid.major,
-			(unsigned long long)server->fsid.minor,
-			hostname);
-
 	error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout);
-	if (error != 0) {
-		dprintk("<-- %s(): rpc_switch_client_transport returned %d\n",
-			__func__, error);
-		goto out;
-	}
+	if (error != 0)
+		return error;
 
 	error = rpc_localaddr(clnt, localaddr, sizeof(address));
-	if (error != 0) {
-		dprintk("<-- %s(): rpc_localaddr returned %d\n",
-			__func__, error);
-		goto out;
-	}
+	if (error != 0)
+		return error;
 
-	error = -EAFNOSUPPORT;
-	if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0) {
-		dprintk("<-- %s(): rpc_ntop returned %d\n",
-			__func__, error);
-		goto out;
-	}
+	if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0)
+		return -EAFNOSUPPORT;
 
 	nfs_server_remove_lists(server);
 	error = nfs4_set_client(server, hostname, sap, salen, buf,
@@ -1252,21 +1219,12 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
 	nfs_put_client(clp);
 	if (error != 0) {
 		nfs_server_insert_lists(server);
-		dprintk("<-- %s(): nfs4_set_client returned %d\n",
-			__func__, error);
-		goto out;
+		return error;
 	}
 
 	if (server->nfs_client->cl_hostname == NULL)
 		server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
 	nfs_server_insert_lists(server);
 
-	error = nfs_probe_destination(server);
-	if (error < 0)
-		goto out;
-
-	dprintk("<-- %s() succeeded\n", __func__);
-
-out:
-	return error;
+	return nfs_probe_destination(server);
 }
-- 
cgit v1.2.3


From 539fd1d1f43414bd7a886147c475fa09a3cc528a Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:19 -0400
Subject: NFS: Clean up nfs4_get_rootfh()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4getroot.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index 039b3eb6d834..ac8406018962 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -14,8 +14,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p
 	struct nfs_fsinfo fsinfo;
 	int ret = -ENOMEM;
 
-	dprintk("--> nfs4_get_rootfh()\n");
-
 	fsinfo.fattr = nfs_alloc_fattr();
 	if (fsinfo.fattr == NULL)
 		goto out;
@@ -38,6 +36,5 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p
 	memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
 out:
 	nfs_free_fattr(fsinfo.fattr);
-	dprintk("<-- nfs4_get_rootfh() = %d\n", ret);
 	return ret;
 }
-- 
cgit v1.2.3


From 3183783bbb5f57e183b99f593de7159898f91fdd Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:20 -0400
Subject: NFS: Remove extra dprintk()s from nfs4namespace.c

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4namespace.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index d8b040bd9814..7d531da1bae3 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -340,7 +340,6 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
 out:
 	free_page((unsigned long) page);
 	free_page((unsigned long) page2);
-	dprintk("%s: done\n", __func__);
 	return mnt;
 }
 
@@ -358,11 +357,9 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
 	int err;
 
 	/* BUG_ON(IS_ROOT(dentry)); */
-	dprintk("%s: enter\n", __func__);
-
 	page = alloc_page(GFP_KERNEL);
 	if (page == NULL)
-		goto out;
+		return mnt;
 
 	fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
 	if (fs_locations == NULL)
@@ -386,8 +383,6 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
 out_free:
 	__free_page(page);
 	kfree(fs_locations);
-out:
-	dprintk("%s: done\n", __func__);
 	return mnt;
 }
 
-- 
cgit v1.2.3


From c7ae763903df4d8a2c907588274b4887d04d2413 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:21 -0400
Subject: NFS: Clean up nfs4_proc_bind_one_conn_to_session()

Returning errors directly even lets us remove the goto

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 201ca3f2c4ba..184d57a5098d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7155,8 +7155,6 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
 	};
 	struct rpc_task *task;
 
-	dprintk("--> %s\n", __func__);
-
 	nfs4_copy_sessionid(&args.sessionid, &clp->cl_session->sess_id);
 	if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
 		args.dir = NFS4_CDFC4_FORE;
@@ -7176,24 +7174,20 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
 		if (memcmp(res.sessionid.data,
 		    clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) {
 			dprintk("NFS: %s: Session ID mismatch\n", __func__);
-			status = -EIO;
-			goto out;
+			return -EIO;
 		}
 		if ((res.dir & args.dir) != res.dir || res.dir == 0) {
 			dprintk("NFS: %s: Unexpected direction from server\n",
 				__func__);
-			status = -EIO;
-			goto out;
+			return -EIO;
 		}
 		if (res.use_conn_in_rdma_mode != args.use_conn_in_rdma_mode) {
 			dprintk("NFS: %s: Server returned RDMA mode = true\n",
 				__func__);
-			status = -EIO;
-			goto out;
+			return -EIO;
 		}
 	}
-out:
-	dprintk("<-- %s status= %d\n", __func__, status);
+
 	return status;
 }
 
-- 
cgit v1.2.3


From e917f0d1cee17b7a4316ac336ac6beb2d5916806 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:22 -0400
Subject: NFS: Clean up _nfs4_proc_exchange_id()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 184d57a5098d..a3808826b655 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7453,15 +7453,14 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 	};
 	struct nfs41_exchange_id_data *calldata;
 	struct rpc_task *task;
-	int status = -EIO;
+	int status;
 
 	if (!atomic_inc_not_zero(&clp->cl_count))
-		goto out;
+		return -EIO;
 
-	status = -ENOMEM;
 	calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
 	if (!calldata)
-		goto out;
+		return -ENOMEM;
 
 	if (!xprt)
 		nfs4_init_boot_verifier(clp, &verifier);
@@ -7470,10 +7469,6 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 	if (status)
 		goto out_calldata;
 
-	dprintk("NFS call  exchange_id auth=%s, '%s'\n",
-		clp->cl_rpcclient->cl_auth->au_ops->au_name,
-		clp->cl_owner_id);
-
 	calldata->res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
 						GFP_NOFS);
 	status = -ENOMEM;
@@ -7539,13 +7534,6 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 
 	rpc_put_task(task);
 out:
-	if (clp->cl_implid != NULL)
-		dprintk("NFS reply exchange_id: Server Implementation ID: "
-			"domain: %s, name: %s, date: %llu,%u\n",
-			clp->cl_implid->domain, clp->cl_implid->name,
-			clp->cl_implid->date.seconds,
-			clp->cl_implid->date.nseconds);
-	dprintk("NFS reply exchange_id: %d\n", status);
 	return status;
 
 out_impl_id:
-- 
cgit v1.2.3


From f6148713b24107c478ce532a4be8e5c58437c80e Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 7 Apr 2017 14:15:23 -0400
Subject: NFS: Clean up nfs4_proc_get_lease_time()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a3808826b655..dda19a35ad9e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7751,17 +7751,13 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 
 	nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
 	nfs4_set_sequence_privileged(&args.la_seq_args);
-	dprintk("--> %s\n", __func__);
 	task = rpc_run_task(&task_setup);
 
 	if (IS_ERR(task))
-		status = PTR_ERR(task);
-	else {
-		status = task->tk_status;
-		rpc_put_task(task);
-	}
-	dprintk("<-- %s return %d\n", __func__, status);
+		return PTR_ERR(task);
 
+	status = task->tk_status;
+	rpc_put_task(task);
 	return status;
 }
 
-- 
cgit v1.2.3


From 518662e0fcb9fa241fe90a337b59bc5066b2a930 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 10 Apr 2017 12:22:09 +1000
Subject: NFS: fix usage of mempools.

When passed GFP flags that allow sleeping (such as
GFP_NOIO), mempool_alloc() will never return NULL, it will
wait until memory is available.

This means that we don't need to handle failure, but that we
do need to ensure one thread doesn't call mempool_alloc()
twice on the one pool without queuing or freeing the first
allocation.  If multiple threads did this during times of
high memory pressure, the pool could be exhausted and a
deadlock could result.

pnfs_generic_alloc_ds_commits() attempts to allocate from
the nfs_commit_mempool while already holding an allocation
from that pool.  This is not safe.  So change
nfs_commitdata_alloc() to take a flag that indicates whether
failure is acceptable.

In pnfs_generic_alloc_ds_commits(), accept failure and
handle it as we currently do.  Else where, do not accept
failure, and do not handle it.

Even when failure is acceptable, we want to succeed if
possible.  That means both
 - using an entry from the pool if there is one
 - waiting for direct reclaim is there isn't.

We call mempool_alloc(GFP_NOWAIT) to achieve the first, then
kmem_cache_alloc(GFP_NOIO|__GFP_NORETRY) to achieve the
second.  Each of these can fail, but together they do the
best they can without blocking indefinitely.

The objects returned by kmem_cache_alloc() will still be freed
by mempool_free().  This is safe as mempool_alloc() uses
exactly the same function to allocate objects (since the mempool
was created with mempool_create_slab_pool()).  The object returned
by mempool_alloc() and kmem_cache_alloc() are indistinguishable
so mempool_free() will handle both identically, either adding to the
pool or calling kmem_cache_free().

Also, don't test for failure when allocating from
nfs_wdata_mempool.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs_nfs.c      | 16 +++++-----------
 fs/nfs/write.c         | 35 +++++++++++++++++++++--------------
 include/linux/nfs_fs.h |  2 +-
 3 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 7250b95549ec..1edf5b84aba5 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -217,7 +217,7 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
 	for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
 		if (list_empty(&bucket->committing))
 			continue;
-		data = nfs_commitdata_alloc();
+		data = nfs_commitdata_alloc(false);
 		if (!data)
 			break;
 		data->ds_commit_index = i;
@@ -283,16 +283,10 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 	unsigned int nreq = 0;
 
 	if (!list_empty(mds_pages)) {
-		data = nfs_commitdata_alloc();
-		if (data != NULL) {
-			data->ds_commit_index = -1;
-			list_add(&data->pages, &list);
-			nreq++;
-		} else {
-			nfs_retry_commit(mds_pages, NULL, cinfo, 0);
-			pnfs_generic_retry_commit(cinfo, 0);
-			return -ENOMEM;
-		}
+		data = nfs_commitdata_alloc(true);
+		data->ds_commit_index = -1;
+		list_add(&data->pages, &list);
+		nreq++;
 	}
 
 	nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index abb2c8a3be42..bdfe5a7c5874 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -60,14 +60,28 @@ static mempool_t *nfs_wdata_mempool;
 static struct kmem_cache *nfs_cdata_cachep;
 static mempool_t *nfs_commit_mempool;
 
-struct nfs_commit_data *nfs_commitdata_alloc(void)
+struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail)
 {
-	struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
+	struct nfs_commit_data *p;
 
-	if (p) {
-		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&p->pages);
+	if (never_fail)
+		p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
+	else {
+		/* It is OK to do some reclaim, not no safe to wait
+		 * for anything to be returned to the pool.
+		 * mempool_alloc() cannot handle that particular combination,
+		 * so we need two separate attempts.
+		 */
+		p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
+		if (!p)
+			p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO |
+					     __GFP_NOWARN | __GFP_NORETRY);
+		if (!p)
+			return NULL;
 	}
+
+	memset(p, 0, sizeof(*p));
+	INIT_LIST_HEAD(&p->pages);
 	return p;
 }
 EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
@@ -82,8 +96,7 @@ static struct nfs_pgio_header *nfs_writehdr_alloc(void)
 {
 	struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
 
-	if (p)
-		memset(p, 0, sizeof(*p));
+	memset(p, 0, sizeof(*p));
 	return p;
 }
 
@@ -1705,19 +1718,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
 	if (list_empty(head))
 		return 0;
 
-	data = nfs_commitdata_alloc();
-
-	if (!data)
-		goto out_bad;
+	data = nfs_commitdata_alloc(true);
 
 	/* Set up the argument struct */
 	nfs_init_commit(data, head, NULL, cinfo);
 	atomic_inc(&cinfo->mds->rpcs_out);
 	return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
 				   data->mds_ops, how, 0);
- out_bad:
-	nfs_retry_commit(head, NULL, cinfo, 0);
-	return -ENOMEM;
 }
 
 int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 287f34161086..1b29915247b2 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -502,7 +502,7 @@ extern int nfs_wb_all(struct inode *inode);
 extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 extern int  nfs_commit_inode(struct inode *, int);
-extern struct nfs_commit_data *nfs_commitdata_alloc(void);
+extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail);
 extern void nfs_commit_free(struct nfs_commit_data *data);
 
 static inline int
-- 
cgit v1.2.3


From 62b2417e84ba0734d3f4f95a17e6f5f0be54d75a Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 10 Apr 2017 12:19:40 +1000
Subject: sunrpc: don't check for failure from mempool_alloc()

When mempool_alloc() is allowed to sleep (GFP_NOIO allows
sleeping) it cannot fail.
So rpc_alloc_task() cannot fail, so rpc_new_task doesn't need
to test for failure.
Consequently rpc_new_task() cannot fail, so the callers
don't need to test.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/clnt.c  | 8 --------
 net/sunrpc/sched.c | 5 -----
 2 files changed, 13 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 52da3ce54bb5..b5cb921775a0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1042,8 +1042,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 	struct rpc_task *task;
 
 	task = rpc_new_task(task_setup_data);
-	if (IS_ERR(task))
-		goto out;
 
 	rpc_task_set_client(task, task_setup_data->rpc_client);
 	rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
@@ -1053,7 +1051,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 
 	atomic_inc(&task->tk_count);
 	rpc_execute(task);
-out:
 	return task;
 }
 EXPORT_SYMBOL_GPL(rpc_run_task);
@@ -1140,10 +1137,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 	 * Create an rpc_task to send the data
 	 */
 	task = rpc_new_task(&task_setup_data);
-	if (IS_ERR(task)) {
-		xprt_free_bc_request(req);
-		goto out;
-	}
 	task->tk_rqstp = req;
 
 	/*
@@ -1158,7 +1151,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 	WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
 	rpc_execute(task);
 
-out:
 	dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
 	return task;
 }
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5db68b371db2..0cc83839c13c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -965,11 +965,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
 
 	if (task == NULL) {
 		task = rpc_alloc_task();
-		if (task == NULL) {
-			rpc_release_calldata(setup_data->callback_ops,
-					setup_data->callback_data);
-			return ERR_PTR(-ENOMEM);
-		}
 		flags = RPC_TASK_DYNAMIC;
 	}
 
-- 
cgit v1.2.3


From 43b7d964ed30dbca5c83c90cb010985b429ec4f9 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 14 Apr 2017 12:29:54 -0400
Subject: NFS: Fix missing pg_cleanup after nfs_pageio_cond_complete()

Commit a7d42ddb3099727f58366fa006f850a219cce6c8 ("nfs: add mirroring
support to pgio layer") moved pg_cleanup out of the path when there was
non-sequental I/O that needed to be flushed.  The result is that for
layouts that have more than one layout segment per file, the pg_lseg is not
cleared, so we can end up hitting the WARN_ON_ONCE(req_start >= seg_end) in
pnfs_generic_pg_test since the pg_lseg will be pointing to that
previously-flushed layout segment.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Fixes: a7d42ddb3099 ("nfs: add mirroring support to pgio layer")
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 6e629b856a00..3917268961d9 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -1256,8 +1256,10 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
 		mirror = &desc->pg_mirrors[midx];
 		if (!list_empty(&mirror->pg_list)) {
 			prev = nfs_list_entry(mirror->pg_list.prev);
-			if (index != prev->wb_index + 1)
-				nfs_pageio_complete_mirror(desc, midx);
+			if (index != prev->wb_index + 1) {
+				nfs_pageio_complete(desc);
+				break;
+			}
 		}
 	}
 }
-- 
cgit v1.2.3


From 1f84ccdf37d0db3a70714d02d51b0b6d45887fb8 Mon Sep 17 00:00:00 2001
From: Fred Isaman <fred.isaman@gmail.com>
Date: Fri, 14 Apr 2017 14:24:28 -0400
Subject: NFS: Fix use after free in write error path

Signed-off-by: Fred Isaman <fred.isaman@gmail.com>
Fixes: 0bcbf039f6b2b ("nfs: handle request add failure properly")
Cc: stable@vger.kernel.org # v4.5+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index bdfe5a7c5874..e0bccbefbc9e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -561,9 +561,9 @@ static void nfs_write_error_remove_page(struct nfs_page *req)
 {
 	nfs_unlock_request(req);
 	nfs_end_page_writeback(req);
-	nfs_release_request(req);
 	generic_error_remove_page(page_file_mapping(req->wb_page),
 				  req->wb_page);
+	nfs_release_request(req);
 }
 
 /*
-- 
cgit v1.2.3


From ae97aa524ef495b6276fd26f5d5449fb22975d7c Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Wed, 19 Apr 2017 10:11:33 -0400
Subject: NFS: Use GFP_NOIO for two allocations in writeback

Prevent a deadlock that can occur if we wait on allocations
that try to write back our pages.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Fixes: 00bfa30abe869 ("NFS: Create a common pgio_alloc and pgio_release...")
Cc: stable@vger.kernel.org # 3.16+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 3917268961d9..7d83f3acdca1 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,13 +29,14 @@
 static struct kmem_cache *nfs_page_cachep;
 static const struct rpc_call_ops nfs_pgio_common_ops;
 
-static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
+static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount,
+					gfp_t gfp_flags)
 {
 	p->npages = pagecount;
 	if (pagecount <= ARRAY_SIZE(p->page_array))
 		p->pagevec = p->page_array;
 	else {
-		p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+		p->pagevec = kcalloc(pagecount, sizeof(struct page *), gfp_flags);
 		if (!p->pagevec)
 			p->npages = 0;
 	}
@@ -681,6 +682,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_pgio_mirror *new;
 	int i;
+	gfp_t gfp_flags = GFP_KERNEL;
 
 	desc->pg_moreio = 0;
 	desc->pg_inode = inode;
@@ -700,8 +702,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	if (pg_ops->pg_get_mirror_count) {
 		/* until we have a request, we don't have an lseg and no
 		 * idea how many mirrors there will be */
+		if (desc->pg_rw_ops->rw_mode == FMODE_WRITE)
+			gfp_flags = GFP_NOIO;
 		new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX,
-			      sizeof(struct nfs_pgio_mirror), GFP_KERNEL);
+			      sizeof(struct nfs_pgio_mirror), gfp_flags);
 		desc->pg_mirrors_dynamic = new;
 		desc->pg_mirrors = new;
 
@@ -755,9 +759,12 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 	struct list_head *head = &mirror->pg_list;
 	struct nfs_commit_info cinfo;
 	unsigned int pagecount, pageused;
+	gfp_t gfp_flags = GFP_KERNEL;
 
 	pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
-	if (!nfs_pgarray_set(&hdr->page_array, pagecount)) {
+	if (desc->pg_rw_ops->rw_mode == FMODE_WRITE)
+		gfp_flags = GFP_NOIO;
+	if (!nfs_pgarray_set(&hdr->page_array, pagecount, gfp_flags)) {
 		nfs_pgio_error(hdr);
 		desc->pg_error = -ENOMEM;
 		return desc->pg_error;
-- 
cgit v1.2.3


From 8ef9b0b9e1c02879c9a41246437a23f513e4378b Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Wed, 19 Apr 2017 10:11:34 -0400
Subject: NFS: move nfs_pgarray_set() to open code

Since commit 00bfa30abe86 ("NFS: Create a common pgio_alloc and
pgio_release function"), nfs_pgarray_set() has only a single caller.  Let's
open code it.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c | 34 ++++++++++++++--------------------
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7d83f3acdca1..453255c30fa1 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,20 +29,6 @@
 static struct kmem_cache *nfs_page_cachep;
 static const struct rpc_call_ops nfs_pgio_common_ops;
 
-static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount,
-					gfp_t gfp_flags)
-{
-	p->npages = pagecount;
-	if (pagecount <= ARRAY_SIZE(p->page_array))
-		p->pagevec = p->page_array;
-	else {
-		p->pagevec = kcalloc(pagecount, sizeof(struct page *), gfp_flags);
-		if (!p->pagevec)
-			p->npages = 0;
-	}
-	return p->pagevec != NULL;
-}
-
 struct nfs_pgio_mirror *
 nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc)
 {
@@ -758,16 +744,24 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 				*last_page;
 	struct list_head *head = &mirror->pg_list;
 	struct nfs_commit_info cinfo;
+	struct nfs_page_array *pg_array = &hdr->page_array;
 	unsigned int pagecount, pageused;
 	gfp_t gfp_flags = GFP_KERNEL;
 
 	pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
-	if (desc->pg_rw_ops->rw_mode == FMODE_WRITE)
-		gfp_flags = GFP_NOIO;
-	if (!nfs_pgarray_set(&hdr->page_array, pagecount, gfp_flags)) {
-		nfs_pgio_error(hdr);
-		desc->pg_error = -ENOMEM;
-		return desc->pg_error;
+
+	if (pagecount <= ARRAY_SIZE(pg_array->page_array))
+		pg_array->pagevec = pg_array->page_array;
+	else {
+		if (desc->pg_rw_ops->rw_mode == FMODE_WRITE)
+			gfp_flags = GFP_NOIO;
+		pg_array->pagevec = kcalloc(pagecount, sizeof(struct page *), gfp_flags);
+		if (!pg_array->pagevec) {
+			pg_array->npages = 0;
+			nfs_pgio_error(hdr);
+			desc->pg_error = -ENOMEM;
+			return desc->pg_error;
+		}
 	}
 
 	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
-- 
cgit v1.2.3


From fbe77c30e9abcb3429380dec622439991a718e31 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Wed, 19 Apr 2017 10:11:35 -0400
Subject: NFS: move rw_mode to nfs_pageio_header

Let's try to have it in a cacheline in nfs4_proc_pgio_rpc_prepare().

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c        | 2 +-
 fs/nfs/pagelist.c        | 8 +++-----
 fs/nfs/read.c            | 9 ++++++---
 fs/nfs/write.c           | 8 +++++---
 include/linux/nfs_page.h | 4 ++--
 include/linux/nfs_xdr.h  | 1 +
 6 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index dda19a35ad9e..4e52ac773d1f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4610,7 +4610,7 @@ static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
 		return 0;
 	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
 				hdr->args.lock_context,
-				hdr->rw_ops->rw_mode) == -EIO)
+				hdr->rw_mode) == -EIO)
 		return -EIO;
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags)))
 		return -EIO;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 453255c30fa1..f53610672f03 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -664,11 +664,11 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 		     const struct nfs_pgio_completion_ops *compl_ops,
 		     const struct nfs_rw_ops *rw_ops,
 		     size_t bsize,
-		     int io_flags)
+		     int io_flags,
+		     gfp_t gfp_flags)
 {
 	struct nfs_pgio_mirror *new;
 	int i;
-	gfp_t gfp_flags = GFP_KERNEL;
 
 	desc->pg_moreio = 0;
 	desc->pg_inode = inode;
@@ -688,8 +688,6 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	if (pg_ops->pg_get_mirror_count) {
 		/* until we have a request, we don't have an lseg and no
 		 * idea how many mirrors there will be */
-		if (desc->pg_rw_ops->rw_mode == FMODE_WRITE)
-			gfp_flags = GFP_NOIO;
 		new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX,
 			      sizeof(struct nfs_pgio_mirror), gfp_flags);
 		desc->pg_mirrors_dynamic = new;
@@ -753,7 +751,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 	if (pagecount <= ARRAY_SIZE(pg_array->page_array))
 		pg_array->pagevec = pg_array->page_array;
 	else {
-		if (desc->pg_rw_ops->rw_mode == FMODE_WRITE)
+		if (hdr->rw_mode == FMODE_WRITE)
 			gfp_flags = GFP_NOIO;
 		pg_array->pagevec = kcalloc(pagecount, sizeof(struct page *), gfp_flags);
 		if (!pg_array->pagevec) {
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index defc9233e985..a8421d9dab6a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -35,7 +35,11 @@ static struct kmem_cache *nfs_rdata_cachep;
 
 static struct nfs_pgio_header *nfs_readhdr_alloc(void)
 {
-	return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
+	struct nfs_pgio_header *p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
+
+	if (p)
+		p->rw_mode = FMODE_READ;
+	return p;
 }
 
 static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
@@ -64,7 +68,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 		pg_ops = server->pnfs_curr_ld->pg_read_ops;
 #endif
 	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
-			server->rsize, 0);
+			server->rsize, 0, GFP_KERNEL);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
 
@@ -451,7 +455,6 @@ void nfs_destroy_readpagecache(void)
 }
 
 static const struct nfs_rw_ops nfs_rw_read_ops = {
-	.rw_mode		= FMODE_READ,
 	.rw_alloc_header	= nfs_readhdr_alloc,
 	.rw_free_header		= nfs_readhdr_free,
 	.rw_done		= nfs_readpage_done,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e0bccbefbc9e..95ac001b6fdf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -96,7 +96,10 @@ static struct nfs_pgio_header *nfs_writehdr_alloc(void)
 {
 	struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
 
-	memset(p, 0, sizeof(*p));
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		p->rw_mode = FMODE_WRITE;
+	}
 	return p;
 }
 
@@ -1381,7 +1384,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 		pg_ops = server->pnfs_curr_ld->pg_write_ops;
 #endif
 	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
-			server->wsize, ioflags);
+			server->wsize, ioflags, GFP_NOIO);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
 
@@ -2115,7 +2118,6 @@ void nfs_destroy_writepagecache(void)
 }
 
 static const struct nfs_rw_ops nfs_rw_write_ops = {
-	.rw_mode		= FMODE_WRITE,
 	.rw_alloc_header	= nfs_writehdr_alloc,
 	.rw_free_header		= nfs_writehdr_free,
 	.rw_done		= nfs_writeback_done,
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 957049f72290..6f01e28bba27 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -64,7 +64,6 @@ struct nfs_pageio_ops {
 };
 
 struct nfs_rw_ops {
-	const fmode_t rw_mode;
 	struct nfs_pgio_header *(*rw_alloc_header)(void);
 	void (*rw_free_header)(struct nfs_pgio_header *);
 	int  (*rw_done)(struct rpc_task *, struct nfs_pgio_header *,
@@ -124,7 +123,8 @@ extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 			     const struct nfs_pgio_completion_ops *compl_ops,
 			     const struct nfs_rw_ops *rw_ops,
 			     size_t bsize,
-			     int how);
+			     int how,
+			     gfp_t gfp_flags);
 extern	int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
 				   struct nfs_page *);
 extern  int nfs_pageio_resend(struct nfs_pageio_descriptor *,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 348f7c158084..51e27f9746ee 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1427,6 +1427,7 @@ struct nfs_pgio_header {
 	struct list_head	pages;
 	struct nfs_page		*req;
 	struct nfs_writeverf	verf;		/* Used for writes */
+	fmode_t			rw_mode;
 	struct pnfs_layout_segment *lseg;
 	loff_t			io_start;
 	const struct rpc_call_ops *mds_ops;
-- 
cgit v1.2.3


From 56e0d71ef12f026d96213e45a662bde6bbff4676 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sat, 15 Apr 2017 19:20:01 -0400
Subject: NFSv4: Fix a hang in OPEN related to server reboot

If the server fails to return the attributes as part of an OPEN
reply, and then reboots, we can end up hanging. The reason is that
the client attempts to send a GETATTR in order to pick up the
missing OPEN call, but fails to release the slot first, causing
reboot recovery to deadlock.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Fixes: 2e80dbe7ac51a ("NFSv4.1: Close callback races for OPEN, LAYOUTGET...")
Cc: stable@vger.kernel.org # v4.8+
---
 fs/nfs/nfs4proc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4e52ac773d1f..6e9ff2d9a5bf 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2300,8 +2300,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		if (status != 0)
 			return status;
 	}
-	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
+	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) {
+		nfs4_sequence_free_slot(&o_res->seq_res);
 		nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From 260f32adb88dadfaac29f47f761a088238ca164c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 20 Apr 2017 14:33:06 -0400
Subject: pNFS/flexfiles: Check the result of nfs4_pnfs_ds_connect

The check in nfs4_ff_layout_prepare_ds() seems to be missing.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Fixes: a33e4b036d461 ("pNFS: return status from nfs4_pnfs_ds_connect")
Cc: Weston Andros Adamson <dros@primarydata.com>
Cc: stable@vger.kernel.org # v4.11
---
 fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 8ca9cc665561..6df7a0cf5660 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -421,7 +421,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 			     mirror->mirror_ds->ds_versions[0].minor_version);
 
 	/* connect success, check rsize/wsize limit */
-	if (ds->ds_clp) {
+	if (!status) {
 		max_payload =
 			nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
 				       NULL);
-- 
cgit v1.2.3


From 6d22323b2e9f95f99aa6f3e93c302f8100277078 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 12 Apr 2017 18:01:08 +0200
Subject: nfs: remove the objlayout driver

The objlayout code has been in the tree, but it's been unmaintained and
no server product for it actually ever shipped.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 Documentation/admin-guide/kernel-parameters.txt |   6 -
 Documentation/filesystems/nfs/pnfs.txt          |  37 --
 fs/nfs/Kconfig                                  |   5 -
 fs/nfs/Makefile                                 |   1 -
 fs/nfs/objlayout/Kbuild                         |   5 -
 fs/nfs/objlayout/objio_osd.c                    | 675 ----------------------
 fs/nfs/objlayout/objlayout.c                    | 706 ------------------------
 fs/nfs/objlayout/objlayout.h                    | 183 ------
 fs/nfs/objlayout/pnfs_osd_xdr_cli.c             | 415 --------------
 9 files changed, 2033 deletions(-)
 delete mode 100644 fs/nfs/objlayout/Kbuild
 delete mode 100644 fs/nfs/objlayout/objio_osd.c
 delete mode 100644 fs/nfs/objlayout/objlayout.c
 delete mode 100644 fs/nfs/objlayout/objlayout.h
 delete mode 100644 fs/nfs/objlayout/pnfs_osd_xdr_cli.c

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index facc20a3f962..17156d66b124 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2419,12 +2419,6 @@
 			and gids from such clients.  This is intended to ease
 			migration from NFSv2/v3.
 
-	objlayoutdriver.osd_login_prog=
-			[NFS] [OBJLAYOUT] sets the pathname to the program which
-			is used to automatically discover and login into new
-			osd-targets. Please see:
-			Documentation/filesystems/pnfs.txt for more explanations
-
 	nmi_debug=	[KNL,AVR32,SH] Specify one or more actions to take
 			when a NMI is triggered.
 			Format: [state][,regs][,debounce][,die]
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt
index 8de578a98222..80dc0bdc302a 100644
--- a/Documentation/filesystems/nfs/pnfs.txt
+++ b/Documentation/filesystems/nfs/pnfs.txt
@@ -64,46 +64,9 @@ table which are called by the nfs-client pnfs-core to implement the
 different layout types.
 
 Files-layout-driver code is in: fs/nfs/filelayout/.. directory
-Objects-layout-driver code is in: fs/nfs/objlayout/.. directory
 Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory
 Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory
 
-objects-layout setup
---------------------
-
-As part of the full STD implementation the objlayoutdriver.ko needs, at times,
-to automatically login to yet undiscovered iscsi/osd devices. For this the
-driver makes up-calles to a user-mode script called *osd_login*
-
-The path_name of the script to use is by default:
-	/sbin/osd_login.
-This name can be overridden by the Kernel module parameter:
-	objlayoutdriver.osd_login_prog
-
-If Kernel does not find the osd_login_prog path it will zero it out
-and will not attempt farther logins. An admin can then write new value
-to the objlayoutdriver.osd_login_prog Kernel parameter to re-enable it.
-
-The /sbin/osd_login is part of the nfs-utils package, and should usually
-be installed on distributions that support this Kernel version.
-
-The API to the login script is as follows:
-	Usage: $0 -u <URI> -o <OSDNAME> -s <SYSTEMID>
-	Options:
-		-u		target uri e.g. iscsi://<ip>:<port>
-				(always exists)
-				(More protocols can be defined in the future.
-				 The client does not interpret this string it is
-				 passed unchanged as received from the Server)
-		-o		osdname of the requested target OSD
-				(Might be empty)
-				(A string which denotes the OSD name, there is a
-				 limit of 64 chars on this string)
-		-s 		systemid of the requested target OSD
-				(Might be empty)
-				(This string, if not empty is always an hex
-				 representation of the 20 bytes osd_system_id)
-
 blocks-layout setup
 -------------------
 
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index f31fd0dd92c6..69d02cf8cf37 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -123,11 +123,6 @@ config PNFS_BLOCK
 	depends on NFS_V4_1 && BLK_DEV_DM
 	default NFS_V4
 
-config PNFS_OBJLAYOUT
-	tristate
-	depends on NFS_V4_1 && SCSI_OSD_ULD
-	default NFS_V4
-
 config PNFS_FLEXFILE_LAYOUT
 	tristate
 	depends on NFS_V4_1 && NFS_V3
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 6abdda209642..98f4e5728a67 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -31,6 +31,5 @@ nfsv4-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o pnfs_nfs.o
 nfsv4-$(CONFIG_NFS_V4_2)	+= nfs42proc.o
 
 obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
-obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
 obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
 obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/
diff --git a/fs/nfs/objlayout/Kbuild b/fs/nfs/objlayout/Kbuild
deleted file mode 100644
index ed30ea072bb8..000000000000
--- a/fs/nfs/objlayout/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the pNFS Objects Layout Driver kernel module
-#
-objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o
-obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
deleted file mode 100644
index 049c1b1f2932..000000000000
--- a/fs/nfs/objlayout/objio_osd.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- *  pNFS Objects layout implementation over open-osd initiator library
- *
- *  Copyright (C) 2009 Panasas Inc. [year of first publication]
- *  All rights reserved.
- *
- *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <ooo@electrozaur.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  See the file COPYING included with this distribution for more details.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the Panasas company nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/module.h>
-#include <scsi/osd_ore.h>
-
-#include "objlayout.h"
-#include "../internal.h"
-
-#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
-
-struct objio_dev_ent {
-	struct nfs4_deviceid_node id_node;
-	struct ore_dev od;
-};
-
-static void
-objio_free_deviceid_node(struct nfs4_deviceid_node *d)
-{
-	struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
-
-	dprintk("%s: free od=%p\n", __func__, de->od.od);
-	osduld_put_device(de->od.od);
-	kfree_rcu(d, rcu);
-}
-
-struct objio_segment {
-	struct pnfs_layout_segment lseg;
-
-	struct ore_layout layout;
-	struct ore_components oc;
-};
-
-static inline struct objio_segment *
-OBJIO_LSEG(struct pnfs_layout_segment *lseg)
-{
-	return container_of(lseg, struct objio_segment, lseg);
-}
-
-struct objio_state {
-	/* Generic layer */
-	struct objlayout_io_res oir;
-
-	bool sync;
-	/*FIXME: Support for extra_bytes at ore_get_rw_state() */
-	struct ore_io_state *ios;
-};
-
-/* Send and wait for a get_device_info of devices in the layout,
-   then look them up with the osd_initiator library */
-struct nfs4_deviceid_node *
-objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
-			gfp_t gfp_flags)
-{
-	struct pnfs_osd_deviceaddr *deviceaddr;
-	struct objio_dev_ent *ode = NULL;
-	struct osd_dev *od;
-	struct osd_dev_info odi;
-	bool retry_flag = true;
-	__be32 *p;
-	int err;
-
-	deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags);
-	if (!deviceaddr)
-		return NULL;
-
-	p = page_address(pdev->pages[0]);
-	pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p);
-
-	odi.systemid_len = deviceaddr->oda_systemid.len;
-	if (odi.systemid_len > sizeof(odi.systemid)) {
-		dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n",
-			__func__, sizeof(odi.systemid));
-		err = -EINVAL;
-		goto out;
-	} else if (odi.systemid_len)
-		memcpy(odi.systemid, deviceaddr->oda_systemid.data,
-		       odi.systemid_len);
-	odi.osdname_len	 = deviceaddr->oda_osdname.len;
-	odi.osdname	 = (u8 *)deviceaddr->oda_osdname.data;
-
-	if (!odi.osdname_len && !odi.systemid_len) {
-		dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
-			__func__);
-		err = -ENODEV;
-		goto out;
-	}
-
-retry_lookup:
-	od = osduld_info_lookup(&odi);
-	if (IS_ERR(od)) {
-		err = PTR_ERR(od);
-		dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
-		if (err == -ENODEV && retry_flag) {
-			err = objlayout_autologin(deviceaddr);
-			if (likely(!err)) {
-				retry_flag = false;
-				goto retry_lookup;
-			}
-		}
-		goto out;
-	}
-
-	dprintk("Adding new dev_id(%llx:%llx)\n",
-		_DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id));
-
-	ode = kzalloc(sizeof(*ode), gfp_flags);
-	if (!ode) {
-		dprintk("%s: -ENOMEM od=%p\n", __func__, od);
-		goto out;
-	}
-
-	nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id);
-	kfree(deviceaddr);
-
-	ode->od.od = od;
-	return &ode->id_node;
-
-out:
-	kfree(deviceaddr);
-	return NULL;
-}
-
-static void copy_single_comp(struct ore_components *oc, unsigned c,
-			     struct pnfs_osd_object_cred *src_comp)
-{
-	struct ore_comp *ocomp = &oc->comps[c];
-
-	WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */
-	WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred));
-
-	ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id;
-	ocomp->obj.id = src_comp->oc_object_id.oid_object_id;
-
-	memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
-}
-
-static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
-		       struct objio_segment **pseg)
-{
-/*	This is the in memory structure of the objio_segment
- *
- *	struct __alloc_objio_segment {
- *		struct objio_segment olseg;
- *		struct ore_dev *ods[numdevs];
- *		struct ore_comp	comps[numdevs];
- *	} *aolseg;
- *	NOTE: The code as above compiles and runs perfectly. It is elegant,
- *	type safe and compact. At some Past time Linus has decided he does not
- *	like variable length arrays, For the sake of this principal we uglify
- *	the code as below.
- */
-	struct objio_segment *lseg;
-	size_t lseg_size = sizeof(*lseg) +
-			numdevs * sizeof(lseg->oc.ods[0]) +
-			numdevs * sizeof(*lseg->oc.comps);
-
-	lseg = kzalloc(lseg_size, gfp_flags);
-	if (unlikely(!lseg)) {
-		dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__,
-			numdevs, lseg_size);
-		return -ENOMEM;
-	}
-
-	lseg->oc.numdevs = numdevs;
-	lseg->oc.single_comp = EC_MULTPLE_COMPS;
-	lseg->oc.ods = (void *)(lseg + 1);
-	lseg->oc.comps = (void *)(lseg->oc.ods + numdevs);
-
-	*pseg = lseg;
-	return 0;
-}
-
-int objio_alloc_lseg(struct pnfs_layout_segment **outp,
-	struct pnfs_layout_hdr *pnfslay,
-	struct pnfs_layout_range *range,
-	struct xdr_stream *xdr,
-	gfp_t gfp_flags)
-{
-	struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode);
-	struct objio_segment *objio_seg;
-	struct pnfs_osd_xdr_decode_layout_iter iter;
-	struct pnfs_osd_layout layout;
-	struct pnfs_osd_object_cred src_comp;
-	unsigned cur_comp;
-	int err;
-
-	err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
-	if (unlikely(err))
-		return err;
-
-	err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg);
-	if (unlikely(err))
-		return err;
-
-	objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit;
-	objio_seg->layout.group_width = layout.olo_map.odm_group_width;
-	objio_seg->layout.group_depth = layout.olo_map.odm_group_depth;
-	objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
-	objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm;
-
-	err = ore_verify_layout(layout.olo_map.odm_num_comps,
-					  &objio_seg->layout);
-	if (unlikely(err))
-		goto err;
-
-	objio_seg->oc.first_dev = layout.olo_comps_index;
-	cur_comp = 0;
-	while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
-		struct nfs4_deviceid_node *d;
-		struct objio_dev_ent *ode;
-
-		copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);
-
-		d = nfs4_find_get_deviceid(server,
-				&src_comp.oc_object_id.oid_device_id,
-				pnfslay->plh_lc_cred, gfp_flags);
-		if (!d) {
-			err = -ENXIO;
-			goto err;
-		}
-
-		ode = container_of(d, struct objio_dev_ent, id_node);
-		objio_seg->oc.ods[cur_comp++] = &ode->od;
-	}
-	/* pnfs_osd_xdr_decode_layout_comp returns false on error */
-	if (unlikely(err))
-		goto err;
-
-	*outp = &objio_seg->lseg;
-	return 0;
-
-err:
-	kfree(objio_seg);
-	dprintk("%s: Error: return %d\n", __func__, err);
-	*outp = NULL;
-	return err;
-}
-
-void objio_free_lseg(struct pnfs_layout_segment *lseg)
-{
-	int i;
-	struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
-
-	for (i = 0; i < objio_seg->oc.numdevs; i++) {
-		struct ore_dev *od = objio_seg->oc.ods[i];
-		struct objio_dev_ent *ode;
-
-		if (!od)
-			break;
-		ode = container_of(od, typeof(*ode), od);
-		nfs4_put_deviceid_node(&ode->id_node);
-	}
-	kfree(objio_seg);
-}
-
-static int
-objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
-	struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
-	loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
-	struct objio_state **outp)
-{
-	struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
-	struct ore_io_state *ios;
-	int ret;
-	struct __alloc_objio_state {
-		struct objio_state objios;
-		struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
-	} *aos;
-
-	aos = kzalloc(sizeof(*aos), gfp_flags);
-	if (unlikely(!aos))
-		return -ENOMEM;
-
-	objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
-			aos->ioerrs, rpcdata, pnfs_layout_type);
-
-	ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
-			       offset, count, &ios);
-	if (unlikely(ret)) {
-		kfree(aos);
-		return ret;
-	}
-
-	ios->pages = pages;
-	ios->pgbase = pgbase;
-	ios->private = aos;
-	BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
-
-	aos->objios.sync = 0;
-	aos->objios.ios = ios;
-	*outp = &aos->objios;
-	return 0;
-}
-
-void objio_free_result(struct objlayout_io_res *oir)
-{
-	struct objio_state *objios = container_of(oir, struct objio_state, oir);
-
-	ore_put_io_state(objios->ios);
-	kfree(objios);
-}
-
-static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
-{
-	switch (oep) {
-	case OSD_ERR_PRI_NO_ERROR:
-		return (enum pnfs_osd_errno)0;
-
-	case OSD_ERR_PRI_CLEAR_PAGES:
-		BUG_ON(1);
-		return 0;
-
-	case OSD_ERR_PRI_RESOURCE:
-		return PNFS_OSD_ERR_RESOURCE;
-	case OSD_ERR_PRI_BAD_CRED:
-		return PNFS_OSD_ERR_BAD_CRED;
-	case OSD_ERR_PRI_NO_ACCESS:
-		return PNFS_OSD_ERR_NO_ACCESS;
-	case OSD_ERR_PRI_UNREACHABLE:
-		return PNFS_OSD_ERR_UNREACHABLE;
-	case OSD_ERR_PRI_NOT_FOUND:
-		return PNFS_OSD_ERR_NOT_FOUND;
-	case OSD_ERR_PRI_NO_SPACE:
-		return PNFS_OSD_ERR_NO_SPACE;
-	default:
-		WARN_ON(1);
-		/* fallthrough */
-	case OSD_ERR_PRI_EIO:
-		return PNFS_OSD_ERR_EIO;
-	}
-}
-
-static void __on_dev_error(struct ore_io_state *ios,
-	struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
-	u64 dev_offset, u64  dev_len)
-{
-	struct objio_state *objios = ios->private;
-	struct pnfs_osd_objid pooid;
-	struct objio_dev_ent *ode = container_of(od, typeof(*ode), od);
-	/* FIXME: what to do with more-then-one-group layouts. We need to
-	 * translate from ore_io_state index to oc->comps index
-	 */
-	unsigned comp = dev_index;
-
-	pooid.oid_device_id = ode->id_node.deviceid;
-	pooid.oid_partition_id = ios->oc->comps[comp].obj.partition;
-	pooid.oid_object_id = ios->oc->comps[comp].obj.id;
-
-	objlayout_io_set_result(&objios->oir, comp,
-				&pooid, osd_pri_2_pnfs_err(oep),
-				dev_offset, dev_len, !ios->reading);
-}
-
-/*
- * read
- */
-static void _read_done(struct ore_io_state *ios, void *private)
-{
-	struct objio_state *objios = private;
-	ssize_t status;
-	int ret = ore_check_io(ios, &__on_dev_error);
-
-	/* FIXME: _io_free(ios) can we dealocate the libosd resources; */
-
-	if (likely(!ret))
-		status = ios->length;
-	else
-		status = ret;
-
-	objlayout_read_done(&objios->oir, status, objios->sync);
-}
-
-int objio_read_pagelist(struct nfs_pgio_header *hdr)
-{
-	struct objio_state *objios;
-	int ret;
-
-	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
-			hdr->lseg, hdr->args.pages, hdr->args.pgbase,
-			hdr->args.offset, hdr->args.count, hdr,
-			GFP_KERNEL, &objios);
-	if (unlikely(ret))
-		return ret;
-
-	objios->ios->done = _read_done;
-	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
-		hdr->args.offset, hdr->args.count);
-	ret = ore_read(objios->ios);
-	if (unlikely(ret))
-		objio_free_result(&objios->oir);
-	return ret;
-}
-
-/*
- * write
- */
-static void _write_done(struct ore_io_state *ios, void *private)
-{
-	struct objio_state *objios = private;
-	ssize_t status;
-	int ret = ore_check_io(ios, &__on_dev_error);
-
-	/* FIXME: _io_free(ios) can we dealocate the libosd resources; */
-
-	if (likely(!ret)) {
-		/* FIXME: should be based on the OSD's persistence model
-		 * See OSD2r05 Section 4.13 Data persistence model */
-		objios->oir.committed = NFS_FILE_SYNC;
-		status = ios->length;
-	} else {
-		status = ret;
-	}
-
-	objlayout_write_done(&objios->oir, status, objios->sync);
-}
-
-static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
-{
-	struct objio_state *objios = priv;
-	struct nfs_pgio_header *hdr = objios->oir.rpcdata;
-	struct address_space *mapping = hdr->inode->i_mapping;
-	pgoff_t index = offset / PAGE_SIZE;
-	struct page *page;
-	loff_t i_size = i_size_read(hdr->inode);
-
-	if (offset >= i_size) {
-		*uptodate = true;
-		dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
-		return ZERO_PAGE(0);
-	}
-
-	page = find_get_page(mapping, index);
-	if (!page) {
-		page = find_or_create_page(mapping, index, GFP_NOFS);
-		if (unlikely(!page)) {
-			dprintk("%s: grab_cache_page Failed index=0x%lx\n",
-				__func__, index);
-			return NULL;
-		}
-		unlock_page(page);
-	}
-	*uptodate = PageUptodate(page);
-	dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
-	return page;
-}
-
-static void __r4w_put_page(void *priv, struct page *page)
-{
-	dprintk("%s: index=0x%lx\n", __func__,
-		(page == ZERO_PAGE(0)) ? -1UL : page->index);
-	if (ZERO_PAGE(0) != page)
-		put_page(page);
-	return;
-}
-
-static const struct _ore_r4w_op _r4w_op = {
-	.get_page = &__r4w_get_page,
-	.put_page = &__r4w_put_page,
-};
-
-int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
-{
-	struct objio_state *objios;
-	int ret;
-
-	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
-			hdr->lseg, hdr->args.pages, hdr->args.pgbase,
-			hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
-			&objios);
-	if (unlikely(ret))
-		return ret;
-
-	objios->sync = 0 != (how & FLUSH_SYNC);
-	objios->ios->r4w = &_r4w_op;
-
-	if (!objios->sync)
-		objios->ios->done = _write_done;
-
-	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
-		hdr->args.offset, hdr->args.count);
-	ret = ore_write(objios->ios);
-	if (unlikely(ret)) {
-		objio_free_result(&objios->oir);
-		return ret;
-	}
-
-	if (objios->sync)
-		_write_done(objios->ios, objios);
-
-	return 0;
-}
-
-/*
- * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
- * of bytes (maximum @req->wb_bytes) that can be coalesced.
- */
-static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
-			  struct nfs_page *prev, struct nfs_page *req)
-{
-	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio);
-	unsigned int size;
-
-	size = pnfs_generic_pg_test(pgio, prev, req);
-
-	if (!size || mirror->pg_count + req->wb_bytes >
-	    (unsigned long)pgio->pg_layout_private)
-		return 0;
-
-	return min(size, req->wb_bytes);
-}
-
-static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
-{
-	pnfs_generic_pg_init_read(pgio, req);
-	if (unlikely(pgio->pg_lseg == NULL))
-		return; /* Not pNFS */
-
-	pgio->pg_layout_private = (void *)
-				OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
-}
-
-static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
-				   unsigned long *stripe_end)
-{
-	u32 stripe_off;
-	unsigned stripe_size;
-
-	if (layout->raid_algorithm == PNFS_OSD_RAID_0)
-		return true;
-
-	stripe_size = layout->stripe_unit *
-				(layout->group_width - layout->parity);
-
-	div_u64_rem(offset, stripe_size, &stripe_off);
-	if (!stripe_off)
-		return true;
-
-	*stripe_end = stripe_size - stripe_off;
-	return false;
-}
-
-static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
-{
-	unsigned long stripe_end = 0;
-	u64 wb_size;
-
-	if (pgio->pg_dreq == NULL)
-		wb_size = i_size_read(pgio->pg_inode) - req_offset(req);
-	else
-		wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
-
-	pnfs_generic_pg_init_write(pgio, req, wb_size);
-	if (unlikely(pgio->pg_lseg == NULL))
-		return; /* Not pNFS */
-
-	if (req->wb_offset ||
-	    !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
-			       &OBJIO_LSEG(pgio->pg_lseg)->layout,
-			       &stripe_end)) {
-		pgio->pg_layout_private = (void *)stripe_end;
-	} else {
-		pgio->pg_layout_private = (void *)
-				OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
-	}
-}
-
-static const struct nfs_pageio_ops objio_pg_read_ops = {
-	.pg_init = objio_init_read,
-	.pg_test = objio_pg_test,
-	.pg_doio = pnfs_generic_pg_readpages,
-	.pg_cleanup = pnfs_generic_pg_cleanup,
-};
-
-static const struct nfs_pageio_ops objio_pg_write_ops = {
-	.pg_init = objio_init_write,
-	.pg_test = objio_pg_test,
-	.pg_doio = pnfs_generic_pg_writepages,
-	.pg_cleanup = pnfs_generic_pg_cleanup,
-};
-
-static struct pnfs_layoutdriver_type objlayout_type = {
-	.id = LAYOUT_OSD2_OBJECTS,
-	.name = "LAYOUT_OSD2_OBJECTS",
-	.flags                   = PNFS_LAYOUTRET_ON_SETATTR |
-				   PNFS_LAYOUTRET_ON_ERROR,
-
-	.max_deviceinfo_size	 = PAGE_SIZE,
-	.owner		       	 = THIS_MODULE,
-	.alloc_layout_hdr        = objlayout_alloc_layout_hdr,
-	.free_layout_hdr         = objlayout_free_layout_hdr,
-
-	.alloc_lseg              = objlayout_alloc_lseg,
-	.free_lseg               = objlayout_free_lseg,
-
-	.read_pagelist           = objlayout_read_pagelist,
-	.write_pagelist          = objlayout_write_pagelist,
-	.pg_read_ops             = &objio_pg_read_ops,
-	.pg_write_ops            = &objio_pg_write_ops,
-
-	.sync			 = pnfs_generic_sync,
-
-	.free_deviceid_node	 = objio_free_deviceid_node,
-
-	.encode_layoutcommit	 = objlayout_encode_layoutcommit,
-	.encode_layoutreturn     = objlayout_encode_layoutreturn,
-};
-
-MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
-MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
-MODULE_LICENSE("GPL");
-
-static int __init
-objlayout_init(void)
-{
-	int ret = pnfs_register_layoutdriver(&objlayout_type);
-
-	if (ret)
-		printk(KERN_INFO
-			"NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n",
-			__func__, ret);
-	else
-		printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n",
-			__func__);
-	return ret;
-}
-
-static void __exit
-objlayout_exit(void)
-{
-	pnfs_unregister_layoutdriver(&objlayout_type);
-	printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n",
-	       __func__);
-}
-
-MODULE_ALIAS("nfs-layouttype4-2");
-
-module_init(objlayout_init);
-module_exit(objlayout_exit);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
deleted file mode 100644
index 8f3d2acb81c3..000000000000
--- a/fs/nfs/objlayout/objlayout.c
+++ /dev/null
@@ -1,706 +0,0 @@
-/*
- *  pNFS Objects layout driver high level definitions
- *
- *  Copyright (C) 2007 Panasas Inc. [year of first publication]
- *  All rights reserved.
- *
- *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <ooo@electrozaur.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  See the file COPYING included with this distribution for more details.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the Panasas company nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/kmod.h>
-#include <linux/moduleparam.h>
-#include <linux/ratelimit.h>
-#include <scsi/osd_initiator.h>
-#include "objlayout.h"
-
-#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
-/*
- * Create a objlayout layout structure for the given inode and return it.
- */
-struct pnfs_layout_hdr *
-objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
-{
-	struct objlayout *objlay;
-
-	objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
-	if (!objlay)
-		return NULL;
-	spin_lock_init(&objlay->lock);
-	INIT_LIST_HEAD(&objlay->err_list);
-	dprintk("%s: Return %p\n", __func__, objlay);
-	return &objlay->pnfs_layout;
-}
-
-/*
- * Free an objlayout layout structure
- */
-void
-objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
-{
-	struct objlayout *objlay = OBJLAYOUT(lo);
-
-	dprintk("%s: objlay %p\n", __func__, objlay);
-
-	WARN_ON(!list_empty(&objlay->err_list));
-	kfree(objlay);
-}
-
-/*
- * Unmarshall layout and store it in pnfslay.
- */
-struct pnfs_layout_segment *
-objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
-		     struct nfs4_layoutget_res *lgr,
-		     gfp_t gfp_flags)
-{
-	int status = -ENOMEM;
-	struct xdr_stream stream;
-	struct xdr_buf buf = {
-		.pages =  lgr->layoutp->pages,
-		.page_len =  lgr->layoutp->len,
-		.buflen =  lgr->layoutp->len,
-		.len = lgr->layoutp->len,
-	};
-	struct page *scratch;
-	struct pnfs_layout_segment *lseg;
-
-	dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
-
-	scratch = alloc_page(gfp_flags);
-	if (!scratch)
-		goto err_nofree;
-
-	xdr_init_decode(&stream, &buf, NULL);
-	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
-
-	status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
-	if (unlikely(status)) {
-		dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
-			status);
-		goto err;
-	}
-
-	__free_page(scratch);
-
-	dprintk("%s: Return %p\n", __func__, lseg);
-	return lseg;
-
-err:
-	__free_page(scratch);
-err_nofree:
-	dprintk("%s: Err Return=>%d\n", __func__, status);
-	return ERR_PTR(status);
-}
-
-/*
- * Free a layout segement
- */
-void
-objlayout_free_lseg(struct pnfs_layout_segment *lseg)
-{
-	dprintk("%s: freeing layout segment %p\n", __func__, lseg);
-
-	if (unlikely(!lseg))
-		return;
-
-	objio_free_lseg(lseg);
-}
-
-/*
- * I/O Operations
- */
-static inline u64
-end_offset(u64 start, u64 len)
-{
-	u64 end;
-
-	end = start + len;
-	return end >= start ? end : NFS4_MAX_UINT64;
-}
-
-static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
-			   struct page ***p_pages, unsigned *p_pgbase,
-			   u64 offset, unsigned long count)
-{
-	u64 lseg_end_offset;
-
-	BUG_ON(offset < lseg->pls_range.offset);
-	lseg_end_offset = end_offset(lseg->pls_range.offset,
-				     lseg->pls_range.length);
-	BUG_ON(offset >= lseg_end_offset);
-	WARN_ON(offset + count > lseg_end_offset);
-
-	if (*p_pgbase > PAGE_SIZE) {
-		dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
-		*p_pages += *p_pgbase >> PAGE_SHIFT;
-		*p_pgbase &= ~PAGE_MASK;
-	}
-}
-
-/*
- * I/O done common code
- */
-static void
-objlayout_iodone(struct objlayout_io_res *oir)
-{
-	if (likely(oir->status >= 0)) {
-		objio_free_result(oir);
-	} else {
-		struct objlayout *objlay = oir->objlay;
-
-		spin_lock(&objlay->lock);
-		objlay->delta_space_valid = OBJ_DSU_INVALID;
-		list_add(&objlay->err_list, &oir->err_list);
-		spin_unlock(&objlay->lock);
-	}
-}
-
-/*
- * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
- *
- * The @index component IO failed (error returned from target). Register
- * the error for later reporting at layout-return.
- */
-void
-objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
-			struct pnfs_osd_objid *pooid, int osd_error,
-			u64 offset, u64 length, bool is_write)
-{
-	struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];
-
-	BUG_ON(index >= oir->num_comps);
-	if (osd_error) {
-		ioerr->oer_component = *pooid;
-		ioerr->oer_comp_offset = offset;
-		ioerr->oer_comp_length = length;
-		ioerr->oer_iswrite = is_write;
-		ioerr->oer_errno = osd_error;
-
-		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
-			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
-			__func__, index, ioerr->oer_errno,
-			ioerr->oer_iswrite,
-			_DEVID_LO(&ioerr->oer_component.oid_device_id),
-			_DEVID_HI(&ioerr->oer_component.oid_device_id),
-			ioerr->oer_component.oid_partition_id,
-			ioerr->oer_component.oid_object_id,
-			ioerr->oer_comp_offset,
-			ioerr->oer_comp_length);
-	} else {
-		/* User need not call if no error is reported */
-		ioerr->oer_errno = 0;
-	}
-}
-
-/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
- * This is because the osd completion is called with ints-off from
- * the block layer
- */
-static void _rpc_read_complete(struct work_struct *work)
-{
-	struct rpc_task *task;
-	struct nfs_pgio_header *hdr;
-
-	dprintk("%s enter\n", __func__);
-	task = container_of(work, struct rpc_task, u.tk_work);
-	hdr = container_of(task, struct nfs_pgio_header, task);
-
-	pnfs_ld_read_done(hdr);
-}
-
-void
-objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
-{
-	struct nfs_pgio_header *hdr = oir->rpcdata;
-
-	oir->status = hdr->task.tk_status = status;
-	if (status >= 0)
-		hdr->res.count = status;
-	else
-		hdr->pnfs_error = status;
-	objlayout_iodone(oir);
-	/* must not use oir after this point */
-
-	dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
-		status, hdr->res.eof, sync);
-
-	if (sync)
-		pnfs_ld_read_done(hdr);
-	else {
-		INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
-		schedule_work(&hdr->task.u.tk_work);
-	}
-}
-
-/*
- * Perform sync or async reads.
- */
-enum pnfs_try_status
-objlayout_read_pagelist(struct nfs_pgio_header *hdr)
-{
-	struct inode *inode = hdr->inode;
-	loff_t offset = hdr->args.offset;
-	size_t count = hdr->args.count;
-	int err;
-	loff_t eof;
-
-	eof = i_size_read(inode);
-	if (unlikely(offset + count > eof)) {
-		if (offset >= eof) {
-			err = 0;
-			hdr->res.count = 0;
-			hdr->res.eof = 1;
-			/*FIXME: do we need to call pnfs_ld_read_done() */
-			goto out;
-		}
-		count = eof - offset;
-	}
-
-	hdr->res.eof = (offset + count) >= eof;
-	_fix_verify_io_params(hdr->lseg, &hdr->args.pages,
-			      &hdr->args.pgbase,
-			      hdr->args.offset, hdr->args.count);
-
-	dprintk("%s: inode(%lx) offset 0x%llx count 0x%zx eof=%d\n",
-		__func__, inode->i_ino, offset, count, hdr->res.eof);
-
-	err = objio_read_pagelist(hdr);
- out:
-	if (unlikely(err)) {
-		hdr->pnfs_error = err;
-		dprintk("%s: Returned Error %d\n", __func__, err);
-		return PNFS_NOT_ATTEMPTED;
-	}
-	return PNFS_ATTEMPTED;
-}
-
-/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
- * This is because the osd completion is called with ints-off from
- * the block layer
- */
-static void _rpc_write_complete(struct work_struct *work)
-{
-	struct rpc_task *task;
-	struct nfs_pgio_header *hdr;
-
-	dprintk("%s enter\n", __func__);
-	task = container_of(work, struct rpc_task, u.tk_work);
-	hdr = container_of(task, struct nfs_pgio_header, task);
-
-	pnfs_ld_write_done(hdr);
-}
-
-void
-objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
-{
-	struct nfs_pgio_header *hdr = oir->rpcdata;
-
-	oir->status = hdr->task.tk_status = status;
-	if (status >= 0) {
-		hdr->res.count = status;
-		hdr->verf.committed = oir->committed;
-	} else {
-		hdr->pnfs_error = status;
-	}
-	objlayout_iodone(oir);
-	/* must not use oir after this point */
-
-	dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
-		status, hdr->verf.committed, sync);
-
-	if (sync)
-		pnfs_ld_write_done(hdr);
-	else {
-		INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
-		schedule_work(&hdr->task.u.tk_work);
-	}
-}
-
-/*
- * Perform sync or async writes.
- */
-enum pnfs_try_status
-objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
-{
-	int err;
-
-	_fix_verify_io_params(hdr->lseg, &hdr->args.pages,
-			      &hdr->args.pgbase,
-			      hdr->args.offset, hdr->args.count);
-
-	err = objio_write_pagelist(hdr, how);
-	if (unlikely(err)) {
-		hdr->pnfs_error = err;
-		dprintk("%s: Returned Error %d\n", __func__, err);
-		return PNFS_NOT_ATTEMPTED;
-	}
-	return PNFS_ATTEMPTED;
-}
-
-void
-objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
-			      struct xdr_stream *xdr,
-			      const struct nfs4_layoutcommit_args *args)
-{
-	struct objlayout *objlay = OBJLAYOUT(pnfslay);
-	struct pnfs_osd_layoutupdate lou;
-	__be32 *start;
-
-	dprintk("%s: Begin\n", __func__);
-
-	spin_lock(&objlay->lock);
-	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
-	lou.dsu_delta = objlay->delta_space_used;
-	objlay->delta_space_used = 0;
-	objlay->delta_space_valid = OBJ_DSU_INIT;
-	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
-	spin_unlock(&objlay->lock);
-
-	start = xdr_reserve_space(xdr, 4);
-
-	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
-
-	*start = cpu_to_be32((xdr->p - start - 1) * 4);
-
-	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
-		lou.dsu_delta, lou.olu_ioerr_flag);
-}
-
-static int
-err_prio(u32 oer_errno)
-{
-	switch (oer_errno) {
-	case 0:
-		return 0;
-
-	case PNFS_OSD_ERR_RESOURCE:
-		return OSD_ERR_PRI_RESOURCE;
-	case PNFS_OSD_ERR_BAD_CRED:
-		return OSD_ERR_PRI_BAD_CRED;
-	case PNFS_OSD_ERR_NO_ACCESS:
-		return OSD_ERR_PRI_NO_ACCESS;
-	case PNFS_OSD_ERR_UNREACHABLE:
-		return OSD_ERR_PRI_UNREACHABLE;
-	case PNFS_OSD_ERR_NOT_FOUND:
-		return OSD_ERR_PRI_NOT_FOUND;
-	case PNFS_OSD_ERR_NO_SPACE:
-		return OSD_ERR_PRI_NO_SPACE;
-	default:
-		WARN_ON(1);
-		/* fallthrough */
-	case PNFS_OSD_ERR_EIO:
-		return OSD_ERR_PRI_EIO;
-	}
-}
-
-static void
-merge_ioerr(struct pnfs_osd_ioerr *dest_err,
-	    const struct pnfs_osd_ioerr *src_err)
-{
-	u64 dest_end, src_end;
-
-	if (!dest_err->oer_errno) {
-		*dest_err = *src_err;
-		/* accumulated device must be blank */
-		memset(&dest_err->oer_component.oid_device_id, 0,
-			sizeof(dest_err->oer_component.oid_device_id));
-
-		return;
-	}
-
-	if (dest_err->oer_component.oid_partition_id !=
-				src_err->oer_component.oid_partition_id)
-		dest_err->oer_component.oid_partition_id = 0;
-
-	if (dest_err->oer_component.oid_object_id !=
-				src_err->oer_component.oid_object_id)
-		dest_err->oer_component.oid_object_id = 0;
-
-	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
-		dest_err->oer_comp_offset = src_err->oer_comp_offset;
-
-	dest_end = end_offset(dest_err->oer_comp_offset,
-			      dest_err->oer_comp_length);
-	src_end =  end_offset(src_err->oer_comp_offset,
-			      src_err->oer_comp_length);
-	if (dest_end < src_end)
-		dest_end = src_end;
-
-	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
-
-	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
-	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
-			dest_err->oer_errno = src_err->oer_errno;
-	} else if (src_err->oer_iswrite) {
-		dest_err->oer_iswrite = true;
-		dest_err->oer_errno = src_err->oer_errno;
-	}
-}
-
-static void
-encode_accumulated_error(struct objlayout *objlay, __be32 *p)
-{
-	struct objlayout_io_res *oir, *tmp;
-	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
-
-	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
-		unsigned i;
-
-		for (i = 0; i < oir->num_comps; i++) {
-			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
-
-			if (!ioerr->oer_errno)
-				continue;
-
-			printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
-				"is_write=%d dev(%llx:%llx) par=0x%llx "
-				"obj=0x%llx offset=0x%llx length=0x%llx\n",
-				__func__, i, ioerr->oer_errno,
-				ioerr->oer_iswrite,
-				_DEVID_LO(&ioerr->oer_component.oid_device_id),
-				_DEVID_HI(&ioerr->oer_component.oid_device_id),
-				ioerr->oer_component.oid_partition_id,
-				ioerr->oer_component.oid_object_id,
-				ioerr->oer_comp_offset,
-				ioerr->oer_comp_length);
-
-			merge_ioerr(&accumulated_err, ioerr);
-		}
-		list_del(&oir->err_list);
-		objio_free_result(oir);
-	}
-
-	pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
-}
-
-void
-objlayout_encode_layoutreturn(struct xdr_stream *xdr,
-			      const struct nfs4_layoutreturn_args *args)
-{
-	struct pnfs_layout_hdr *pnfslay = args->layout;
-	struct objlayout *objlay = OBJLAYOUT(pnfslay);
-	struct objlayout_io_res *oir, *tmp;
-	__be32 *start;
-
-	dprintk("%s: Begin\n", __func__);
-	start = xdr_reserve_space(xdr, 4);
-	BUG_ON(!start);
-
-	spin_lock(&objlay->lock);
-
-	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
-		__be32 *last_xdr = NULL, *p;
-		unsigned i;
-		int res = 0;
-
-		for (i = 0; i < oir->num_comps; i++) {
-			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
-
-			if (!ioerr->oer_errno)
-				continue;
-
-			dprintk("%s: err[%d]: errno=%d is_write=%d "
-				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
-				"offset=0x%llx length=0x%llx\n",
-				__func__, i, ioerr->oer_errno,
-				ioerr->oer_iswrite,
-				_DEVID_LO(&ioerr->oer_component.oid_device_id),
-				_DEVID_HI(&ioerr->oer_component.oid_device_id),
-				ioerr->oer_component.oid_partition_id,
-				ioerr->oer_component.oid_object_id,
-				ioerr->oer_comp_offset,
-				ioerr->oer_comp_length);
-
-			p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
-			if (unlikely(!p)) {
-				res = -E2BIG;
-				break; /* accumulated_error */
-			}
-
-			last_xdr = p;
-			pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
-		}
-
-		/* TODO: use xdr_write_pages */
-		if (unlikely(res)) {
-			/* no space for even one error descriptor */
-			BUG_ON(!last_xdr);
-
-			/* we've encountered a situation with lots and lots of
-			 * errors and no space to encode them all. Use the last
-			 * available slot to report the union of all the
-			 * remaining errors.
-			 */
-			encode_accumulated_error(objlay, last_xdr);
-			goto loop_done;
-		}
-		list_del(&oir->err_list);
-		objio_free_result(oir);
-	}
-loop_done:
-	spin_unlock(&objlay->lock);
-
-	*start = cpu_to_be32((xdr->p - start - 1) * 4);
-	dprintk("%s: Return\n", __func__);
-}
-
-enum {
-	OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
-	OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
-	OSD_LOGIN_UPCALL_PATHLEN  = 256
-};
-
-static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";
-
-module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
-		    0600);
-MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");
-
-struct __auto_login {
-	char uri[OBJLAYOUT_MAX_URI_LEN];
-	char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
-	char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
-};
-
-static int __objlayout_upcall(struct __auto_login *login)
-{
-	static char *envp[] = { "HOME=/",
-		"TERM=linux",
-		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
-		NULL
-	};
-	char *argv[8];
-	int ret;
-
-	if (unlikely(!osd_login_prog[0])) {
-		dprintk("%s: osd_login_prog is disabled\n", __func__);
-		return -EACCES;
-	}
-
-	dprintk("%s uri: %s\n", __func__, login->uri);
-	dprintk("%s osdname %s\n", __func__, login->osdname);
-	dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
-
-	argv[0] = (char *)osd_login_prog;
-	argv[1] = "-u";
-	argv[2] = login->uri;
-	argv[3] = "-o";
-	argv[4] = login->osdname;
-	argv[5] = "-s";
-	argv[6] = login->systemid_hex;
-	argv[7] = NULL;
-
-	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
-	/*
-	 * Disable the upcall mechanism if we're getting an ENOENT or
-	 * EACCES error. The admin can re-enable it on the fly by using
-	 * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
-	 * the problem has been fixed.
-	 */
-	if (ret == -ENOENT || ret == -EACCES) {
-		printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
-			"objlayoutdriver.osd_login_prog kernel parameter!\n",
-			osd_login_prog);
-		osd_login_prog[0] = '\0';
-	}
-	dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
-
-	return ret;
-}
-
-/* Assume dest is all zeros */
-static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
-					   char *dest, int max_len,
-					   const char *var_name)
-{
-	if (!s.len)
-		return;
-
-	if (s.len >= max_len) {
-		pr_warn_ratelimited(
-			"objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
-			var_name, s.len, max_len);
-		s.len = max_len - 1; /* space for null terminator */
-	}
-
-	memcpy(dest, s.data, s.len);
-}
-
-/* Assume sysid is all zeros */
-static void _sysid_2_hex(struct nfs4_string s,
-		  char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
-{
-	int i;
-	char *cur;
-
-	if (!s.len)
-		return;
-
-	if (s.len != OSD_SYSTEMID_LEN) {
-		pr_warn_ratelimited(
-		    "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
-		    s.len);
-		if (s.len > OSD_SYSTEMID_LEN)
-			s.len = OSD_SYSTEMID_LEN;
-	}
-
-	cur = sysid;
-	for (i = 0; i < s.len; i++)
-		cur = hex_byte_pack(cur, s.data[i]);
-}
-
-int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
-{
-	int rc;
-	struct __auto_login login;
-
-	if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
-		return -ENODEV;
-
-	memset(&login, 0, sizeof(login));
-	__copy_nfsS_and_zero_terminate(
-		deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
-		login.uri, sizeof(login.uri), "URI");
-
-	__copy_nfsS_and_zero_terminate(
-		deviceaddr->oda_osdname,
-		login.osdname, sizeof(login.osdname), "OSDNAME");
-
-	_sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);
-
-	rc = __objlayout_upcall(&login);
-	if (rc > 0) /* script returns positive values */
-		rc = -ENODEV;
-
-	return rc;
-}
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
deleted file mode 100644
index fc94a5872ed4..000000000000
--- a/fs/nfs/objlayout/objlayout.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- *  Data types and function declerations for interfacing with the
- *  pNFS standard object layout driver.
- *
- *  Copyright (C) 2007 Panasas Inc. [year of first publication]
- *  All rights reserved.
- *
- *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <ooo@electrozaur.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  See the file COPYING included with this distribution for more details.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the Panasas company nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _OBJLAYOUT_H
-#define _OBJLAYOUT_H
-
-#include <linux/nfs_fs.h>
-#include <linux/pnfs_osd_xdr.h>
-#include "../pnfs.h"
-
-/*
- * per-inode layout
- */
-struct objlayout {
-	struct pnfs_layout_hdr pnfs_layout;
-
-	 /* for layout_commit */
-	enum osd_delta_space_valid_enum {
-		OBJ_DSU_INIT = 0,
-		OBJ_DSU_VALID,
-		OBJ_DSU_INVALID,
-	} delta_space_valid;
-	s64 delta_space_used;  /* consumed by write ops */
-
-	 /* for layout_return */
-	spinlock_t lock;
-	struct list_head err_list;
-};
-
-static inline struct objlayout *
-OBJLAYOUT(struct pnfs_layout_hdr *lo)
-{
-	return container_of(lo, struct objlayout, pnfs_layout);
-}
-
-/*
- * per-I/O operation state
- * embedded in objects provider io_state data structure
- */
-struct objlayout_io_res {
-	struct objlayout *objlay;
-
-	void *rpcdata;
-	int status;             /* res */
-	int committed;          /* res */
-
-	/* Error reporting (layout_return) */
-	struct list_head err_list;
-	unsigned num_comps;
-	/* Pointer to array of error descriptors of size num_comps.
-	 * It should contain as many entries as devices in the osd_layout
-	 * that participate in the I/O. It is up to the io_engine to allocate
-	 * needed space and set num_comps.
-	 */
-	struct pnfs_osd_ioerr *ioerrs;
-};
-
-static inline
-void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps,
-			struct pnfs_osd_ioerr *ioerrs, void *rpcdata,
-			struct pnfs_layout_hdr *pnfs_layout_type)
-{
-	oir->objlay = OBJLAYOUT(pnfs_layout_type);
-	oir->rpcdata = rpcdata;
-	INIT_LIST_HEAD(&oir->err_list);
-	oir->num_comps = num_comps;
-	oir->ioerrs = ioerrs;
-}
-
-/*
- * Raid engine I/O API
- */
-extern int objio_alloc_lseg(struct pnfs_layout_segment **outp,
-	struct pnfs_layout_hdr *pnfslay,
-	struct pnfs_layout_range *range,
-	struct xdr_stream *xdr,
-	gfp_t gfp_flags);
-extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
-
-/* objio_free_result will free these @oir structs received from
- * objlayout_{read,write}_done
- */
-extern void objio_free_result(struct objlayout_io_res *oir);
-
-extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
-extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
-
-/*
- * callback API
- */
-extern void objlayout_io_set_result(struct objlayout_io_res *oir,
-			unsigned index, struct pnfs_osd_objid *pooid,
-			int osd_error, u64 offset, u64 length, bool is_write);
-
-static inline void
-objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used)
-{
-	/* If one of the I/Os errored out and the delta_space_used was
-	 * invalid we render the complete report as invalid. Protocol mandate
-	 * the DSU be accurate or not reported.
-	 */
-	spin_lock(&objlay->lock);
-	if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
-		objlay->delta_space_valid = OBJ_DSU_VALID;
-		objlay->delta_space_used += space_used;
-	}
-	spin_unlock(&objlay->lock);
-}
-
-extern void objlayout_read_done(struct objlayout_io_res *oir,
-				ssize_t status, bool sync);
-extern void objlayout_write_done(struct objlayout_io_res *oir,
-				 ssize_t status, bool sync);
-
-/*
- * exported generic objects function vectors
- */
-
-extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags);
-extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *);
-
-extern struct pnfs_layout_segment *objlayout_alloc_lseg(
-	struct pnfs_layout_hdr *,
-	struct nfs4_layoutget_res *,
-	gfp_t gfp_flags);
-extern void objlayout_free_lseg(struct pnfs_layout_segment *);
-
-extern enum pnfs_try_status objlayout_read_pagelist(
-	struct nfs_pgio_header *);
-
-extern enum pnfs_try_status objlayout_write_pagelist(
-	struct nfs_pgio_header *,
-	int how);
-
-extern void objlayout_encode_layoutcommit(
-	struct pnfs_layout_hdr *,
-	struct xdr_stream *,
-	const struct nfs4_layoutcommit_args *);
-
-extern void objlayout_encode_layoutreturn(
-	struct xdr_stream *,
-	const struct nfs4_layoutreturn_args *);
-
-extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr);
-
-#endif /* _OBJLAYOUT_H */
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
deleted file mode 100644
index f093c7ec983b..000000000000
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- *  Object-Based pNFS Layout XDR layer
- *
- *  Copyright (C) 2007 Panasas Inc. [year of first publication]
- *  All rights reserved.
- *
- *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <ooo@electrozaur.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  See the file COPYING included with this distribution for more details.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the Panasas company nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/pnfs_osd_xdr.h>
-
-#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
-
-/*
- * The following implementation is based on RFC5664
- */
-
-/*
- * struct pnfs_osd_objid {
- *	struct nfs4_deviceid	oid_device_id;
- *	u64			oid_partition_id;
- *	u64			oid_object_id;
- * }; // xdr size 32 bytes
- */
-static __be32 *
-_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
-{
-	p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data,
-				    sizeof(objid->oid_device_id.data));
-
-	p = xdr_decode_hyper(p, &objid->oid_partition_id);
-	p = xdr_decode_hyper(p, &objid->oid_object_id);
-	return p;
-}
-/*
- * struct pnfs_osd_opaque_cred {
- *	u32 cred_len;
- *	void *cred;
- * }; // xdr size [variable]
- * The return pointers are from the xdr buffer
- */
-static int
-_osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred,
-			    struct xdr_stream *xdr)
-{
-	__be32 *p = xdr_inline_decode(xdr, 1);
-
-	if (!p)
-		return -EINVAL;
-
-	opaque_cred->cred_len = be32_to_cpu(*p++);
-
-	p = xdr_inline_decode(xdr, opaque_cred->cred_len);
-	if (!p)
-		return -EINVAL;
-
-	opaque_cred->cred = p;
-	return 0;
-}
-
-/*
- * struct pnfs_osd_object_cred {
- *	struct pnfs_osd_objid		oc_object_id;
- *	u32				oc_osd_version;
- *	u32				oc_cap_key_sec;
- *	struct pnfs_osd_opaque_cred	oc_cap_key
- *	struct pnfs_osd_opaque_cred	oc_cap;
- * }; // xdr size 32 + 4 + 4 + [variable] + [variable]
- */
-static int
-_osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp,
-			    struct xdr_stream *xdr)
-{
-	__be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4);
-	int ret;
-
-	if (!p)
-		return -EIO;
-
-	p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
-	comp->oc_osd_version = be32_to_cpup(p++);
-	comp->oc_cap_key_sec = be32_to_cpup(p);
-
-	ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr);
-	if (unlikely(ret))
-		return ret;
-
-	ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr);
-	return ret;
-}
-
-/*
- * struct pnfs_osd_data_map {
- *	u32	odm_num_comps;
- *	u64	odm_stripe_unit;
- *	u32	odm_group_width;
- *	u32	odm_group_depth;
- *	u32	odm_mirror_cnt;
- *	u32	odm_raid_algorithm;
- * }; // xdr size 4 + 8 + 4 + 4 + 4 + 4
- */
-static inline int
-_osd_data_map_xdr_sz(void)
-{
-	return 4 + 8 + 4 + 4 + 4 + 4;
-}
-
-static __be32 *
-_osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map)
-{
-	data_map->odm_num_comps = be32_to_cpup(p++);
-	p = xdr_decode_hyper(p, &data_map->odm_stripe_unit);
-	data_map->odm_group_width = be32_to_cpup(p++);
-	data_map->odm_group_depth = be32_to_cpup(p++);
-	data_map->odm_mirror_cnt = be32_to_cpup(p++);
-	data_map->odm_raid_algorithm = be32_to_cpup(p++);
-	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
-		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
-		__func__,
-		data_map->odm_num_comps,
-		(unsigned long long)data_map->odm_stripe_unit,
-		data_map->odm_group_width,
-		data_map->odm_group_depth,
-		data_map->odm_mirror_cnt,
-		data_map->odm_raid_algorithm);
-	return p;
-}
-
-int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
-	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr)
-{
-	__be32 *p;
-
-	memset(iter, 0, sizeof(*iter));
-
-	p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4);
-	if (unlikely(!p))
-		return -EINVAL;
-
-	p = _osd_xdr_decode_data_map(p, &layout->olo_map);
-	layout->olo_comps_index = be32_to_cpup(p++);
-	layout->olo_num_comps = be32_to_cpup(p++);
-	dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
-		layout->olo_comps_index, layout->olo_num_comps);
-
-	iter->total_comps = layout->olo_num_comps;
-	return 0;
-}
-
-bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp,
-	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr,
-	int *err)
-{
-	BUG_ON(iter->decoded_comps > iter->total_comps);
-	if (iter->decoded_comps == iter->total_comps)
-		return false;
-
-	*err = _osd_xdr_decode_object_cred(comp, xdr);
-	if (unlikely(*err)) {
-		dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d "
-			"total_comps=%d\n", __func__, *err,
-			iter->decoded_comps, iter->total_comps);
-		return false; /* stop the loop */
-	}
-	dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx "
-		"key_len=%u cap_len=%u\n",
-		__func__,
-		_DEVID_LO(&comp->oc_object_id.oid_device_id),
-		_DEVID_HI(&comp->oc_object_id.oid_device_id),
-		comp->oc_object_id.oid_partition_id,
-		comp->oc_object_id.oid_object_id,
-		comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
-
-	iter->decoded_comps++;
-	return true;
-}
-
-/*
- * Get Device Information Decoding
- *
- * Note: since Device Information is currently done synchronously, all
- *       variable strings fields are left inside the rpc buffer and are only
- *       pointed to by the pnfs_osd_deviceaddr members. So the read buffer
- *       should not be freed while the returned information is in use.
- */
-/*
- *struct nfs4_string {
- *	unsigned int len;
- *	char *data;
- *}; // size [variable]
- * NOTE: Returned string points to inside the XDR buffer
- */
-static __be32 *
-__read_u8_opaque(__be32 *p, struct nfs4_string *str)
-{
-	str->len = be32_to_cpup(p++);
-	str->data = (char *)p;
-
-	p += XDR_QUADLEN(str->len);
-	return p;
-}
-
-/*
- * struct pnfs_osd_targetid {
- *	u32			oti_type;
- *	struct nfs4_string	oti_scsi_device_id;
- * };// size 4 + [variable]
- */
-static __be32 *
-__read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid)
-{
-	u32 oti_type;
-
-	oti_type = be32_to_cpup(p++);
-	targetid->oti_type = oti_type;
-
-	switch (oti_type) {
-	case OBJ_TARGET_SCSI_NAME:
-	case OBJ_TARGET_SCSI_DEVICE_ID:
-		p = __read_u8_opaque(p, &targetid->oti_scsi_device_id);
-	}
-
-	return p;
-}
-
-/*
- * struct pnfs_osd_net_addr {
- *	struct nfs4_string	r_netid;
- *	struct nfs4_string	r_addr;
- * };
- */
-static __be32 *
-__read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr)
-{
-	p = __read_u8_opaque(p, &netaddr->r_netid);
-	p = __read_u8_opaque(p, &netaddr->r_addr);
-
-	return p;
-}
-
-/*
- * struct pnfs_osd_targetaddr {
- *	u32				ota_available;
- *	struct pnfs_osd_net_addr	ota_netaddr;
- * };
- */
-static __be32 *
-__read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr)
-{
-	u32 ota_available;
-
-	ota_available = be32_to_cpup(p++);
-	targetaddr->ota_available = ota_available;
-
-	if (ota_available)
-		p = __read_net_addr(p, &targetaddr->ota_netaddr);
-
-
-	return p;
-}
-
-/*
- * struct pnfs_osd_deviceaddr {
- *	struct pnfs_osd_targetid	oda_targetid;
- *	struct pnfs_osd_targetaddr	oda_targetaddr;
- *	u8				oda_lun[8];
- *	struct nfs4_string		oda_systemid;
- *	struct pnfs_osd_object_cred	oda_root_obj_cred;
- *	struct nfs4_string		oda_osdname;
- * };
- */
-
-/* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does
- * not have an xdr_stream
- */
-static __be32 *
-__read_opaque_cred(__be32 *p,
-			      struct pnfs_osd_opaque_cred *opaque_cred)
-{
-	opaque_cred->cred_len = be32_to_cpu(*p++);
-	opaque_cred->cred = p;
-	return p + XDR_QUADLEN(opaque_cred->cred_len);
-}
-
-static __be32 *
-__read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp)
-{
-	p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
-	comp->oc_osd_version = be32_to_cpup(p++);
-	comp->oc_cap_key_sec = be32_to_cpup(p++);
-
-	p = __read_opaque_cred(p, &comp->oc_cap_key);
-	p = __read_opaque_cred(p, &comp->oc_cap);
-	return p;
-}
-
-void pnfs_osd_xdr_decode_deviceaddr(
-	struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p)
-{
-	p = __read_targetid(p, &deviceaddr->oda_targetid);
-
-	p = __read_targetaddr(p, &deviceaddr->oda_targetaddr);
-
-	p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun,
-				    sizeof(deviceaddr->oda_lun));
-
-	p = __read_u8_opaque(p, &deviceaddr->oda_systemid);
-
-	p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred);
-
-	p = __read_u8_opaque(p, &deviceaddr->oda_osdname);
-
-	/* libosd likes this terminated in dbg. It's last, so no problems */
-	deviceaddr->oda_osdname.data[deviceaddr->oda_osdname.len] = 0;
-}
-
-/*
- * struct pnfs_osd_layoutupdate {
- *	u32	dsu_valid;
- *	s64	dsu_delta;
- *	u32	olu_ioerr_flag;
- * }; xdr size 4 + 8 + 4
- */
-int
-pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
-				 struct pnfs_osd_layoutupdate *lou)
-{
-	__be32 *p = xdr_reserve_space(xdr,  4 + 8 + 4);
-
-	if (!p)
-		return -E2BIG;
-
-	*p++ = cpu_to_be32(lou->dsu_valid);
-	if (lou->dsu_valid)
-		p = xdr_encode_hyper(p, lou->dsu_delta);
-	*p++ = cpu_to_be32(lou->olu_ioerr_flag);
-	return 0;
-}
-
-/*
- * struct pnfs_osd_objid {
- *	struct nfs4_deviceid	oid_device_id;
- *	u64			oid_partition_id;
- *	u64			oid_object_id;
- * }; // xdr size 32 bytes
- */
-static inline __be32 *
-pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id)
-{
-	p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
-				    sizeof(object_id->oid_device_id.data));
-	p = xdr_encode_hyper(p, object_id->oid_partition_id);
-	p = xdr_encode_hyper(p, object_id->oid_object_id);
-
-	return p;
-}
-
-/*
- * struct pnfs_osd_ioerr {
- *	struct pnfs_osd_objid	oer_component;
- *	u64			oer_comp_offset;
- *	u64			oer_comp_length;
- *	u32			oer_iswrite;
- *	u32			oer_errno;
- * }; // xdr size 32 + 24 bytes
- */
-void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr)
-{
-	p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component);
-	p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
-	p = xdr_encode_hyper(p, ioerr->oer_comp_length);
-	*p++ = cpu_to_be32(ioerr->oer_iswrite);
-	*p   = cpu_to_be32(ioerr->oer_errno);
-}
-
-__be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr)
-{
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, 32 + 24);
-	if (unlikely(!p))
-		dprintk("%s: out of xdr space\n", __func__);
-
-	return p;
-}
-- 
cgit v1.2.3


From 73504740df253b653ba7df4bd98e170b72cfd58b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 20 Apr 2017 16:48:14 -0400
Subject: pNFS: Remove unused layout driver callbacks

encode_layoutreturn and encode_layoutcommit are now unused. Let's
remove them.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4xdr.c | 17 ++++-------------
 fs/nfs/pnfs.h    |  5 -----
 2 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 80ce289eea05..125212588115 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2005,16 +2005,10 @@ encode_layoutcommit(struct xdr_stream *xdr,
 	*p++ = cpu_to_be32(0); /* Never send time_modify_changed */
 	*p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
 
-	if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) {
-		NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit(
-			NFS_I(inode)->layout, xdr, args);
-	} else {
-		encode_uint32(xdr, args->layoutupdate_len);
-		if (args->layoutupdate_pages) {
-			xdr_write_pages(xdr, args->layoutupdate_pages, 0,
-					args->layoutupdate_len);
-		}
-	}
+	encode_uint32(xdr, args->layoutupdate_len);
+	if (args->layoutupdate_pages)
+		xdr_write_pages(xdr, args->layoutupdate_pages, 0,
+				args->layoutupdate_len);
 
 	return 0;
 }
@@ -2024,7 +2018,6 @@ encode_layoutreturn(struct xdr_stream *xdr,
 		    const struct nfs4_layoutreturn_args *args,
 		    struct compound_hdr *hdr)
 {
-	const struct pnfs_layoutdriver_type *lr_ops = NFS_SERVER(args->inode)->pnfs_curr_ld;
 	__be32 *p;
 
 	encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr);
@@ -2041,8 +2034,6 @@ encode_layoutreturn(struct xdr_stream *xdr,
 	spin_unlock(&args->inode->i_lock);
 	if (args->ld_private->ops && args->ld_private->ops->encode)
 		args->ld_private->ops->encode(xdr, args, args->ld_private);
-	else if (lr_ops->encode_layoutreturn)
-		lr_ops->encode_layoutreturn(xdr, args);
 	else
 		encode_uint32(xdr, 0);
 }
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 590e1e35781f..825a1b8ddc4f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -173,14 +173,9 @@ struct pnfs_layoutdriver_type {
 			gfp_t gfp_flags);
 
 	int (*prepare_layoutreturn) (struct nfs4_layoutreturn_args *);
-	void (*encode_layoutreturn) (struct xdr_stream *xdr,
-				     const struct nfs4_layoutreturn_args *args);
 
 	void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data);
 	int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args);
-	void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo,
-				     struct xdr_stream *xdr,
-				     const struct nfs4_layoutcommit_args *args);
 	int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
 };
 
-- 
cgit v1.2.3


From b94196888feaedc93d659572f556207f1564acd6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 20 Apr 2017 16:53:58 -0400
Subject: pNFS: Unexport pnfs_put_lseg_locked and _pnfs_return_layout

They are not used outside the NFSv4 module.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index dd042498ce7c..3917a6272789 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -563,7 +563,6 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg)
 		}
 	}
 }
-EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked);
 
 /*
  * is l2 fully contained in l1?
@@ -1209,7 +1208,6 @@ out:
 	dprintk("<-- %s status: %d\n", __func__, status);
 	return status;
 }
-EXPORT_SYMBOL_GPL(_pnfs_return_layout);
 
 int
 pnfs_commit_and_return_layout(struct inode *inode)
-- 
cgit v1.2.3


From 675e508f53e2cc0b1ab750a0ff2b477ccbab4cfb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 20 Apr 2017 16:58:50 -0400
Subject: pNFS: unexport nfs4_pnfs_v3_ds_connect_unload

It is not used outside the NFSv4 module.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs_nfs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 1edf5b84aba5..dc4111d08a27 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -613,7 +613,6 @@ void nfs4_pnfs_v3_ds_connect_unload(void)
 		get_v3_ds_connect = NULL;
 	}
 }
-EXPORT_SYMBOL_GPL(nfs4_pnfs_v3_ds_connect_unload);
 
 static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
 				 struct nfs4_pnfs_ds *ds,
-- 
cgit v1.2.3


From a98e1788b005fbbf3f74f6f2325c743872dc583f Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 21 Apr 2017 03:06:06 +0200
Subject: NET: sb1250-mac: Add missing MODULE_LICENSE()

As per comment, the code has always been GPLv2 licensed.

This fixes the follwoing modpost warning:

WARNING: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/broadcom/sb1250-mac.o

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 drivers/net/ethernet/broadcom/sb1250-mac.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 55c8e25b43d9..16a0f192daec 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -2641,3 +2641,4 @@ static struct platform_driver sbmac_driver = {
 };
 
 module_platform_driver(sbmac_driver);
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From c1d590081e3f77b364e8563d3edf83ca245a51d9 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 21 Apr 2017 03:14:51 +0200
Subject: MIPS: Sibyte: Export symbol periph_rev to sb1250-mac network driver.

This fixes the following modpost error:

ERROR: "periph_rev" [drivers/net/ethernet/broadcom/sb1250-mac.ko] undefined!

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/sibyte/bcm1480/setup.c | 1 +
 arch/mips/sibyte/sb1250/setup.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/mips/sibyte/bcm1480/setup.c b/arch/mips/sibyte/bcm1480/setup.c
index a05246cbf54c..2035aaec8514 100644
--- a/arch/mips/sibyte/bcm1480/setup.c
+++ b/arch/mips/sibyte/bcm1480/setup.c
@@ -36,6 +36,7 @@ unsigned int soc_pass;
 unsigned int soc_type;
 EXPORT_SYMBOL(soc_type);
 unsigned int periph_rev;
+EXPORT_SYMBOL_GPL(periph_rev);
 unsigned int zbbus_mhz;
 EXPORT_SYMBOL(zbbus_mhz);
 
diff --git a/arch/mips/sibyte/sb1250/setup.c b/arch/mips/sibyte/sb1250/setup.c
index 90e43782342b..aa7713adfa58 100644
--- a/arch/mips/sibyte/sb1250/setup.c
+++ b/arch/mips/sibyte/sb1250/setup.c
@@ -34,6 +34,7 @@ unsigned int soc_pass;
 unsigned int soc_type;
 EXPORT_SYMBOL(soc_type);
 unsigned int periph_rev;
+EXPORT_SYMBOL_GPL(periph_rev);
 unsigned int zbbus_mhz;
 EXPORT_SYMBOL(zbbus_mhz);
 
-- 
cgit v1.2.3


From 3e441845caf1c9591b5b961f34ff1a37d023c9e2 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 21 Apr 2017 03:28:18 +0200
Subject: MIPS: Sibyte: Fix Kconfig warning.

warning: (SB1XXX_CORELIS) selects DEBUG_INFO which has unmet direct dependencies (DEBUG_KERNEL &amp;&amp; !COMPILE_TEST)

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index 7f975b20b20c..42a97c59200f 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -82,7 +82,7 @@ config CMDLINE_OVERRIDE
 config SB1XXX_CORELIS
 	bool "Corelis Debugger"
 	depends on SIBYTE_SB1xxx_SOC
-	select DEBUG_INFO
+	select DEBUG_INFO if !COMPILE_TEST
 	help
 	  Select compile flags that produce code that can be processed by the
 	  Corelis mksym utility and UDB Emulator.
-- 
cgit v1.2.3


From 6ade8694f471d847500c7cec152cc15171cef5d5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 20 Apr 2017 17:30:06 -0700
Subject: kvm: Move srcu_struct fields to end of struct kvm

Parallelizing SRCU callback handling will increase the size of
srcu_struct, which will move the kvm structure's kvm_arch field out
of reach of powerpc's current assembly code, which will result in the
following sort of build error:

arch/powerpc/kvm/book3s_hv_rmhandlers.S:617: Error: operand out of range (0x000000000000b328 is not between 0xffffffffffff8000 and 0x0000000000007fff)

This commit moves the srcu_struct fields in the kvm structure to follow
the kvm_arch field, which will allow powerpc's assembly code to continue
to be able to reach the kvm_arch field.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: Michael Ellerman <michaele@au1.ibm.com>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
[ paulmck: Moved this commit to precede SRCU callback parallelization,
  and reworded the commit log into future tense, all in the name of
  bisectability. ]
---
 include/linux/kvm_host.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c14ad9809da..96c8e29c6442 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -375,8 +375,6 @@ struct kvm {
 	struct mutex slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
-	struct srcu_struct srcu;
-	struct srcu_struct irq_srcu;
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 
 	/*
@@ -429,6 +427,8 @@ struct kvm {
 	struct list_head devices;
 	struct dentry *debugfs_dentry;
 	struct kvm_stat_data **debugfs_stat_data;
+	struct srcu_struct srcu;
+	struct srcu_struct irq_srcu;
 };
 
 #define kvm_err(fmt, ...) \
-- 
cgit v1.2.3


From da915ad5cf25b5f5d358dd3670c3378d8ae8c03e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 5 Apr 2017 09:01:53 -0700
Subject: srcu: Parallelize callback handling

Peter Zijlstra proposed using SRCU to reduce mmap_sem contention [1,2],
however, there are workloads that could result in a high volume of
concurrent invocations of call_srcu(), which with current SRCU would
result in excessive lock contention on the srcu_struct structure's
->queue_lock, which protects SRCU's callback lists.  This commit therefore
moves SRCU to per-CPU callback lists, thus greatly reducing contention.

Because a given SRCU instance no longer has a single centralized callback
list, starting grace periods and invoking callbacks are both more complex
than in the single-list Classic SRCU implementation.  Starting grace
periods and handling callbacks are now handled using an srcu_node tree
that is in some ways similar to the rcu_node trees used by RCU-bh,
RCU-preempt, and RCU-sched (for example, the srcu_node tree shape is
controlled by exactly the same Kconfig options and boot parameters that
control the shape of the rcu_node tree).

In addition, the old per-CPU srcu_array structure is now named srcu_data
and contains an rcu_segcblist structure named ->srcu_cblist for its
callbacks (and a spinlock to protect this).  The srcu_struct gets
an srcu_gp_seq that is used to associate callback segments with the
corresponding completion-time grace-period number.  These completion-time
grace-period numbers are propagated up the srcu_node tree so that the
grace-period workqueue handler can determine whether additional grace
periods are needed on the one hand and where to look for callbacks that
are ready to be invoked.

The srcu_barrier() function must now wait on all instances of the per-CPU
->srcu_cblist.  Because each ->srcu_cblist is protected by ->lock,
srcu_barrier() can remotely add the needed callbacks.  In theory,
it could also remotely start grace periods, but in practice doing so
is complex and racy.  And interestingly enough, it is never necessary
for srcu_barrier() to start a grace period because srcu_barrier() only
enqueues a callback when a callback is already present--and it turns out
that a grace period has to have already been started for this pre-existing
callback.  Furthermore, it is only the callback that srcu_barrier()
needs to wait on, not any particular grace period.  Therefore, a new
rcu_segcblist_entrain() function enqueues the srcu_barrier() function's
callback into the same segment occupied by the last pre-existing callback
in the list.  The special case where all the pre-existing callbacks are
on a different list (because they are in the process of being invoked)
is handled by enqueuing srcu_barrier()'s callback into the RCU_DONE_TAIL
segment, relying on the done-callbacks check that takes place after all
callbacks are inovked.

Note that the readers use the same algorithm as before.  Note that there
is a separate srcu_idx that tells the readers what counter to increment.
This unfortunately cannot be combined with srcu_gp_seq because they
need to be incremented at different times.

This commit introduces some ugly #ifdefs in rcutorture.  These will go
away when I feel good enough about Tree SRCU to ditch Classic SRCU.

Some crude performance comparisons, courtesy of a quickly hacked rcuperf
asynchronous-grace-period capability:

			Callback Queuing Overhead
			-------------------------
	# CPUS		Classic SRCU	Tree SRCU
	------          ------------    ---------
	     2              0.349 us     0.342 us
	    16             31.66  us     0.4   us
	    41             ---------     0.417 us

The times are the 90th percentiles, a statistic that was chosen to reject
the overheads of the occasional srcu_barrier() call needed to avoid OOMing
the test machine.  The rcuperf test hangs when running Classic SRCU at 41
CPUs, hence the line of dashes.  Despite the hacks to both the rcuperf code
and that statistics, this is a convincing demonstration of Tree SRCU's
performance and scalability advantages.

[1] https://lwn.net/Articles/309030/
[2] https://patchwork.kernel.org/patch/5108281/

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Fix initialization if synchronize_srcu_expedited() called first. ]
---
 include/linux/rcu_segcblist.h |  42 ++-
 include/linux/srcutree.h      |  80 ++++--
 kernel/rcu/rcutorture.c       |  20 +-
 kernel/rcu/srcutree.c         | 642 +++++++++++++++++++++++++++++++++---------
 kernel/rcu/tree.c             |   6 +
 kernel/rcu/tree.h             |   8 +
 6 files changed, 647 insertions(+), 151 deletions(-)

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 74b1e7243955..ced8f313fd05 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -401,6 +401,37 @@ static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
 	rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
 }
 
+/*
+ * Entrain the specified callback onto the specified rcu_segcblist at
+ * the end of the last non-empty segment.  If the entire rcu_segcblist
+ * is empty, make no change, but return false.
+ *
+ * This is intended for use by rcu_barrier()-like primitives, -not-
+ * for normal grace-period use.  IMPORTANT:  The callback you enqueue
+ * will wait for all prior callbacks, NOT necessarily for a grace
+ * period.  You have been warned.
+ */
+static inline bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
+					 struct rcu_head *rhp, bool lazy)
+{
+	int i;
+
+	if (rcu_segcblist_n_cbs(rsclp) == 0)
+		return false;
+	WRITE_ONCE(rsclp->len, rsclp->len + 1);
+	if (lazy)
+		rsclp->len_lazy++;
+	smp_mb(); /* Ensure counts are updated before callback is entrained. */
+	rhp->next = NULL;
+	for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] != rsclp->tails[i - 1])
+			break;
+	*rsclp->tails[i] = rhp;
+	for (; i <= RCU_NEXT_TAIL; i++)
+		rsclp->tails[i] = &rhp->next;
+	return true;
+}
+
 /*
  * Extract only the counts from the specified rcu_segcblist structure,
  * and place them in the specified rcu_cblist structure.  This function
@@ -537,7 +568,8 @@ static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
 	int i, j;
 
 	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
-	WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
+	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
+		return;
 
 	/*
 	 * Find all callbacks whose ->gp_seq numbers indicate that they
@@ -582,8 +614,9 @@ static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
  * them to complete at the end of the earlier grace period.
  *
  * This function operates on an rcu_segcblist structure, and also the
- * grace-period sequence number at which new callbacks would become
- * ready to invoke.
+ * grace-period sequence number seq at which new callbacks would become
+ * ready to invoke.  Returns true if there are callbacks that won't be
+ * ready to invoke until seq, false otherwise.
  */
 static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
 					    unsigned long seq)
@@ -591,7 +624,8 @@ static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
 	int i;
 
 	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
-	WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
+	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
+		return false;
 
 	/*
 	 * Find the segment preceding the oldest segment of callbacks
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index f2b3bd6c6bc2..0400e211aa44 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -24,25 +24,75 @@
 #ifndef _LINUX_SRCU_TREE_H
 #define _LINUX_SRCU_TREE_H
 
-struct srcu_array {
-	unsigned long lock_count[2];
-	unsigned long unlock_count[2];
+#include <linux/rcu_node_tree.h>
+#include <linux/completion.h>
+
+struct srcu_node;
+struct srcu_struct;
+
+/*
+ * Per-CPU structure feeding into leaf srcu_node, similar in function
+ * to rcu_node.
+ */
+struct srcu_data {
+	/* Read-side state. */
+	unsigned long srcu_lock_count[2];	/* Locks per CPU. */
+	unsigned long srcu_unlock_count[2];	/* Unlocks per CPU. */
+
+	/* Update-side state. */
+	spinlock_t lock ____cacheline_internodealigned_in_smp;
+	struct rcu_segcblist srcu_cblist;	/* List of callbacks.*/
+	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
+	bool srcu_cblist_invoking;		/* Invoking these CBs? */
+	struct delayed_work work;		/* Context for CB invoking. */
+	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
+	struct srcu_node *mynode;		/* Leaf srcu_node. */
+	int cpu;
+	struct srcu_struct *sp;
 };
 
+/*
+ * Node in SRCU combining tree, similar in function to rcu_data.
+ */
+struct srcu_node {
+	spinlock_t lock;
+	unsigned long srcu_have_cbs[4];		/* GP seq for children */
+						/*  having CBs, but only */
+						/*  is > ->srcu_gq_seq. */
+	struct srcu_node *srcu_parent;		/* Next up in tree. */
+	int grplo;				/* Least CPU for node. */
+	int grphi;				/* Biggest CPU for node. */
+};
+
+/*
+ * Per-SRCU-domain structure, similar in function to rcu_state.
+ */
 struct srcu_struct {
-	unsigned long completed;
-	unsigned long srcu_gp_seq;
-	atomic_t srcu_exp_cnt;
-	struct srcu_array __percpu *per_cpu_ref;
-	spinlock_t queue_lock; /* protect ->srcu_cblist */
-	struct rcu_segcblist srcu_cblist;
+	struct srcu_node node[NUM_RCU_NODES];	/* Combining tree. */
+	struct srcu_node *level[RCU_NUM_LVLS + 1];
+						/* First node at each level. */
+	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */
+	spinlock_t gp_lock;			/* protect ->srcu_cblist */
+	struct mutex srcu_gp_mutex;		/* Serialize GP work. */
+	unsigned int srcu_idx;			/* Current rdr array element. */
+	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
+	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
+	atomic_t srcu_exp_cnt;			/* # ongoing expedited GPs. */
+	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
+	unsigned long srcu_barrier_seq;		/* srcu_barrier seq #. */
+	struct mutex srcu_barrier_mutex;	/* Serialize barrier ops. */
+	struct completion srcu_barrier_completion;
+						/* Awaken barrier rq at end. */
+	atomic_t srcu_barrier_cpu_cnt;		/* # CPUs not yet posting a */
+						/*  callback for the barrier */
+						/*  operation. */
 	struct delayed_work work;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 };
 
-/* Values for -> state variable. */
+/* Values for state variable (bottom bits of ->srcu_gp_seq). */
 #define SRCU_STATE_IDLE		0
 #define SRCU_STATE_SCAN1	1
 #define SRCU_STATE_SCAN2	2
@@ -51,11 +101,9 @@ void process_srcu(struct work_struct *work);
 
 #define __SRCU_STRUCT_INIT(name)					\
 	{								\
-		.completed = -300,					\
-		.per_cpu_ref = &name##_srcu_array,			\
-		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
-		.srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\
-		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
+		.sda = &name##_srcu_data,				\
+		.gp_lock = __SPIN_LOCK_UNLOCKED(name.gp_lock),		\
+		.srcu_gp_seq_needed = 0 - 1,				\
 		__SRCU_DEP_MAP_INIT(name)				\
 	}
 
@@ -79,7 +127,7 @@ void process_srcu(struct work_struct *work);
  * See include/linux/percpu-defs.h for the rules on per-CPU variables.
  */
 #define __DEFINE_SRCU(name, is_static)					\
-	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
+	static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\
 	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
 #define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
 #define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 6f344b6748a8..e9d4527cdd43 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -563,17 +563,30 @@ static void srcu_torture_stats(void)
 	int idx;
 
 #if defined(CONFIG_TREE_SRCU) || defined(CONFIG_CLASSIC_SRCU)
+#ifdef CONFIG_TREE_SRCU
+	idx = srcu_ctlp->srcu_idx & 0x1;
+#else /* #ifdef CONFIG_TREE_SRCU */
 	idx = srcu_ctlp->completed & 0x1;
+#endif /* #else #ifdef CONFIG_TREE_SRCU */
 	pr_alert("%s%s Tree SRCU per-CPU(idx=%d):",
 		 torture_type, TORTURE_FLAG, idx);
 	for_each_possible_cpu(cpu) {
 		unsigned long l0, l1;
 		unsigned long u0, u1;
 		long c0, c1;
-		struct srcu_array *counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu);
+#ifdef CONFIG_TREE_SRCU
+		struct srcu_data *counts;
 
+		counts = per_cpu_ptr(srcu_ctlp->sda, cpu);
+		u0 = counts->srcu_unlock_count[!idx];
+		u1 = counts->srcu_unlock_count[idx];
+#else /* #ifdef CONFIG_TREE_SRCU */
+		struct srcu_array *counts;
+
+		counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu);
 		u0 = counts->unlock_count[!idx];
 		u1 = counts->unlock_count[idx];
+#endif /* #else #ifdef CONFIG_TREE_SRCU */
 
 		/*
 		 * Make sure that a lock is always counted if the corresponding
@@ -581,8 +594,13 @@ static void srcu_torture_stats(void)
 		 */
 		smp_rmb();
 
+#ifdef CONFIG_TREE_SRCU
+		l0 = counts->srcu_lock_count[!idx];
+		l1 = counts->srcu_lock_count[idx];
+#else /* #ifdef CONFIG_TREE_SRCU */
 		l0 = counts->lock_count[!idx];
 		l1 = counts->lock_count[idx];
+#endif /* #else #ifdef CONFIG_TREE_SRCU */
 
 		c0 = l0 - u0;
 		c1 = l1 - u1;
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index da676b0d016b..12feeca18f46 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -36,19 +36,110 @@
 #include <linux/delay.h>
 #include <linux/srcu.h>
 
-#include <linux/rcu_node_tree.h>
 #include "rcu.h"
 
-static int init_srcu_struct_fields(struct srcu_struct *sp)
+static void srcu_invoke_callbacks(struct work_struct *work);
+static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
+
+/*
+ * Initialize SRCU combining tree.  Note that statically allocated
+ * srcu_struct structures might already have srcu_read_lock() and
+ * srcu_read_unlock() running against them.  So if the is_static parameter
+ * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
+ */
+static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 {
-	sp->completed = 0;
+	int cpu;
+	int i;
+	int level = 0;
+	int levelspread[RCU_NUM_LVLS];
+	struct srcu_data *sdp;
+	struct srcu_node *snp;
+	struct srcu_node *snp_first;
+
+	/* Work out the overall tree geometry. */
+	sp->level[0] = &sp->node[0];
+	for (i = 1; i < rcu_num_lvls; i++)
+		sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1];
+	rcu_init_levelspread(levelspread, num_rcu_lvl);
+
+	/* Each pass through this loop initializes one srcu_node structure. */
+	rcu_for_each_node_breadth_first(sp, snp) {
+		spin_lock_init(&snp->lock);
+		for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++)
+			snp->srcu_have_cbs[i] = 0;
+		snp->grplo = -1;
+		snp->grphi = -1;
+		if (snp == &sp->node[0]) {
+			/* Root node, special case. */
+			snp->srcu_parent = NULL;
+			continue;
+		}
+
+		/* Non-root node. */
+		if (snp == sp->level[level + 1])
+			level++;
+		snp->srcu_parent = sp->level[level - 1] +
+				   (snp - sp->level[level]) /
+				   levelspread[level - 1];
+	}
+
+	/*
+	 * Initialize the per-CPU srcu_data array, which feeds into the
+	 * leaves of the srcu_node tree.
+	 */
+	WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
+		     ARRAY_SIZE(sdp->srcu_unlock_count));
+	level = rcu_num_lvls - 1;
+	snp_first = sp->level[level];
+	for_each_possible_cpu(cpu) {
+		sdp = per_cpu_ptr(sp->sda, cpu);
+		spin_lock_init(&sdp->lock);
+		rcu_segcblist_init(&sdp->srcu_cblist);
+		sdp->srcu_cblist_invoking = false;
+		sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
+		sdp->mynode = &snp_first[cpu / levelspread[level]];
+		for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
+			if (snp->grplo < 0)
+				snp->grplo = cpu;
+			snp->grphi = cpu;
+		}
+		sdp->cpu = cpu;
+		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
+		sdp->sp = sp;
+		if (is_static)
+			continue;
+
+		/* Dynamically allocated, better be no srcu_read_locks()! */
+		for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) {
+			sdp->srcu_lock_count[i] = 0;
+			sdp->srcu_unlock_count[i] = 0;
+		}
+	}
+}
+
+/*
+ * Initialize non-compile-time initialized fields, including the
+ * associated srcu_node and srcu_data structures.  The is_static
+ * parameter is passed through to init_srcu_struct_nodes(), and
+ * also tells us that ->sda has already been wired up to srcu_data.
+ */
+static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
+{
+	mutex_init(&sp->srcu_cb_mutex);
+	mutex_init(&sp->srcu_gp_mutex);
+	sp->srcu_idx = 0;
 	sp->srcu_gp_seq = 0;
 	atomic_set(&sp->srcu_exp_cnt, 0);
-	spin_lock_init(&sp->queue_lock);
-	rcu_segcblist_init(&sp->srcu_cblist);
+	sp->srcu_barrier_seq = 0;
+	mutex_init(&sp->srcu_barrier_mutex);
+	atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
 	INIT_DELAYED_WORK(&sp->work, process_srcu);
-	sp->per_cpu_ref = alloc_percpu(struct srcu_array);
-	return sp->per_cpu_ref ? 0 : -ENOMEM;
+	if (!is_static)
+		sp->sda = alloc_percpu(struct srcu_data);
+	init_srcu_struct_nodes(sp, is_static);
+	smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */
+	return sp->sda ? 0 : -ENOMEM;
 }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -59,7 +150,8 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
 	/* Don't re-initialize a lock while it is held. */
 	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
 	lockdep_init_map(&sp->dep_map, name, key, 0);
-	return init_srcu_struct_fields(sp);
+	spin_lock_init(&sp->gp_lock);
+	return init_srcu_struct_fields(sp, false);
 }
 EXPORT_SYMBOL_GPL(__init_srcu_struct);
 
@@ -75,15 +167,41 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct);
  */
 int init_srcu_struct(struct srcu_struct *sp)
 {
-	return init_srcu_struct_fields(sp);
+	spin_lock_init(&sp->gp_lock);
+	return init_srcu_struct_fields(sp, false);
 }
 EXPORT_SYMBOL_GPL(init_srcu_struct);
 
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 /*
- * Returns approximate total of the readers' ->lock_count[] values for the
- * rank of per-CPU counters specified by idx.
+ * First-use initialization of statically allocated srcu_struct
+ * structure.  Wiring up the combining tree is more than can be
+ * done with compile-time initialization, so this check is added
+ * to each update-side SRCU primitive.  Use ->gp_lock, which -is-
+ * compile-time initialized, to resolve races involving multiple
+ * CPUs trying to garner first-use privileges.
+ */
+static void check_init_srcu_struct(struct srcu_struct *sp)
+{
+	unsigned long flags;
+
+	WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT);
+	/* The smp_load_acquire() pairs with the smp_store_release(). */
+	if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
+		return; /* Already initialized. */
+	spin_lock_irqsave(&sp->gp_lock, flags);
+	if (!rcu_seq_state(sp->srcu_gp_seq_needed)) {
+		spin_unlock_irqrestore(&sp->gp_lock, flags);
+		return;
+	}
+	init_srcu_struct_fields(sp, true);
+	spin_unlock_irqrestore(&sp->gp_lock, flags);
+}
+
+/*
+ * Returns approximate total of the readers' ->srcu_lock_count[] values
+ * for the rank of per-CPU counters specified by idx.
  */
 static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
 {
@@ -91,16 +209,16 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
 	unsigned long sum = 0;
 
 	for_each_possible_cpu(cpu) {
-		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
+		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);
 
-		sum += READ_ONCE(cpuc->lock_count[idx]);
+		sum += READ_ONCE(cpuc->srcu_lock_count[idx]);
 	}
 	return sum;
 }
 
 /*
- * Returns approximate total of the readers' ->unlock_count[] values for the
- * rank of per-CPU counters specified by idx.
+ * Returns approximate total of the readers' ->srcu_unlock_count[] values
+ * for the rank of per-CPU counters specified by idx.
  */
 static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
 {
@@ -108,9 +226,9 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
 	unsigned long sum = 0;
 
 	for_each_possible_cpu(cpu) {
-		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
+		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);
 
-		sum += READ_ONCE(cpuc->unlock_count[idx]);
+		sum += READ_ONCE(cpuc->srcu_unlock_count[idx]);
 	}
 	return sum;
 }
@@ -145,14 +263,14 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
 	 * the current index but not have incremented the lock counter yet.
 	 *
 	 * Possible bug: There is no guarantee that there haven't been
-	 * ULONG_MAX increments of ->lock_count[] since the unlocks were
+	 * ULONG_MAX increments of ->srcu_lock_count[] since the unlocks were
 	 * counted, meaning that this could return true even if there are
 	 * still active readers.  Since there are no memory barriers around
-	 * srcu_flip(), the CPU is not required to increment ->completed
+	 * srcu_flip(), the CPU is not required to increment ->srcu_idx
 	 * before running srcu_readers_unlock_idx(), which means that there
 	 * could be an arbitrarily large number of critical sections that
 	 * execute after srcu_readers_unlock_idx() but use the old value
-	 * of ->completed.
+	 * of ->srcu_idx.
 	 */
 	return srcu_readers_lock_idx(sp, idx) == unlocks;
 }
@@ -172,12 +290,12 @@ static bool srcu_readers_active(struct srcu_struct *sp)
 	unsigned long sum = 0;
 
 	for_each_possible_cpu(cpu) {
-		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
+		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);
 
-		sum += READ_ONCE(cpuc->lock_count[0]);
-		sum += READ_ONCE(cpuc->lock_count[1]);
-		sum -= READ_ONCE(cpuc->unlock_count[0]);
-		sum -= READ_ONCE(cpuc->unlock_count[1]);
+		sum += READ_ONCE(cpuc->srcu_lock_count[0]);
+		sum += READ_ONCE(cpuc->srcu_lock_count[1]);
+		sum -= READ_ONCE(cpuc->srcu_unlock_count[0]);
+		sum -= READ_ONCE(cpuc->srcu_unlock_count[1]);
 	}
 	return sum;
 }
@@ -193,18 +311,21 @@ static bool srcu_readers_active(struct srcu_struct *sp)
  */
 void cleanup_srcu_struct(struct srcu_struct *sp)
 {
+	int cpu;
+
 	WARN_ON_ONCE(atomic_read(&sp->srcu_exp_cnt));
 	if (WARN_ON(srcu_readers_active(sp)))
 		return; /* Leakage unless caller handles error. */
-	if (WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)))
-		return; /* Leakage unless caller handles error. */
 	flush_delayed_work(&sp->work);
-	if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE)) {
-		pr_info("cleanup_srcu_struct: Active srcu_struct %lu CBs %c state: %d\n", rcu_segcblist_n_cbs(&sp->srcu_cblist), ".E"[rcu_segcblist_empty(&sp->srcu_cblist)], rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
+	for_each_possible_cpu(cpu)
+		flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
+	if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
+	    WARN_ON(srcu_readers_active(sp))) {
+		pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
 		return; /* Caller forgot to stop doing call_srcu()? */
 	}
-	free_percpu(sp->per_cpu_ref);
-	sp->per_cpu_ref = NULL;
+	free_percpu(sp->sda);
+	sp->sda = NULL;
 }
 EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
@@ -217,8 +338,8 @@ int __srcu_read_lock(struct srcu_struct *sp)
 {
 	int idx;
 
-	idx = READ_ONCE(sp->completed) & 0x1;
-	__this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
+	idx = READ_ONCE(sp->srcu_idx) & 0x1;
+	__this_cpu_inc(sp->sda->srcu_lock_count[idx]);
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
 	return idx;
 }
@@ -233,7 +354,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
 	smp_mb(); /* C */  /* Avoid leaking the critical section. */
-	this_cpu_inc(sp->per_cpu_ref->unlock_count[idx]);
+	this_cpu_inc(sp->sda->srcu_unlock_count[idx]);
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
@@ -251,19 +372,207 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  */
 static void srcu_gp_start(struct srcu_struct *sp)
 {
+	struct srcu_data *sdp = this_cpu_ptr(sp->sda);
 	int state;
 
-	rcu_segcblist_accelerate(&sp->srcu_cblist,
-				 rcu_seq_snap(&sp->srcu_gp_seq));
+	RCU_LOCKDEP_WARN(!lockdep_is_held(&sp->gp_lock),
+			 "Invoked srcu_gp_start() without ->gp_lock!");
+	WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
+	rcu_segcblist_advance(&sdp->srcu_cblist,
+			      rcu_seq_current(&sp->srcu_gp_seq));
+	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
+				       rcu_seq_snap(&sp->srcu_gp_seq));
 	rcu_seq_start(&sp->srcu_gp_seq);
 	state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
 	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
 }
 
+/*
+ * Track online CPUs to guide callback workqueue placement.
+ */
+DEFINE_PER_CPU(bool, srcu_online);
+
+void srcu_online_cpu(unsigned int cpu)
+{
+	WRITE_ONCE(per_cpu(srcu_online, cpu), true);
+}
+
+void srcu_offline_cpu(unsigned int cpu)
+{
+	WRITE_ONCE(per_cpu(srcu_online, cpu), false);
+}
+
+/*
+ * Place the workqueue handler on the specified CPU if online, otherwise
+ * just run it whereever.  This is useful for placing workqueue handlers
+ * that are to invoke the specified CPU's callbacks.
+ */
+static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
+				       struct delayed_work *dwork,
+				       unsigned long delay)
+{
+	bool ret;
+
+	preempt_disable();
+	if (READ_ONCE(per_cpu(srcu_online, cpu)))
+		ret = queue_delayed_work_on(cpu, wq, dwork, delay);
+	else
+		ret = queue_delayed_work(wq, dwork, delay);
+	preempt_enable();
+	return ret;
+}
+
+/*
+ * Schedule callback invocation for the specified srcu_data structure,
+ * if possible, on the corresponding CPU.
+ */
+static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
+{
+	srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq,
+				   &sdp->work, delay);
+}
+
+/*
+ * Schedule callback invocation for all srcu_data structures associated
+ * with the specified srcu_node structure, if possible, on the corresponding
+ * CPUs.
+ */
+static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp)
+{
+	int cpu;
+
+	for (cpu = snp->grplo; cpu <= snp->grphi; cpu++)
+		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), SRCU_INTERVAL);
+}
+
+/*
+ * Note the end of an SRCU grace period.  Initiates callback invocation
+ * and starts a new grace period if needed.
+ *
+ * The ->srcu_cb_mutex acquisition does not protect any data, but
+ * instead prevents more than one grace period from starting while we
+ * are initiating callback invocation.  This allows the ->srcu_have_cbs[]
+ * array to have a finite number of elements.
+ */
+static void srcu_gp_end(struct srcu_struct *sp)
+{
+	bool cbs;
+	unsigned long gpseq;
+	int idx;
+	int idxnext;
+	struct srcu_node *snp;
+
+	/* Prevent more than one additional grace period. */
+	mutex_lock(&sp->srcu_cb_mutex);
+
+	/* End the current grace period. */
+	spin_lock_irq(&sp->gp_lock);
+	idx = rcu_seq_state(sp->srcu_gp_seq);
+	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
+	rcu_seq_end(&sp->srcu_gp_seq);
+	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
+	spin_unlock_irq(&sp->gp_lock);
+	mutex_unlock(&sp->srcu_gp_mutex);
+	/* A new grace period can start at this point.  But only one. */
+
+	/* Initiate callback invocation as needed. */
+	idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
+	idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
+	rcu_for_each_node_breadth_first(sp, snp) {
+		spin_lock_irq(&snp->lock);
+		cbs = false;
+		if (snp >= sp->level[rcu_num_lvls - 1])
+			cbs = snp->srcu_have_cbs[idx] == gpseq;
+		snp->srcu_have_cbs[idx] = gpseq;
+		rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
+		spin_unlock_irq(&snp->lock);
+		if (cbs) {
+			smp_mb(); /* GP end before CB invocation. */
+			srcu_schedule_cbs_snp(sp, snp);
+		}
+	}
+
+	/* Callback initiation done, allow grace periods after next. */
+	mutex_unlock(&sp->srcu_cb_mutex);
+
+	/* Start a new grace period if needed. */
+	spin_lock_irq(&sp->gp_lock);
+	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
+	if (!rcu_seq_state(gpseq) &&
+	    ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
+		srcu_gp_start(sp);
+		spin_unlock_irq(&sp->gp_lock);
+		/* Throttle expedited grace periods: Should be rare! */
+		srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) &&
+				    rcu_seq_ctr(gpseq) & 0xf
+				    ? 0
+				    : SRCU_INTERVAL);
+	} else {
+		spin_unlock_irq(&sp->gp_lock);
+	}
+}
+
+/*
+ * Funnel-locking scheme to scalably mediate many concurrent grace-period
+ * requests.  The winner has to do the work of actually starting grace
+ * period s.  Losers must either ensure that their desired grace-period
+ * number is recorded on at least their leaf srcu_node structure, or they
+ * must take steps to invoke their own callbacks.
+ */
+static void srcu_funnel_gp_start(struct srcu_struct *sp,
+				 struct srcu_data *sdp,
+				 unsigned long s)
+{
+	unsigned long flags;
+	int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
+	struct srcu_node *snp = sdp->mynode;
+	unsigned long snp_seq;
+
+	/* Each pass through the loop does one level of the srcu_node tree. */
+	for (; snp != NULL; snp = snp->srcu_parent) {
+		if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode)
+			return; /* GP already done and CBs recorded. */
+		spin_lock_irqsave(&snp->lock, flags);
+		if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
+			snp_seq = snp->srcu_have_cbs[idx];
+			spin_unlock_irqrestore(&snp->lock, flags);
+			if (snp == sdp->mynode && snp_seq != s) {
+				smp_mb(); /* CBs after GP! */
+				srcu_schedule_cbs_sdp(sdp, 0);
+			}
+			return;
+		}
+		snp->srcu_have_cbs[idx] = s;
+		spin_unlock_irqrestore(&snp->lock, flags);
+	}
+
+	/* Top of tree, must ensure the grace period will be started. */
+	spin_lock_irqsave(&sp->gp_lock, flags);
+	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) {
+		/*
+		 * Record need for grace period s.  Pair with load
+		 * acquire setting up for initialization.
+		 */
+		smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/
+	}
+
+	/* If grace period not already done and none in progress, start it. */
+	if (!rcu_seq_done(&sp->srcu_gp_seq, s) &&
+	    rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
+		WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
+		srcu_gp_start(sp);
+		queue_delayed_work(system_power_efficient_wq, &sp->work,
+				   atomic_read(&sp->srcu_exp_cnt)
+				   ? 0
+				   : SRCU_INTERVAL);
+	}
+	spin_unlock_irqrestore(&sp->gp_lock, flags);
+}
+
 /*
  * Wait until all readers counted by array index idx complete, but
  * loop an additional time if there is an expedited grace period pending.
- * The caller must ensure that ->completed is not changed while checking.
+ * The caller must ensure that ->srcu_idx is not changed while checking.
  */
 static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
 {
@@ -277,13 +586,13 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
 }
 
 /*
- * Increment the ->completed counter so that future SRCU readers will
- * use the other rank of the ->(un)lock_count[] arrays.  This allows
+ * Increment the ->srcu_idx counter so that future SRCU readers will
+ * use the other rank of the ->srcu_(un)lock_count[] arrays.  This allows
  * us to wait for pre-existing readers in a starvation-free manner.
  */
 static void srcu_flip(struct srcu_struct *sp)
 {
-	WRITE_ONCE(sp->completed, sp->completed + 1);
+	WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1);
 
 	/*
 	 * Ensure that if the updater misses an __srcu_read_unlock()
@@ -296,21 +605,9 @@ static void srcu_flip(struct srcu_struct *sp)
 }
 
 /*
- * End an SRCU grace period.
- */
-static void srcu_gp_end(struct srcu_struct *sp)
-{
-	rcu_seq_end(&sp->srcu_gp_seq);
-
-	spin_lock_irq(&sp->queue_lock);
-	rcu_segcblist_advance(&sp->srcu_cblist,
-			      rcu_seq_current(&sp->srcu_gp_seq));
-	spin_unlock_irq(&sp->queue_lock);
-}
-
-/*
- * Enqueue an SRCU callback on the specified srcu_struct structure,
- * initiating grace-period processing if it is not already running.
+ * Enqueue an SRCU callback on the srcu_data structure associated with
+ * the current CPU and the specified srcu_struct structure, initiating
+ * grace-period processing if it is not already running.
  *
  * Note that all CPUs must agree that the grace period extended beyond
  * all pre-existing SRCU read-side critical section.  On systems with
@@ -335,33 +632,40 @@ static void srcu_gp_end(struct srcu_struct *sp)
  * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
  * srcu_struct structure.
  */
-void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
+void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 	       rcu_callback_t func)
 {
 	unsigned long flags;
-
-	head->next = NULL;
-	head->func = func;
-	spin_lock_irqsave(&sp->queue_lock, flags);
-	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
-	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_IDLE) {
-		srcu_gp_start(sp);
-		queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
+	bool needgp = false;
+	unsigned long s;
+	struct srcu_data *sdp;
+
+	check_init_srcu_struct(sp);
+	rhp->func = func;
+	local_irq_save(flags);
+	sdp = this_cpu_ptr(sp->sda);
+	spin_lock(&sdp->lock);
+	rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false);
+	rcu_segcblist_advance(&sdp->srcu_cblist,
+			      rcu_seq_current(&sp->srcu_gp_seq));
+	s = rcu_seq_snap(&sp->srcu_gp_seq);
+	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s);
+	if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
+		sdp->srcu_gp_seq_needed = s;
+		needgp = true;
 	}
-	spin_unlock_irqrestore(&sp->queue_lock, flags);
+	spin_unlock_irqrestore(&sdp->lock, flags);
+	if (needgp)
+		srcu_funnel_gp_start(sp, sdp, s);
 }
 EXPORT_SYMBOL_GPL(call_srcu);
 
-static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
-
 /*
  * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
  */
 static void __synchronize_srcu(struct srcu_struct *sp)
 {
 	struct rcu_synchronize rcu;
-	struct rcu_head *head = &rcu.head;
 
 	RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
 			 lock_is_held(&rcu_bh_lock_map) ||
@@ -372,26 +676,12 @@ static void __synchronize_srcu(struct srcu_struct *sp)
 	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
 		return;
 	might_sleep();
+	check_init_srcu_struct(sp);
 	init_completion(&rcu.completion);
-
-	head->next = NULL;
-	head->func = wakeme_after_rcu;
-	spin_lock_irq(&sp->queue_lock);
-	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_IDLE) {
-		/* steal the processing owner */
-		rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
-		srcu_gp_start(sp);
-		spin_unlock_irq(&sp->queue_lock);
-		/* give the processing owner to work_struct */
-		srcu_reschedule(sp, 0);
-	} else {
-		rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
-		spin_unlock_irq(&sp->queue_lock);
-	}
-
+	init_rcu_head_on_stack(&rcu.head);
+	call_srcu(sp, &rcu.head, wakeme_after_rcu);
 	wait_for_completion(&rcu.completion);
-	smp_mb(); /* Caller's later accesses after GP. */
+	destroy_rcu_head_on_stack(&rcu.head);
 }
 
 /**
@@ -408,6 +698,7 @@ void synchronize_srcu_expedited(struct srcu_struct *sp)
 {
 	bool do_norm = rcu_gp_is_normal();
 
+	check_init_srcu_struct(sp);
 	if (!do_norm) {
 		atomic_inc(&sp->srcu_exp_cnt);
 		smp_mb__after_atomic(); /* increment before GP. */
@@ -415,7 +706,7 @@ void synchronize_srcu_expedited(struct srcu_struct *sp)
 	__synchronize_srcu(sp);
 	if (!do_norm) {
 		smp_mb__before_atomic(); /* GP before decrement. */
-		atomic_dec(&sp->srcu_exp_cnt);
+		WARN_ON_ONCE(atomic_dec_return(&sp->srcu_exp_cnt) < 0);
 	}
 }
 EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
@@ -426,8 +717,8 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
  *
  * Wait for the count to drain to zero of both indexes. To avoid the
  * possible starvation of synchronize_srcu(), it waits for the count of
- * the index=((->completed & 1) ^ 1) to drain to zero at first,
- * and then flip the completed and wait for the count of the other index.
+ * the index=((->srcu_idx & 1) ^ 1) to drain to zero at first,
+ * and then flip the srcu_idx and wait for the count of the other index.
  *
  * Can block; must be called from process context.
  *
@@ -468,13 +759,69 @@ void synchronize_srcu(struct srcu_struct *sp)
 }
 EXPORT_SYMBOL_GPL(synchronize_srcu);
 
+/*
+ * Callback function for srcu_barrier() use.
+ */
+static void srcu_barrier_cb(struct rcu_head *rhp)
+{
+	struct srcu_data *sdp;
+	struct srcu_struct *sp;
+
+	sdp = container_of(rhp, struct srcu_data, srcu_barrier_head);
+	sp = sdp->sp;
+	if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt))
+		complete(&sp->srcu_barrier_completion);
+}
+
 /**
  * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
  * @sp: srcu_struct on which to wait for in-flight callbacks.
  */
 void srcu_barrier(struct srcu_struct *sp)
 {
-	synchronize_srcu(sp);
+	int cpu;
+	struct srcu_data *sdp;
+	unsigned long s = rcu_seq_snap(&sp->srcu_barrier_seq);
+
+	check_init_srcu_struct(sp);
+	mutex_lock(&sp->srcu_barrier_mutex);
+	if (rcu_seq_done(&sp->srcu_barrier_seq, s)) {
+		smp_mb(); /* Force ordering following return. */
+		mutex_unlock(&sp->srcu_barrier_mutex);
+		return; /* Someone else did our work for us. */
+	}
+	rcu_seq_start(&sp->srcu_barrier_seq);
+	init_completion(&sp->srcu_barrier_completion);
+
+	/* Initial count prevents reaching zero until all CBs are posted. */
+	atomic_set(&sp->srcu_barrier_cpu_cnt, 1);
+
+	/*
+	 * Each pass through this loop enqueues a callback, but only
+	 * on CPUs already having callbacks enqueued.  Note that if
+	 * a CPU already has callbacks enqueue, it must have already
+	 * registered the need for a future grace period, so all we
+	 * need do is enqueue a callback that will use the same
+	 * grace period as the last callback already in the queue.
+	 */
+	for_each_possible_cpu(cpu) {
+		sdp = per_cpu_ptr(sp->sda, cpu);
+		spin_lock_irq(&sdp->lock);
+		atomic_inc(&sp->srcu_barrier_cpu_cnt);
+		sdp->srcu_barrier_head.func = srcu_barrier_cb;
+		if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
+					   &sdp->srcu_barrier_head, 0))
+			atomic_dec(&sp->srcu_barrier_cpu_cnt);
+		spin_unlock_irq(&sdp->lock);
+	}
+
+	/* Remove the initial count, at which point reaching zero can happen. */
+	if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt))
+		complete(&sp->srcu_barrier_completion);
+	wait_for_completion(&sp->srcu_barrier_completion);
+
+	rcu_seq_end(&sp->srcu_barrier_seq);
+	mutex_unlock(&sp->srcu_barrier_mutex);
 }
 EXPORT_SYMBOL_GPL(srcu_barrier);
 
@@ -487,21 +834,24 @@ EXPORT_SYMBOL_GPL(srcu_barrier);
  */
 unsigned long srcu_batches_completed(struct srcu_struct *sp)
 {
-	return sp->completed;
+	return sp->srcu_idx;
 }
 EXPORT_SYMBOL_GPL(srcu_batches_completed);
 
 /*
- * Core SRCU state machine.  Advance callbacks from ->batch_check0 to
- * ->batch_check1 and then to ->batch_done as readers drain.
+ * Core SRCU state machine.  Push state bits of ->srcu_gp_seq
+ * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has
+ * completed in that state.
  */
-static void srcu_advance_batches(struct srcu_struct *sp)
+static void srcu_advance_state(struct srcu_struct *sp)
 {
 	int idx;
 
+	mutex_lock(&sp->srcu_gp_mutex);
+
 	/*
 	 * Because readers might be delayed for an extended period after
-	 * fetching ->completed for their index, at any point in time there
+	 * fetching ->srcu_idx for their index, at any point in time there
 	 * might well be readers using both idx=0 and idx=1.  We therefore
 	 * need to wait for readers to clear from both index values before
 	 * invoking a callback.
@@ -511,23 +861,29 @@ static void srcu_advance_batches(struct srcu_struct *sp)
 	 */
 	idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
 	if (idx == SRCU_STATE_IDLE) {
-		spin_lock_irq(&sp->queue_lock);
-		if (rcu_segcblist_empty(&sp->srcu_cblist)) {
-			spin_unlock_irq(&sp->queue_lock);
+		spin_lock_irq(&sp->gp_lock);
+		if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
+			WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq));
+			spin_unlock_irq(&sp->gp_lock);
+			mutex_unlock(&sp->srcu_gp_mutex);
 			return;
 		}
 		idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
 		if (idx == SRCU_STATE_IDLE)
 			srcu_gp_start(sp);
-		spin_unlock_irq(&sp->queue_lock);
-		if (idx != SRCU_STATE_IDLE)
+		spin_unlock_irq(&sp->gp_lock);
+		if (idx != SRCU_STATE_IDLE) {
+			mutex_unlock(&sp->srcu_gp_mutex);
 			return; /* Someone else started the grace period. */
+		}
 	}
 
 	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
-		idx = 1 ^ (sp->completed & 1);
-		if (!try_check_zero(sp, idx, 1))
+		idx = 1 ^ (sp->srcu_idx & 1);
+		if (!try_check_zero(sp, idx, 1)) {
+			mutex_unlock(&sp->srcu_gp_mutex);
 			return; /* readers present, retry later. */
+		}
 		srcu_flip(sp);
 		rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2);
 	}
@@ -538,10 +894,12 @@ static void srcu_advance_batches(struct srcu_struct *sp)
 		 * SRCU read-side critical sections are normally short,
 		 * so check at least twice in quick succession after a flip.
 		 */
-		idx = 1 ^ (sp->completed & 1);
-		if (!try_check_zero(sp, idx, 2))
-			return; /* readers present, retry after later. */
-		srcu_gp_end(sp);
+		idx = 1 ^ (sp->srcu_idx & 1);
+		if (!try_check_zero(sp, idx, 2)) {
+			mutex_unlock(&sp->srcu_gp_mutex);
+			return; /* readers present, retry later. */
+		}
+		srcu_gp_end(sp);  /* Releases ->srcu_gp_mutex. */
 	}
 }
 
@@ -551,28 +909,51 @@ static void srcu_advance_batches(struct srcu_struct *sp)
  * the workqueue.  Note that needed memory barriers have been executed
  * in this task's context by srcu_readers_active_idx_check().
  */
-static void srcu_invoke_callbacks(struct srcu_struct *sp)
+static void srcu_invoke_callbacks(struct work_struct *work)
 {
+	bool more;
 	struct rcu_cblist ready_cbs;
 	struct rcu_head *rhp;
+	struct srcu_data *sdp;
+	struct srcu_struct *sp;
 
-	spin_lock_irq(&sp->queue_lock);
-	if (!rcu_segcblist_ready_cbs(&sp->srcu_cblist)) {
-		spin_unlock_irq(&sp->queue_lock);
-		return;
-	}
+	sdp = container_of(work, struct srcu_data, work.work);
+	sp = sdp->sp;
 	rcu_cblist_init(&ready_cbs);
-	rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs);
-	spin_unlock_irq(&sp->queue_lock);
+	spin_lock_irq(&sdp->lock);
+	smp_mb(); /* Old grace periods before callback invocation! */
+	rcu_segcblist_advance(&sdp->srcu_cblist,
+			      rcu_seq_current(&sp->srcu_gp_seq));
+	if (sdp->srcu_cblist_invoking ||
+	    !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
+		spin_unlock_irq(&sdp->lock);
+		return;  /* Someone else on the job or nothing to do. */
+	}
+
+	/* We are on the job!  Extract and invoke ready callbacks. */
+	sdp->srcu_cblist_invoking = true;
+	rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
+	spin_unlock_irq(&sdp->lock);
 	rhp = rcu_cblist_dequeue(&ready_cbs);
 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
 		local_bh_disable();
 		rhp->func(rhp);
 		local_bh_enable();
 	}
-	spin_lock_irq(&sp->queue_lock);
-	rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs);
-	spin_unlock_irq(&sp->queue_lock);
+
+	/*
+	 * Update counts, accelerate new callbacks, and if needed,
+	 * schedule another round of callback invocation.
+	 */
+	spin_lock_irq(&sdp->lock);
+	rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
+	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
+				       rcu_seq_snap(&sp->srcu_gp_seq));
+	sdp->srcu_cblist_invoking = false;
+	more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
+	spin_unlock_irq(&sdp->lock);
+	if (more)
+		srcu_schedule_cbs_sdp(sdp, 0);
 }
 
 /*
@@ -581,19 +962,21 @@ static void srcu_invoke_callbacks(struct srcu_struct *sp)
  */
 static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
 {
-	bool pending = true;
-	int state;
+	bool pushgp = true;
 
-	if (rcu_segcblist_empty(&sp->srcu_cblist)) {
-		spin_lock_irq(&sp->queue_lock);
-		state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
-		if (rcu_segcblist_empty(&sp->srcu_cblist) &&
-		    state == SRCU_STATE_IDLE)
-			pending = false;
-		spin_unlock_irq(&sp->queue_lock);
+	spin_lock_irq(&sp->gp_lock);
+	if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
+		if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) {
+			/* All requests fulfilled, time to go idle. */
+			pushgp = false;
+		}
+	} else if (!rcu_seq_state(sp->srcu_gp_seq)) {
+		/* Outstanding request and no GP.  Start one. */
+		srcu_gp_start(sp);
 	}
+	spin_unlock_irq(&sp->gp_lock);
 
-	if (pending)
+	if (pushgp)
 		queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
 }
 
@@ -606,8 +989,7 @@ void process_srcu(struct work_struct *work)
 
 	sp = container_of(work, struct srcu_struct, work.work);
 
-	srcu_advance_batches(sp);
-	srcu_invoke_callbacks(sp);
+	srcu_advance_state(sp);
 	srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL);
 }
 EXPORT_SYMBOL_GPL(process_srcu);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 346948b51b0b..3c23435d2083 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3776,12 +3776,16 @@ int rcutree_online_cpu(unsigned int cpu)
 {
 	sync_sched_exp_online_cleanup(cpu);
 	rcutree_affinity_setting(cpu, -1);
+	if (IS_ENABLED(CONFIG_TREE_SRCU))
+		srcu_online_cpu(cpu);
 	return 0;
 }
 
 int rcutree_offline_cpu(unsigned int cpu)
 {
 	rcutree_affinity_setting(cpu, cpu);
+	if (IS_ENABLED(CONFIG_TREE_SRCU))
+		srcu_offline_cpu(cpu);
 	return 0;
 }
 
@@ -4157,6 +4161,8 @@ void __init rcu_init(void)
 	for_each_online_cpu(cpu) {
 		rcutree_prepare_cpu(cpu);
 		rcu_cpu_starting(cpu);
+		if (IS_ENABLED(CONFIG_TREE_SRCU))
+			srcu_online_cpu(cpu);
 	}
 }
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index a2a45cb629d6..0e598ab08fea 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -541,6 +541,14 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
 static void rcu_dynticks_task_enter(void);
 static void rcu_dynticks_task_exit(void);
 
+#ifdef CONFIG_SRCU
+void srcu_online_cpu(unsigned int cpu);
+void srcu_offline_cpu(unsigned int cpu);
+#else /* #ifdef CONFIG_SRCU */
+void srcu_online_cpu(unsigned int cpu) { }
+void srcu_offline_cpu(unsigned int cpu) { }
+#endif /* #else #ifdef CONFIG_SRCU */
+
 #endif /* #ifndef RCU_TREE_NONCORE */
 
 #ifdef CONFIG_RCU_TRACE
-- 
cgit v1.2.3


From 0497b489b8255054f113fd31faeb72f6dbc50a68 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 18 Apr 2017 10:28:31 -0700
Subject: srcu: Expedite srcu_schedule_cbs_snp() callback invocation

Although Tree SRCU does reduce delays when there is at least one
synchronize_srcu_expedited() invocation pending, srcu_schedule_cbs_snp()
still waits for SRCU_INTERVAL before invoking callbacks.  Since
synchronize_srcu_expedited() now posts a callback and waits for
that callback to do a wakeup, this destroys the expedited nature of
synchronize_srcu_expedited().  This destruction became apparent to
Marc Zyngier in the guise of a guest-OS bootup slowdown from five
seconds to no fewer than forty seconds.

This commit therefore invokes callbacks immediately at the end of the
grace period when there is at least one synchronize_srcu_expedited()
invocation pending.  This brought Marc's guest-OS bootup times back
into the realm of reason.

Reported-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
---
 kernel/rcu/srcutree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 12feeca18f46..9ecf0acc18eb 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -442,7 +442,8 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp)
 	int cpu;
 
 	for (cpu = snp->grplo; cpu <= snp->grphi; cpu++)
-		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), SRCU_INTERVAL);
+		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu),
+				      atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL);
 }
 
 /*
-- 
cgit v1.2.3


From bcbfdd01dce5556a952fae84ef16fd0f12525e7b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 11 Apr 2017 15:50:41 -0700
Subject: rcu: Make non-preemptive schedule be Tasks RCU quiescent state

Currently, a call to schedule() acts as a Tasks RCU quiescent state
only if a context switch actually takes place.  However, just the
call to schedule() guarantees that the calling task has moved off of
whatever tracing trampoline that it might have been one previously.
This commit therefore plumbs schedule()'s "preempt" parameter into
rcu_note_context_switch(), which then records the Tasks RCU quiescent
state, but only if this call to schedule() was -not- due to a preemption.

To avoid adding overhead to the common-case context-switch path,
this commit hides the rcu_note_context_switch() check under an existing
non-common-case check.

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 11 ++++++++---
 include/linux/rcutiny.h  | 13 +++++++++----
 include/linux/rcutree.h  |  5 +++--
 kernel/rcu/tree.c        | 22 +++++++++++++++++++++-
 kernel/rcu/update.c      |  1 +
 kernel/sched/core.c      |  2 +-
 6 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e6146d0074f8..f531b29207da 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -363,15 +363,20 @@ static inline void rcu_init_nohz(void)
 #ifdef CONFIG_TASKS_RCU
 #define TASKS_RCU(x) x
 extern struct srcu_struct tasks_rcu_exit_srcu;
-#define rcu_note_voluntary_context_switch(t) \
+#define rcu_note_voluntary_context_switch_lite(t) \
 	do { \
-		rcu_all_qs(); \
 		if (READ_ONCE((t)->rcu_tasks_holdout)) \
 			WRITE_ONCE((t)->rcu_tasks_holdout, false); \
 	} while (0)
+#define rcu_note_voluntary_context_switch(t) \
+	do { \
+		rcu_all_qs(); \
+		rcu_note_voluntary_context_switch_lite(t); \
+	} while (0)
 #else /* #ifdef CONFIG_TASKS_RCU */
 #define TASKS_RCU(x) do { } while (0)
-#define rcu_note_voluntary_context_switch(t)	rcu_all_qs()
+#define rcu_note_voluntary_context_switch_lite(t)	do { } while (0)
+#define rcu_note_voluntary_context_switch(t)		rcu_all_qs()
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
 
 /**
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 5219be250f00..74d9c3a1feee 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -92,10 +92,11 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 	call_rcu(head, func);
 }
 
-static inline void rcu_note_context_switch(void)
-{
-	rcu_sched_qs();
-}
+#define rcu_note_context_switch(preempt) \
+	do { \
+		rcu_sched_qs(); \
+		rcu_note_voluntary_context_switch_lite(current); \
+	} while (0)
 
 /*
  * Take advantage of the fact that there is only one CPU, which
@@ -242,6 +243,10 @@ static inline bool rcu_is_watching(void)
 
 #endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
 
+static inline void rcu_request_urgent_qs_task(struct task_struct *t)
+{
+}
+
 static inline void rcu_all_qs(void)
 {
 	barrier(); /* Avoid RCU read-side critical sections leaking across. */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 63a4e4cf40a5..0bacb6b2af69 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,7 +30,7 @@
 #ifndef __LINUX_RCUTREE_H
 #define __LINUX_RCUTREE_H
 
-void rcu_note_context_switch(void);
+void rcu_note_context_switch(bool preempt);
 int rcu_needs_cpu(u64 basem, u64 *nextevt);
 void rcu_cpu_stall_reset(void);
 
@@ -41,7 +41,7 @@ void rcu_cpu_stall_reset(void);
  */
 static inline void rcu_virt_note_context_switch(int cpu)
 {
-	rcu_note_context_switch();
+	rcu_note_context_switch(false);
 }
 
 void synchronize_rcu_bh(void);
@@ -108,6 +108,7 @@ void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
 
 bool rcu_is_watching(void);
+void rcu_request_urgent_qs_task(struct task_struct *t);
 
 void rcu_all_qs(void);
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3c23435d2083..891d97109e09 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -458,7 +458,7 @@ static void rcu_momentary_dyntick_idle(void)
  * and requires special handling for preemptible RCU.
  * The caller must have disabled interrupts.
  */
-void rcu_note_context_switch(void)
+void rcu_note_context_switch(bool preempt)
 {
 	barrier(); /* Avoid RCU read-side critical sections leaking down. */
 	trace_rcu_utilization(TPS("Start context switch"));
@@ -471,6 +471,8 @@ void rcu_note_context_switch(void)
 	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
 		rcu_momentary_dyntick_idle();
 	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
+	if (!preempt)
+		rcu_note_voluntary_context_switch_lite(current);
 out:
 	trace_rcu_utilization(TPS("End context switch"));
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
@@ -1149,6 +1151,24 @@ bool notrace rcu_is_watching(void)
 }
 EXPORT_SYMBOL_GPL(rcu_is_watching);
 
+/*
+ * If a holdout task is actually running, request an urgent quiescent
+ * state from its CPU.  This is unsynchronized, so migrations can cause
+ * the request to go to the wrong CPU.  Which is OK, all that will happen
+ * is that the CPU's next context switch will be a bit slower and next
+ * time around this task will generate another request.
+ */
+void rcu_request_urgent_qs_task(struct task_struct *t)
+{
+	int cpu;
+
+	barrier();
+	cpu = task_cpu(t);
+	if (!task_curr(t))
+		return; /* This task is not running on that CPU. */
+	smp_store_release(per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, cpu), true);
+}
+
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index c5df0d756900..273e869ca21d 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -665,6 +665,7 @@ static void check_holdout_task(struct task_struct *t,
 		put_task_struct(t);
 		return;
 	}
+	rcu_request_urgent_qs_task(t);
 	if (!needreport)
 		return;
 	if (*firstreport) {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3b31fc05a0f1..2adf7b6c04e7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3378,7 +3378,7 @@ static void __sched notrace __schedule(bool preempt)
 		hrtick_clear(rq);
 
 	local_irq_disable();
-	rcu_note_context_switch();
+	rcu_note_context_switch(preempt);
 
 	/*
 	 * Make sure that signal_pending_state()->signal_pending() below
-- 
cgit v1.2.3


From 12a16d15b67c66fc0a8274d094a270e4fbaed3eb Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 11 Apr 2017 12:50:07 -0400
Subject: NFS4: remove a redundant lock range check

flock64_to_posix_lock() is already doing this check

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Jeff Layton <jeff.layton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6e9ff2d9a5bf..d682ef3b2da2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6447,9 +6447,6 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
 	ctx = nfs_file_open_context(filp);
 	state = ctx->state;
 
-	if (request->fl_start < 0 || request->fl_end < 0)
-		return -EINVAL;
-
 	if (IS_GETLK(cmd)) {
 		if (state != NULL)
 			return nfs4_proc_getlk(state, F_GETLK, request);
-- 
cgit v1.2.3


From e12937279c8b07798651de7f5c8c52974c699931 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 11 Apr 2017 12:50:08 -0400
Subject: NFS: Move the flock open mode check into nfs_flock()

We only need to check lock exclusive/shared types against open mode when
flock() is used on NFS, so move it into the flock-specific path instead of
checking it for all locks.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c     | 18 ++++++++++++++++--
 fs/nfs/nfs4proc.c | 14 --------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 668213984d68..b7f4af3483b6 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -820,9 +820,23 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 	if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
 		is_local = 1;
 
-	/* We're simulating flock() locks using posix locks on the server */
-	if (fl->fl_type == F_UNLCK)
+	/*
+	 * VFS doesn't require the open mode to match a flock() lock's type.
+	 * NFS, however, may simulate flock() locking with posix locking which
+	 * requires the open mode to match the lock type.
+	 */
+	switch (fl->fl_type) {
+	case F_UNLCK:
 		return do_unlk(filp, cmd, fl, is_local);
+	case F_RDLCK:
+		if (!(filp->f_mode & FMODE_READ))
+			return -EBADF;
+		break;
+	case F_WRLCK:
+		if (!(filp->f_mode & FMODE_WRITE))
+			return -EBADF;
+	}
+
 	return do_setlk(filp, cmd, fl, is_local);
 }
 EXPORT_SYMBOL_GPL(nfs_flock);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d682ef3b2da2..c52f72c86940 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6469,20 +6469,6 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
 	    !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
 		return -ENOLCK;
 
-	/*
-	 * Don't rely on the VFS having checked the file open mode,
-	 * since it won't do this for flock() locks.
-	 */
-	switch (request->fl_type) {
-	case F_RDLCK:
-		if (!(filp->f_mode & FMODE_READ))
-			return -EBADF;
-		break;
-	case F_WRLCK:
-		if (!(filp->f_mode & FMODE_WRITE))
-			return -EBADF;
-	}
-
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
 		return status;
-- 
cgit v1.2.3


From 50f2112cf7a3e62a8d33838eb205d5fef306457a Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 11 Apr 2017 12:50:09 -0400
Subject: locks: Set FL_CLOSE when removing flock locks on close()

Set FL_CLOSE in fl_flags as in locks_remove_posix() when clearing locks.
NFS will check for this flag to ensure an unlock is sent in a following
patch.

Fuse handles flock and posix locks differently for FL_CLOSE, and so
requires a fixup to retain the existing behavior for flock.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/fuse/file.c     | 2 +-
 fs/locks.c         | 2 +-
 include/linux/fs.h | 2 ++
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ec238fb5a584..995da8957f6f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2168,7 +2168,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
 	}
 
 	/* Unlock on close is handled by the flush method */
-	if (fl->fl_flags & FL_CLOSE)
+	if ((fl->fl_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX)
 		return 0;
 
 	fuse_lk_fill(&args, file, fl, opcode, pid, flock, &inarg);
diff --git a/fs/locks.c b/fs/locks.c
index 26811321d39b..af2031a1fcff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2504,7 +2504,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
 		.fl_owner = filp,
 		.fl_pid = current->tgid,
 		.fl_file = filp,
-		.fl_flags = FL_FLOCK,
+		.fl_flags = FL_FLOCK | FL_CLOSE,
 		.fl_type = F_UNLCK,
 		.fl_end = OFFSET_MAX,
 	};
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7251f7bb45e8..72061aa65405 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -909,6 +909,8 @@ static inline struct file *get_file(struct file *f)
 #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
 #define FL_LAYOUT	2048	/* outstanding pNFS layout */
 
+#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
+
 /*
  * Special return value from posix_lock_file() and vfs_lock_file() for
  * asynchronous locking.
-- 
cgit v1.2.3


From 7d6ddf88c4db372689c8aa65ea652d0514d66c06 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 11 Apr 2017 12:50:10 -0400
Subject: NFS: Add an iocounter wait function for async RPC tasks

By sleeping on a new NFS Unlock-On-Close waitqueue, rpc tasks may wait for
a lock context's iocounter to reach zero.  The rpc waitqueue is only woken
when the open_context has the NFS_CONTEXT_UNLOCK flag set in order to
mitigate spurious wake-ups for any iocounter reaching zero.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/client.c           |  1 +
 fs/nfs/pagelist.c         | 34 +++++++++++++++++++++++++++++++++-
 include/linux/nfs_fs.h    |  1 +
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_page.h  |  1 +
 5 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 3ffbffe8f39f..3e7b2e6a7cfb 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -218,6 +218,7 @@ static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
 static void pnfs_init_server(struct nfs_server *server)
 {
 	rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
+	rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
 }
 
 #else
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index f53610672f03..ad92b401326c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -102,6 +102,35 @@ nfs_iocounter_wait(struct nfs_lock_context *l_ctx)
 			TASK_KILLABLE);
 }
 
+/**
+ * nfs_async_iocounter_wait - wait on a rpc_waitqueue for I/O
+ * to complete
+ * @task: the rpc_task that should wait
+ * @l_ctx: nfs_lock_context with io_counter to check
+ *
+ * Returns true if there is outstanding I/O to wait on and the
+ * task has been put to sleep.
+ */
+bool
+nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx)
+{
+	struct inode *inode = d_inode(l_ctx->open_context->dentry);
+	bool ret = false;
+
+	if (atomic_read(&l_ctx->io_count) > 0) {
+		rpc_sleep_on(&NFS_SERVER(inode)->uoc_rpcwaitq, task, NULL);
+		ret = true;
+	}
+
+	if (atomic_read(&l_ctx->io_count) == 0) {
+		rpc_wake_up_queued_task(&NFS_SERVER(inode)->uoc_rpcwaitq, task);
+		ret = false;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait);
+
 /*
  * nfs_page_group_lock - lock the head of the page group
  * @req - request in group that is to be locked
@@ -385,8 +414,11 @@ static void nfs_clear_request(struct nfs_page *req)
 		req->wb_page = NULL;
 	}
 	if (l_ctx != NULL) {
-		if (atomic_dec_and_test(&l_ctx->io_count))
+		if (atomic_dec_and_test(&l_ctx->io_count)) {
 			wake_up_atomic_t(&l_ctx->io_count);
+			if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags))
+				rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq);
+		}
 		nfs_put_lock_context(l_ctx);
 		req->wb_lock_context = NULL;
 	}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1b29915247b2..9aa044e76820 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -76,6 +76,7 @@ struct nfs_open_context {
 #define NFS_CONTEXT_ERROR_WRITE		(0)
 #define NFS_CONTEXT_RESEND_WRITES	(1)
 #define NFS_CONTEXT_BAD			(2)
+#define NFS_CONTEXT_UNLOCK	(3)
 	int error;
 
 	struct list_head list;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b34097c67848..2a70f34dffe8 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -222,6 +222,7 @@ struct nfs_server {
 	u32			mountd_version;
 	unsigned short		mountd_port;
 	unsigned short		mountd_protocol;
+	struct rpc_wait_queue	uoc_rpcwaitq;
 };
 
 /* Server capabilities */
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 6f01e28bba27..247cc3d3498f 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -141,6 +141,7 @@ extern int nfs_page_group_lock(struct nfs_page *, bool);
 extern void nfs_page_group_lock_wait(struct nfs_page *);
 extern void nfs_page_group_unlock(struct nfs_page *);
 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
+extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
 
 /*
  * Lock the page of an asynchronous request
-- 
cgit v1.2.3


From b1ece737f44f91dca8f4829cf0b442e752e406db Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 11 Apr 2017 12:50:11 -0400
Subject: lockd: Introduce nlmclnt_operations

NFS would enjoy the ability to modify the behavior of the NLM client's
unlock RPC task in order to delay the transmission of the unlock until IO
that was submitted under that lock has completed.  This ability can ensure
that the NLM client will always complete the transmission of an unlock even
if the waiting caller has been interrupted with fatal signal.

For this purpose, a pointer to a struct nlmclnt_operations can be assigned
in a nfs_module's nfs_rpc_ops that will install those nlmclnt_operations on
the nlm_host.  The struct nlmclnt_operations defines three callback
operations that will be used in a following patch:

nlmclnt_alloc_call - used to call back after a successful allocation of
	a struct nlm_rqst in nlmclnt_proc().

nlmclnt_unlock_prepare - used to call back during NLM unlock's
	rpc_call_prepare.  The NLM client defers calling rpc_call_start()
	until this callback returns false.

nlmclnt_release_call - used to call back when the NLM client's struct
	nlm_rqst is freed.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/clntlock.c         |  1 +
 fs/lockd/clntproc.c         | 26 +++++++++++++++++++++++++-
 fs/nfs/client.c             |  1 +
 fs/nfs/nfs3proc.c           |  2 +-
 fs/nfs/proc.c               |  2 +-
 include/linux/lockd/bind.h  | 24 ++++++++++++++++++++++--
 include/linux/lockd/lockd.h |  2 ++
 include/linux/nfs_xdr.h     |  1 +
 8 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 41e491b8e5d7..27d577dbe51a 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -69,6 +69,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
 	if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
 		goto out_nobind;
 
+	host->h_nlmclnt_ops = nlm_init->nlmclnt_ops;
 	return host;
 out_nobind:
 	nlmclnt_release_host(host);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 112952037933..066ac313ae5c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -150,17 +150,22 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
  * @host: address of a valid nlm_host context representing the NLM server
  * @cmd: fcntl-style file lock operation to perform
  * @fl: address of arguments for the lock operation
+ * @data: address of data to be sent to callback operations
  *
  */
-int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
+int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data)
 {
 	struct nlm_rqst		*call;
 	int			status;
+	const struct nlmclnt_operations *nlmclnt_ops = host->h_nlmclnt_ops;
 
 	call = nlm_alloc_call(host);
 	if (call == NULL)
 		return -ENOMEM;
 
+	if (nlmclnt_ops && nlmclnt_ops->nlmclnt_alloc_call)
+		nlmclnt_ops->nlmclnt_alloc_call(data);
+
 	nlmclnt_locks_init_private(fl, host);
 	if (!fl->fl_u.nfs_fl.owner) {
 		/* lockowner allocation has failed */
@@ -169,6 +174,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
 	}
 	/* Set up the argument struct */
 	nlmclnt_setlockargs(call, fl);
+	call->a_callback_data = data;
 
 	if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
 		if (fl->fl_type != F_UNLCK) {
@@ -214,8 +220,12 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 
 void nlmclnt_release_call(struct nlm_rqst *call)
 {
+	const struct nlmclnt_operations *nlmclnt_ops = call->a_host->h_nlmclnt_ops;
+
 	if (!atomic_dec_and_test(&call->a_count))
 		return;
+	if (nlmclnt_ops && nlmclnt_ops->nlmclnt_release_call)
+		nlmclnt_ops->nlmclnt_release_call(call->a_callback_data);
 	nlmclnt_release_host(call->a_host);
 	nlmclnt_release_lockargs(call);
 	kfree(call);
@@ -687,6 +697,19 @@ out:
 	return status;
 }
 
+static void nlmclnt_unlock_prepare(struct rpc_task *task, void *data)
+{
+	struct nlm_rqst	*req = data;
+	const struct nlmclnt_operations *nlmclnt_ops = req->a_host->h_nlmclnt_ops;
+	bool defer_call = false;
+
+	if (nlmclnt_ops && nlmclnt_ops->nlmclnt_unlock_prepare)
+		defer_call = nlmclnt_ops->nlmclnt_unlock_prepare(task, req->a_callback_data);
+
+	if (!defer_call)
+		rpc_call_start(task);
+}
+
 static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
 {
 	struct nlm_rqst	*req = data;
@@ -720,6 +743,7 @@ die:
 }
 
 static const struct rpc_call_ops nlmclnt_unlock_ops = {
+	.rpc_call_prepare = nlmclnt_unlock_prepare,
 	.rpc_call_done = nlmclnt_unlock_callback,
 	.rpc_release = nlmclnt_rpc_release,
 };
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 3e7b2e6a7cfb..e0302101e18a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -546,6 +546,7 @@ static int nfs_start_lockd(struct nfs_server *server)
 		.noresvport	= server->flags & NFS_MOUNT_NORESVPORT ?
 					1 : 0,
 		.net		= clp->cl_net,
+		.nlmclnt_ops 	= clp->cl_nfs_mod->rpc_ops->nlmclnt_ops,
 	};
 
 	if (nlm_init.nfs_version > 3)
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index dc925b531f32..03b3c3de28f1 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -870,7 +870,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = file_inode(filp);
 
-	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
+	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl, NULL);
 }
 
 static int nfs3_have_delegation(struct inode *inode, fmode_t flags)
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b7bca8303989..9872cf676a50 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -638,7 +638,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = file_inode(filp);
 
-	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
+	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl, NULL);
 }
 
 /* Helper functions for NFS lock bounds checking */
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 140edab64446..05728396a1a1 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -18,6 +18,7 @@
 
 /* Dummy declarations */
 struct svc_rqst;
+struct rpc_task;
 
 /*
  * This is the set of functions for lockd->nfsd communication
@@ -43,6 +44,7 @@ struct nlmclnt_initdata {
 	u32			nfs_version;
 	int			noresvport;
 	struct net		*net;
+	const struct nlmclnt_operations	*nlmclnt_ops;
 };
 
 /*
@@ -52,8 +54,26 @@ struct nlmclnt_initdata {
 extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init);
 extern void	nlmclnt_done(struct nlm_host *host);
 
-extern int	nlmclnt_proc(struct nlm_host *host, int cmd,
-					struct file_lock *fl);
+/*
+ * NLM client operations provide a means to modify RPC processing of NLM
+ * requests.  Callbacks receive a pointer to data passed into the call to
+ * nlmclnt_proc().
+ */
+struct nlmclnt_operations {
+	/* Called on successful allocation of nlm_rqst, use for allocation or
+	 * reference counting. */
+	void (*nlmclnt_alloc_call)(void *);
+
+	/* Called in rpc_task_prepare for unlock.  A return value of true
+	 * indicates the callback has put the task to sleep on a waitqueue
+	 * and NLM should not call rpc_call_start(). */
+	bool (*nlmclnt_unlock_prepare)(struct rpc_task*, void *);
+
+	/* Called when the nlm_rqst is freed, callbacks should clean up here */
+	void (*nlmclnt_release_call)(void *);
+};
+
+extern int	nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data);
 extern int	lockd_up(struct net *net);
 extern void	lockd_down(struct net *net);
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index b37dee3acaba..41f7b6a04d69 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -69,6 +69,7 @@ struct nlm_host {
 	char			*h_addrbuf;	/* address eyecatcher */
 	struct net		*net;		/* host net */
 	char			nodename[UNX_MAXNODENAME + 1];
+	const struct nlmclnt_operations	*h_nlmclnt_ops;	/* Callback ops for NLM users */
 };
 
 /*
@@ -142,6 +143,7 @@ struct nlm_rqst {
 	struct nlm_block *	a_block;
 	unsigned int		a_retries;	/* Retry count */
 	u8			a_owner[NLMCLNT_OHSIZE];
+	void *	a_callback_data; /* sent to nlmclnt_operations callbacks */
 };
 
 /*
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 51e27f9746ee..677c6b91dfcd 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1551,6 +1551,7 @@ struct nfs_rpc_ops {
 	const struct inode_operations *dir_inode_ops;
 	const struct inode_operations *file_inode_ops;
 	const struct file_operations *file_ops;
+	const struct nlmclnt_operations *nlmclnt_ops;
 
 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
-- 
cgit v1.2.3


From f30cb757f680f965ba8a2e53cb3588052a01aeb5 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 11 Apr 2017 12:50:12 -0400
Subject: NFS: Always wait for I/O completion before unlock

NFS attempts to wait for read and write completion before unlocking in
order to ensure that the data returned was protected by the lock.  When
this waiting is interrupted by a signal, the unlock may be skipped, and
messages similar to the following are seen in the kernel ring buffer:

[20.167876] Leaked locks on dev=0x0:0x2b ino=0x8dd4c3:
[20.168286] POSIX: fl_owner=ffff880078b06940 fl_flags=0x1 fl_type=0x0 fl_pid=20183
[20.168727] POSIX: fl_owner=ffff880078b06680 fl_flags=0x1 fl_type=0x0 fl_pid=20185

For NFSv3, the missing unlock will cause the server to refuse conflicting
locks indefinitely.  For NFSv4, the leftover lock will be removed by the
server after the lease timeout.

This patch fixes this issue by skipping the usual wait in
nfs_iocounter_wait if the FL_CLOSE flag is set when signaled.  Instead, the
wait happens in the unlock RPC task on the NFS UOC rpc_waitqueue.

For NFSv3, use lockd's new nlmclnt_operations along with
nfs_async_iocounter_wait to defer NLM's unlock task until the lock
context's iocounter reaches zero.

For NFSv4, call nfs_async_iocounter_wait() directly from unlock's
current rpc_call_prepare.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c     | 10 +++++-----
 fs/nfs/nfs3proc.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/nfs/nfs4proc.c |  9 +++++++++
 3 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index b7f4af3483b6..bebed885b6e4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -697,14 +697,14 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 	if (!IS_ERR(l_ctx)) {
 		status = nfs_iocounter_wait(l_ctx);
 		nfs_put_lock_context(l_ctx);
-		if (status < 0)
+		/*  NOTE: special case
+		 * 	If we're signalled while cleaning up locks on process exit, we
+		 * 	still need to complete the unlock.
+		 */
+		if (status < 0 && !(fl->fl_flags & FL_CLOSE))
 			return status;
 	}
 
-	/* NOTE: special case
-	 * 	If we're signalled while cleaning up locks on process exit, we
-	 * 	still need to complete the unlock.
-	 */
 	/*
 	 * Use local locking if mounted with "-onolock" or with appropriate
 	 * "-olocal_lock="
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 03b3c3de28f1..ce06ae0a25cf 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -865,12 +865,63 @@ static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
 }
 
+void nfs3_nlm_alloc_call(void *data)
+{
+	struct nfs_lock_context *l_ctx = data;
+	if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags)) {
+		get_nfs_open_context(l_ctx->open_context);
+		nfs_get_lock_context(l_ctx->open_context);
+	}
+}
+
+bool nfs3_nlm_unlock_prepare(struct rpc_task *task, void *data)
+{
+	struct nfs_lock_context *l_ctx = data;
+	if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags))
+		return nfs_async_iocounter_wait(task, l_ctx);
+	return false;
+
+}
+
+void nfs3_nlm_release_call(void *data)
+{
+	struct nfs_lock_context *l_ctx = data;
+	struct nfs_open_context *ctx;
+	if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags)) {
+		ctx = l_ctx->open_context;
+		nfs_put_lock_context(l_ctx);
+		put_nfs_open_context(ctx);
+	}
+}
+
+const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = {
+	.nlmclnt_alloc_call = nfs3_nlm_alloc_call,
+	.nlmclnt_unlock_prepare = nfs3_nlm_unlock_prepare,
+	.nlmclnt_release_call = nfs3_nlm_release_call,
+};
+
 static int
 nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = file_inode(filp);
+	struct nfs_lock_context *l_ctx = NULL;
+	struct nfs_open_context *ctx = nfs_file_open_context(filp);
+	int status;
 
-	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl, NULL);
+	if (fl->fl_flags & FL_CLOSE) {
+		l_ctx = nfs_get_lock_context(ctx);
+		if (IS_ERR(l_ctx))
+			l_ctx = NULL;
+		else
+			set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags);
+	}
+
+	status = nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl, l_ctx);
+
+	if (l_ctx)
+		nfs_put_lock_context(l_ctx);
+
+	return status;
 }
 
 static int nfs3_have_delegation(struct inode *inode, fmode_t flags)
@@ -921,6 +972,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.dir_inode_ops	= &nfs3_dir_inode_operations,
 	.file_inode_ops	= &nfs3_file_inode_operations,
 	.file_ops	= &nfs_file_operations,
+	.nlmclnt_ops	= &nlmclnt_fl_close_lock_ops,
 	.getroot	= nfs3_proc_get_root,
 	.submount	= nfs_submount,
 	.try_mount	= nfs_try_mount,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c52f72c86940..dbfc7574fab0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5784,6 +5784,7 @@ struct nfs4_unlockdata {
 	struct nfs_locku_res res;
 	struct nfs4_lock_state *lsp;
 	struct nfs_open_context *ctx;
+	struct nfs_lock_context *l_ctx;
 	struct file_lock fl;
 	struct nfs_server *server;
 	unsigned long timestamp;
@@ -5808,6 +5809,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
 	atomic_inc(&lsp->ls_count);
 	/* Ensure we don't close file until we're done freeing locks! */
 	p->ctx = get_nfs_open_context(ctx);
+	p->l_ctx = nfs_get_lock_context(ctx);
 	memcpy(&p->fl, fl, sizeof(p->fl));
 	p->server = NFS_SERVER(inode);
 	return p;
@@ -5818,6 +5820,7 @@ static void nfs4_locku_release_calldata(void *data)
 	struct nfs4_unlockdata *calldata = data;
 	nfs_free_seqid(calldata->arg.seqid);
 	nfs4_put_lock_state(calldata->lsp);
+	nfs_put_lock_context(calldata->l_ctx);
 	put_nfs_open_context(calldata->ctx);
 	kfree(calldata);
 }
@@ -5859,6 +5862,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 {
 	struct nfs4_unlockdata *calldata = data;
 
+	if (test_bit(NFS_CONTEXT_UNLOCK, &calldata->l_ctx->open_context->flags) &&
+		nfs_async_iocounter_wait(task, calldata->l_ctx))
+		return;
+
 	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		goto out_wait;
 	nfs4_stateid_copy(&calldata->arg.stateid, &calldata->lsp->ls_stateid);
@@ -5910,6 +5917,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 	 * canceled lock is passed in, and it won't be an unlock.
 	 */
 	fl->fl_type = F_UNLCK;
+	if (fl->fl_flags & FL_CLOSE)
+		set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags);
 
 	data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
 	if (data == NULL) {
-- 
cgit v1.2.3


From b62ea4112ce3746664dcc2f232d03461f0e6f3c7 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Fri, 21 Apr 2017 16:47:11 +0200
Subject: video: fbdev: imxfb: support AUS mode

Some displays require setting AUS mode in the LDCD AUS Mode Control
Register to work with the imxfb driver. Like the value of the Panel
Configuration Register, the AUS mode setting depends on the display
mode.

Allow setting AUS mode from the device tree by adding a boolean
property. Make this property optional to keep the DT ABI stable.
AUS mode can be set only on imx21 and compatible chipsets.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Cc: Sascha Hauer <kernel@pengutronix.de>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/imxfb.c               | 17 +++++++++++++++++
 include/linux/platform_data/video-imxfb.h |  1 +
 2 files changed, 18 insertions(+)

diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
index 1b0faadb3080..c166e0725be5 100644
--- a/drivers/video/fbdev/imxfb.c
+++ b/drivers/video/fbdev/imxfb.c
@@ -117,6 +117,9 @@
 
 #define IMXFB_LSCR1_DEFAULT 0x00120300
 
+#define LCDC_LAUSCR	0x80
+#define LAUSCR_AUS_MODE	(1<<31)
+
 /* Used fb-mode. Can be set on kernel command line, therefore file-static. */
 static const char *fb_mode;
 
@@ -158,6 +161,7 @@ struct imxfb_info {
 	dma_addr_t		dbar2;
 
 	u_int			pcr;
+	u_int			lauscr;
 	u_int			pwmr;
 	u_int			lscr1;
 	u_int			dmacr;
@@ -422,6 +426,11 @@ static int imxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	pcr |= imxfb_mode->pcr & ~(0x3f | (7 << 25));
 
 	fbi->pcr = pcr;
+	/*
+	 * The LCDC AUS Mode Control Register does not exist on imx1.
+	 */
+	if (!is_imx1_fb(fbi) && imxfb_mode->aus_mode)
+		fbi->lauscr = LAUSCR_AUS_MODE;
 
 	/*
 	 * Copy the RGB parameters for this display
@@ -638,6 +647,9 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf
 	if (fbi->dmacr)
 		writel(fbi->dmacr, fbi->regs + LCDC_DMACR);
 
+	if (fbi->lauscr)
+		writel(fbi->lauscr, fbi->regs + LCDC_LAUSCR);
+
 	return 0;
 }
 
@@ -734,6 +746,11 @@ static int imxfb_of_read_mode(struct device *dev, struct device_node *np,
 	imxfb_mode->bpp = bpp;
 	imxfb_mode->pcr = pcr;
 
+	/*
+	 * fsl,aus-mode is optional
+	 */
+	imxfb_mode->aus_mode = of_property_read_bool(np, "fsl,aus-mode");
+
 	return 0;
 }
 
diff --git a/include/linux/platform_data/video-imxfb.h b/include/linux/platform_data/video-imxfb.h
index a5c0a71ec914..cf9348b376ac 100644
--- a/include/linux/platform_data/video-imxfb.h
+++ b/include/linux/platform_data/video-imxfb.h
@@ -50,6 +50,7 @@
 struct imx_fb_videomode {
 	struct fb_videomode mode;
 	u32 pcr;
+	bool aus_mode;
 	unsigned char	bpp;
 };
 
-- 
cgit v1.2.3


From 1e9caa2a3b56ccf62cc5bc29e480652bddec6a80 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Fri, 21 Apr 2017 16:47:11 +0200
Subject: dt-bindings: display: imx: entry for AUS mode

Allow setting AUS mode for a display from the device tree. Use an
optional boolean property. AUS mode can be set only on imx21 and
compatible chipsets.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Acked-by: Rob Herring <robh@kernel.org>
Cc: Sascha Hauer <kernel@pengutronix.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt b/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt
index 7a5c0e204c8e..e5a8b363d829 100644
--- a/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt
@@ -13,6 +13,8 @@ Required nodes:
 	Additional, the display node has to define properties:
 	- bits-per-pixel: Bits per pixel
 	- fsl,pcr: LCDC PCR value
+	A display node may optionally define
+	- fsl,aus-mode: boolean to enable AUS mode (only for imx21)
 
 Optional properties:
 - lcd-supply: Regulator for LCD supply voltage.
-- 
cgit v1.2.3


From 3bbb5b8234161db431992e5d1f8b7cb88ad9dec3 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Fri, 21 Apr 2017 16:56:10 +0200
Subject: video: console: Remove reference to CONFIG_8xx

CONFIG_8xx is deprecated and should soon be removed in favor
of CONFIG_PPC_8xx.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/console/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index 5b71bd905a60..2111d06f8c81 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -6,7 +6,7 @@ menu "Console display driver support"
 
 config VGA_CONSOLE
 	bool "VGA text console" if EXPERT || !X86
-	depends on !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && \
+	depends on !4xx && !PPC_8xx && !SPARC && !M68K && !PARISC && !FRV && \
 		!SUPERH && !BLACKFIN && !AVR32 && !MN10300 && !CRIS && \
 		(!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) && \
 		!ARM64 && !ARC && !MICROBLAZE && !OPENRISC
-- 
cgit v1.2.3


From 159b095628851966b5fbf2637b0c40709911ca88 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 15 Apr 2017 15:58:56 -0400
Subject: make sure that fchdir() won't accept referral points, etc.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index 949cef29c3bb..9f4bbd7cc51a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -459,20 +459,17 @@ out:
 SYSCALL_DEFINE1(fchdir, unsigned int, fd)
 {
 	struct fd f = fdget_raw(fd);
-	struct inode *inode;
-	int error = -EBADF;
+	int error;
 
 	error = -EBADF;
 	if (!f.file)
 		goto out;
 
-	inode = file_inode(f.file);
-
 	error = -ENOTDIR;
-	if (!S_ISDIR(inode->i_mode))
+	if (!d_can_lookup(f.file->f_path.dentry))
 		goto out_putf;
 
-	error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission(file_inode(f.file), MAY_EXEC | MAY_CHDIR);
 	if (!error)
 		set_fs_pwd(current->fs, &f.file->f_path);
 out_putf:
-- 
cgit v1.2.3


From 93893862fb7ba704ec5a6872a294c9cc2b0d4ca3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 15 Apr 2017 17:29:14 -0400
Subject: path_init(): don't bother with checking MAY_EXEC for LOOKUP_ROOT

we'll hit that check in link_path_walk() anyway.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 19dcf62133cc..60c0a78ebca7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2142,7 +2142,6 @@ OK:
 
 static const char *path_init(struct nameidata *nd, unsigned flags)
 {
-	int retval = 0;
 	const char *s = nd->name->name;
 
 	if (!*s)
@@ -2154,13 +2153,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
 	if (flags & LOOKUP_ROOT) {
 		struct dentry *root = nd->root.dentry;
 		struct inode *inode = root->d_inode;
-		if (*s) {
-			if (!d_can_lookup(root))
-				return ERR_PTR(-ENOTDIR);
-			retval = inode_permission(inode, MAY_EXEC);
-			if (retval)
-				return ERR_PTR(retval);
-		}
+		if (*s && unlikely(!d_can_lookup(root)))
+			return ERR_PTR(-ENOTDIR);
 		nd->path = nd->root;
 		nd->inode = inode;
 		if (flags & LOOKUP_RCU) {
-- 
cgit v1.2.3


From 4f757f3cbf54edef7b75c68d6d6d2f1a0ca08d2e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 15 Apr 2017 17:31:22 -0400
Subject: make sure that mntns_install() doesn't end up with referral for root

new flag: LOOKUP_DOWN.  If the starting point is overmounted, cross
into whatever's mounted on top, triggering referrals et.al.

Use that instead of follow_down_one() loop in mntns_install(), handle
errors properly.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c            | 38 ++++++++++++++++++++++++++++++++++++++
 fs/namespace.c        | 18 +++++++++++-------
 include/linux/namei.h |  1 +
 3 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 60c0a78ebca7..646db9cf2579 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2252,6 +2252,35 @@ static inline int lookup_last(struct nameidata *nd)
 	return walk_component(nd, 0);
 }
 
+static int handle_lookup_down(struct nameidata *nd)
+{
+	struct path path = nd->path;
+	struct inode *inode = nd->inode;
+	unsigned seq = nd->seq;
+	int err;
+
+	if (nd->flags & LOOKUP_RCU) {
+		/*
+		 * don't bother with unlazy_walk on failure - we are
+		 * at the very beginning of walk, so we lose nothing
+		 * if we simply redo everything in non-RCU mode
+		 */
+		if (unlikely(!__follow_mount_rcu(nd, &path, &inode, &seq)))
+			return -ECHILD;
+	} else {
+		dget(path.dentry);
+		err = follow_managed(&path, nd);
+		if (unlikely(err < 0))
+			return err;
+		inode = d_backing_inode(path.dentry);
+		seq = 0;
+	}
+	path_to_nameidata(&path, nd);
+	nd->inode = inode;
+	nd->seq = seq;
+	return 0;
+}
+
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
 static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
 {
@@ -2260,6 +2289,15 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
 
 	if (IS_ERR(s))
 		return PTR_ERR(s);
+
+	if (unlikely(flags & LOOKUP_DOWN)) {
+		err = handle_lookup_down(nd);
+		if (unlikely(err < 0)) {
+			terminate_walk(nd);
+			return err;
+		}
+	}
+
 	while (!(err = link_path_walk(s, nd))
 		&& ((err = lookup_last(nd)) > 0)) {
 		s = trailing_symlink(nd);
diff --git a/fs/namespace.c b/fs/namespace.c
index cc1375eff88c..0886ef28bff6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3465,8 +3465,9 @@ static void mntns_put(struct ns_common *ns)
 static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 {
 	struct fs_struct *fs = current->fs;
-	struct mnt_namespace *mnt_ns = to_mnt_ns(ns);
+	struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
 	struct path root;
+	int err;
 
 	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
 	    !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
@@ -3477,15 +3478,18 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 		return -EINVAL;
 
 	get_mnt_ns(mnt_ns);
-	put_mnt_ns(nsproxy->mnt_ns);
+	old_mnt_ns = nsproxy->mnt_ns;
 	nsproxy->mnt_ns = mnt_ns;
 
 	/* Find the root */
-	root.mnt    = &mnt_ns->root->mnt;
-	root.dentry = mnt_ns->root->mnt.mnt_root;
-	path_get(&root);
-	while(d_mountpoint(root.dentry) && follow_down_one(&root))
-		;
+	err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
+				"/", LOOKUP_DOWN, &root);
+	if (err) {
+		/* revert to old namespace */
+		nsproxy->mnt_ns = old_mnt_ns;
+		put_mnt_ns(mnt_ns);
+		return err;
+	}
 
 	/* Update the pwd and root */
 	set_fs_pwd(fs, &root);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index f29abda31e6d..8b4794e83196 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -44,6 +44,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_JUMPED		0x1000
 #define LOOKUP_ROOT		0x2000
 #define LOOKUP_EMPTY		0x4000
+#define LOOKUP_DOWN		0x8000
 
 extern int path_pts(struct path *path);
 
-- 
cgit v1.2.3


From 2158a09395b3751db99663b2efdcce0ae09ca133 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Apr 2017 23:43:10 +0200
Subject: clk: ti: fix linker error with !SOC_OMAP4

When none of the OMAP4-generation SoCs are enabled, we run into a link
error for am43xx/am43xx:

drivers/clk/ti/dpll.o: In function `of_ti_am3_dpll_x2_setup':
dpll.c:(.init.text+0xd8): undefined reference to `clkhwops_omap4_dpllmx'

This is easily fixed by adding another #ifdef.

While looking at the code, I also spotted another problem with the
assignment of hw_ops variable that is not used again later. I'm
changing this to setting clk_hw->ops instead, which I guess is what
was intended here.

Fixes: 473adbf4e028 ("clk: ti: dpll44xx: fix clksel register initialization")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Tero Kristo <t-kristo@ti.com>
[sboyd@codeaurora.org: Replaced fixes tag with correct one]
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/ti/dpll.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c
index 96d84888c6c5..d4e4444bc5ca 100644
--- a/drivers/clk/ti/dpll.c
+++ b/drivers/clk/ti/dpll.c
@@ -312,7 +312,6 @@ static void _register_dpll_x2(struct device_node *node,
 	struct clk_hw_omap *clk_hw;
 	const char *name = node->name;
 	const char *parent_name;
-	int ret;
 
 	parent_name = of_clk_get_parent_name(node, 0);
 	if (!parent_name) {
@@ -332,16 +331,21 @@ static void _register_dpll_x2(struct device_node *node,
 	init.parent_names = &parent_name;
 	init.num_parents = 1;
 
+#if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) || \
+	defined(CONFIG_SOC_DRA7XX)
 	if (hw_ops == &clkhwops_omap4_dpllmx) {
+		int ret;
+
 		/* Check if register defined, if not, drop hw-ops */
 		ret = of_property_count_elems_of_size(node, "reg", 1);
 		if (ret <= 0) {
-			hw_ops = NULL;
+			clk_hw->ops = NULL;
 		} else if (ti_clk_get_reg_addr(node, 0, &clk_hw->clksel_reg)) {
 			kfree(clk_hw);
 			return;
 		}
 	}
+#endif
 
 	/* register the clock */
 	clk = ti_clk_register(NULL, &clk_hw->hw, name);
-- 
cgit v1.2.3


From fc04f27dfc4280ef35540ab40fa14c24ff8cc799 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Apr 2017 19:44:56 +0200
Subject: clk: ti: fix building without legacy omap3

When CONFIG_ATAGS or CONFIG_OMAP3 is disabled, we get a build error:

In file included from include/linux/clk-provider.h:15:0,
                 from drivers/clk/ti/clk.c:19:
drivers/clk/ti/clk.c: In function 'ti_clk_add_aliases':
drivers/clk/ti/clk.c:438:29: error: 'simple_clk_match_table' undeclared (first use in this function); did you mean 'simple_attr_write'?

Moving the match table down fixes it.

Fixes: c17435c56bb1 ("clk: ti: add API for creating aliases automatically for simple clock types")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/ti/clk.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index ddbad7e8d7c9..e5a1c8297a1d 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -371,12 +371,6 @@ struct clk __init *ti_clk_register_clk(struct ti_clk *setup)
 	return clk;
 }
 
-static const struct of_device_id simple_clk_match_table[] __initconst = {
-	{ .compatible = "fixed-clock" },
-	{ .compatible = "fixed-factor-clock" },
-	{ }
-};
-
 int __init ti_clk_register_legacy_clks(struct ti_clk_alias *clks)
 {
 	struct clk *clk;
@@ -425,6 +419,12 @@ int __init ti_clk_register_legacy_clks(struct ti_clk_alias *clks)
 }
 #endif
 
+static const struct of_device_id simple_clk_match_table[] __initconst = {
+	{ .compatible = "fixed-clock" },
+	{ .compatible = "fixed-factor-clock" },
+	{ }
+};
+
 /**
  * ti_clk_add_aliases - setup clock aliases
  *
-- 
cgit v1.2.3


From b68adc23bc3d0c81a08b69cf1ba0bf0405c9d868 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Mon, 17 Apr 2017 19:19:25 +0200
Subject: clk: hi6220: Add the hi655x's pmic clock

The hi655x multi function device is a PMIC providing regulators.

The PMIC also provides a clock for the WiFi and the Bluetooth, let's implement
this clock in order to add it in the hi655x MFD and allow proper wireless
initialization.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
[sboyd@codeaurora.org: Remove clkdev usage]
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/Kconfig      |   8 +++
 drivers/clk/Makefile     |   1 +
 drivers/clk/clk-hi655x.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+)
 create mode 100644 drivers/clk/clk-hi655x.c

diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 9356ab4b7d76..36cfea38135f 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -47,6 +47,14 @@ config COMMON_CLK_RK808
 	  clocked at 32KHz each. Clkout1 is always on, Clkout2 can off
 	  by control register.
 
+config COMMON_CLK_HI655X
+	tristate "Clock driver for Hi655x"
+	depends on MFD_HI655X_PMIC || COMPILE_TEST
+	---help---
+	  This driver supports the hi655x PMIC clock. This
+	  multi-function device has one fixed-rate oscillator, clocked
+	  at 32KHz.
+
 config COMMON_CLK_SCPI
 	tristate "Clock driver controlled via SCPI interface"
 	depends on ARM_SCPI_PROTOCOL || COMPILE_TEST
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 92c12b86c2e8..c19983afcb81 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_COMMON_CLK_PALMAS)		+= clk-palmas.o
 obj-$(CONFIG_COMMON_CLK_PWM)		+= clk-pwm.o
 obj-$(CONFIG_CLK_QORIQ)			+= clk-qoriq.o
 obj-$(CONFIG_COMMON_CLK_RK808)		+= clk-rk808.o
+obj-$(CONFIG_COMMON_CLK_HI655X)		+= clk-hi655x.o
 obj-$(CONFIG_COMMON_CLK_S2MPS11)	+= clk-s2mps11.o
 obj-$(CONFIG_COMMON_CLK_SCPI)           += clk-scpi.o
 obj-$(CONFIG_COMMON_CLK_SI5351)		+= clk-si5351.o
diff --git a/drivers/clk/clk-hi655x.c b/drivers/clk/clk-hi655x.c
new file mode 100644
index 000000000000..403a0188634a
--- /dev/null
+++ b/drivers/clk/clk-hi655x.c
@@ -0,0 +1,126 @@
+/*
+ * Clock driver for Hi655x
+ *
+ * Copyright (c) 2017, Linaro Ltd.
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/hi655x-pmic.h>
+
+#define HI655X_CLK_BASE	HI655X_BUS_ADDR(0x1c)
+#define HI655X_CLK_SET	BIT(6)
+
+struct hi655x_clk {
+	struct hi655x_pmic *hi655x;
+	struct clk_hw       clk_hw;
+};
+
+static unsigned long hi655x_clk_recalc_rate(struct clk_hw *hw,
+					    unsigned long parent_rate)
+{
+	return 32768;
+}
+
+static int hi655x_clk_enable(struct clk_hw *hw, bool enable)
+{
+	struct hi655x_clk *hi655x_clk =
+		container_of(hw, struct hi655x_clk, clk_hw);
+
+	struct hi655x_pmic *hi655x = hi655x_clk->hi655x;
+
+	return regmap_update_bits(hi655x->regmap, HI655X_CLK_BASE,
+				  HI655X_CLK_SET, enable ? HI655X_CLK_SET : 0);
+}
+
+static int hi655x_clk_prepare(struct clk_hw *hw)
+{
+	return hi655x_clk_enable(hw, true);
+}
+
+static void hi655x_clk_unprepare(struct clk_hw *hw)
+{
+	hi655x_clk_enable(hw, false);
+}
+
+static int hi655x_clk_is_prepared(struct clk_hw *hw)
+{
+	struct hi655x_clk *hi655x_clk =
+		container_of(hw, struct hi655x_clk, clk_hw);
+	struct hi655x_pmic *hi655x = hi655x_clk->hi655x;
+	int ret;
+	uint32_t val;
+
+	ret = regmap_read(hi655x->regmap, HI655X_CLK_BASE, &val);
+	if (ret < 0)
+		return ret;
+
+	return val & HI655X_CLK_BASE;
+}
+
+static const struct clk_ops hi655x_clk_ops = {
+	.prepare     = hi655x_clk_prepare,
+	.unprepare   = hi655x_clk_unprepare,
+	.is_prepared = hi655x_clk_is_prepared,
+	.recalc_rate = hi655x_clk_recalc_rate,
+};
+
+static int hi655x_clk_probe(struct platform_device *pdev)
+{
+	struct device *parent = pdev->dev.parent;
+	struct hi655x_pmic *hi655x = dev_get_drvdata(parent);
+	struct hi655x_clk *hi655x_clk;
+	const char *clk_name = "hi655x-clk";
+	struct clk_init_data init = {
+		.name = clk_name,
+		.ops = &hi655x_clk_ops
+	};
+	int ret;
+
+	hi655x_clk = devm_kzalloc(&pdev->dev, sizeof(*hi655x_clk), GFP_KERNEL);
+	if (!hi655x_clk)
+		return -ENOMEM;
+
+	of_property_read_string_index(parent->of_node, "clock-output-names",
+				      0, &clk_name);
+
+	hi655x_clk->clk_hw.init	= &init;
+	hi655x_clk->hi655x	= hi655x;
+
+	platform_set_drvdata(pdev, hi655x_clk);
+
+	ret = devm_clk_hw_register(&pdev->dev, &hi655x_clk->clk_hw);
+	if (ret)
+		return ret;
+
+	return of_clk_add_hw_provider(parent->of_node, of_clk_hw_simple_get,
+				     &hi655x_clk->clk_hw);
+}
+
+static struct platform_driver hi655x_clk_driver = {
+	.probe =  hi655x_clk_probe,
+	.driver		= {
+		.name	= "hi655x-clk",
+	},
+};
+
+module_platform_driver(hi655x_clk_driver);
+
+MODULE_DESCRIPTION("Clk driver for the hi655x series PMICs");
+MODULE_AUTHOR("Daniel Lezcano <daniel.lezcano@linaro.org>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:hi655x-clk");
-- 
cgit v1.2.3


From db9c4a1e6581e45976526eb45c032a73f944dd8a Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Mon, 23 Jan 2017 13:48:26 +0100
Subject: clk: mediatek: add mt2701 ethernet reset

The ethernet clock core has a reset register that is currently not exposed
to the user. Fix this by adding the missing registration code.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/mediatek/clk-mt2701-eth.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/clk/mediatek/clk-mt2701-eth.c b/drivers/clk/mediatek/clk-mt2701-eth.c
index 877be8715afa..9251a6551522 100644
--- a/drivers/clk/mediatek/clk-mt2701-eth.c
+++ b/drivers/clk/mediatek/clk-mt2701-eth.c
@@ -66,6 +66,8 @@ static int clk_mt2701_eth_probe(struct platform_device *pdev)
 			"could not register clock provider: %s: %d\n",
 			pdev->name, r);
 
+	mtk_register_reset_controller(node, 1, 0x34);
+
 	return r;
 }
 
-- 
cgit v1.2.3


From 7c2adaf11036654e4a5f5da0bf09916901f3e1ec Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Mon, 23 Jan 2017 13:48:27 +0100
Subject: reset: mediatek: Add MT2701 ethsys reset controller include file

Add the missing reset bits of the ethsys core to the mt2701-reset include
file, so that we can reference them from within a devicetree file.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 include/dt-bindings/reset/mt2701-resets.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/dt-bindings/reset/mt2701-resets.h b/include/dt-bindings/reset/mt2701-resets.h
index aaf03057f755..21deb547cfa4 100644
--- a/include/dt-bindings/reset/mt2701-resets.h
+++ b/include/dt-bindings/reset/mt2701-resets.h
@@ -80,4 +80,11 @@
 #define MT2701_HIFSYS_PCIE1_RST			25
 #define MT2701_HIFSYS_PCIE2_RST			26
 
+/* ETHSYS resets */
+#define MT2701_ETHSYS_SYS_RST			0
+#define MT2701_ETHSYS_MCM_RST			2
+#define MT2701_ETHSYS_FE_RST			6
+#define MT2701_ETHSYS_GMAC_RST			23
+#define MT2701_ETHSYS_PPE_RST			31
+
 #endif  /* _DT_BINDINGS_RESET_CONTROLLER_MT2701 */
-- 
cgit v1.2.3


From 1e5c844441e6dbf2e2433384ac835b712c35c533 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Wed, 19 Apr 2017 19:43:03 +0200
Subject: clk: at91: Use kcalloc() in of_at91_clk_pll_get_characteristics()

Multiplications for the size determination of memory allocations
indicated that array data structures should be processed.
Thus use the corresponding function "kcalloc".

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/at91/clk-pll.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c
index 45ad168e1496..7d3223fc7161 100644
--- a/drivers/clk/at91/clk-pll.c
+++ b/drivers/clk/at91/clk-pll.c
@@ -399,18 +399,18 @@ of_at91_clk_pll_get_characteristics(struct device_node *np)
 	if (!characteristics)
 		return NULL;
 
-	output = kzalloc(sizeof(*output) * num_output, GFP_KERNEL);
+	output = kcalloc(num_output, sizeof(*output), GFP_KERNEL);
 	if (!output)
 		goto out_free_characteristics;
 
 	if (num_cells > 2) {
-		out = kzalloc(sizeof(*out) * num_output, GFP_KERNEL);
+		out = kcalloc(num_output, sizeof(*out), GFP_KERNEL);
 		if (!out)
 			goto out_free_output;
 	}
 
 	if (num_cells > 3) {
-		icpll = kzalloc(sizeof(*icpll) * num_output, GFP_KERNEL);
+		icpll = kcalloc(num_output, sizeof(*icpll), GFP_KERNEL);
 		if (!icpll)
 			goto out_free_output;
 	}
-- 
cgit v1.2.3


From 9a78b16972fba06bc8d303282a6b96f5061fd16a Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Wed, 19 Apr 2017 22:37:30 +0200
Subject: clk: si5351: Use devm_kcalloc() in si5351_i2c_probe()

Multiplications for the size determination of memory allocations
indicated that array data structures should be processed.
Thus use the corresponding function "devm_kcalloc".

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk-si5351.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/clk-si5351.c b/drivers/clk/clk-si5351.c
index b051db43fae1..a4c009e1e70d 100644
--- a/drivers/clk/clk-si5351.c
+++ b/drivers/clk/clk-si5351.c
@@ -1535,9 +1535,9 @@ static int si5351_i2c_probe(struct i2c_client *client,
 	else
 		parent_names[1] = si5351_pll_names[1];
 
-	drvdata->msynth = devm_kzalloc(&client->dev, num_clocks *
+	drvdata->msynth = devm_kcalloc(&client->dev, num_clocks,
 				       sizeof(*drvdata->msynth), GFP_KERNEL);
-	drvdata->clkout = devm_kzalloc(&client->dev, num_clocks *
+	drvdata->clkout = devm_kcalloc(&client->dev, num_clocks,
 				       sizeof(*drvdata->clkout), GFP_KERNEL);
 	drvdata->num_clkout = num_clocks;
 
-- 
cgit v1.2.3


From 56f2150a841ca0915d43e7b9033a4557bc829cc0 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Thu, 20 Apr 2017 07:34:54 +0200
Subject: clk: si5351: Delete an error message for a failed memory allocation
 in si5351_i2c_probe()

The script "checkpatch.pl" pointed information out like the following.

* CHECK: Comparison to NULL could be written "!drvdata"

  Thus adjust this expression.

* WARNING: Possible unnecessary 'out of memory' message

  Thus remove such a statement here.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk-si5351.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/clk/clk-si5351.c b/drivers/clk/clk-si5351.c
index a4c009e1e70d..2492442eea77 100644
--- a/drivers/clk/clk-si5351.c
+++ b/drivers/clk/clk-si5351.c
@@ -1354,10 +1354,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
 		return -EINVAL;
 
 	drvdata = devm_kzalloc(&client->dev, sizeof(*drvdata), GFP_KERNEL);
-	if (drvdata == NULL) {
-		dev_err(&client->dev, "unable to allocate driver data\n");
+	if (!drvdata)
 		return -ENOMEM;
-	}
 
 	i2c_set_clientdata(client, drvdata);
 	drvdata->client = client;
-- 
cgit v1.2.3


From 4d32758671527761e6ee94568b8e32925c4bc682 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Thu, 20 Apr 2017 08:45:43 +0200
Subject: clk: Replace four seq_printf() calls by seq_putc()

Four single characters should be put into a sequence.
Thus use the corresponding function "seq_putc".

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index cddddbe46d9d..3e5562a161a4 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2083,11 +2083,11 @@ static void clk_dump_subtree(struct seq_file *s, struct clk_core *c, int level)
 	clk_dump_one(s, c, level);
 
 	hlist_for_each_entry(child, &c->children, child_node) {
-		seq_printf(s, ",");
+		seq_putc(s, ',');
 		clk_dump_subtree(s, child, level + 1);
 	}
 
-	seq_printf(s, "}");
+	seq_putc(s, '}');
 }
 
 static int clk_dump(struct seq_file *s, void *data)
@@ -2096,14 +2096,13 @@ static int clk_dump(struct seq_file *s, void *data)
 	bool first_node = true;
 	struct hlist_head **lists = (struct hlist_head **)s->private;
 
-	seq_printf(s, "{");
-
+	seq_putc(s, '{');
 	clk_prepare_lock();
 
 	for (; *lists; lists++) {
 		hlist_for_each_entry(c, *lists, child_node) {
 			if (!first_node)
-				seq_puts(s, ",");
+				seq_putc(s, ',');
 			first_node = false;
 			clk_dump_subtree(s, c, 0);
 		}
-- 
cgit v1.2.3


From 1808a3201965c430b70cb9236ff80d2cad403452 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Thu, 20 Apr 2017 09:30:52 +0200
Subject: clk: Improve a size determination in two functions

Replace the specification of two data structures by pointer dereferences
as the parameter for the operator "sizeof" to make the corresponding size
determination a bit safer according to the Linux coding style convention.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 3e5562a161a4..fc58c52a26b4 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2973,7 +2973,7 @@ int clk_notifier_register(struct clk *clk, struct notifier_block *nb)
 
 	/* if clk wasn't in the notifier list, allocate new clk_notifier */
 	if (cn->clk != clk) {
-		cn = kzalloc(sizeof(struct clk_notifier), GFP_KERNEL);
+		cn = kzalloc(sizeof(*cn), GFP_KERNEL);
 		if (!cn)
 			goto out;
 
@@ -3121,7 +3121,7 @@ int of_clk_add_provider(struct device_node *np,
 	struct of_clk_provider *cp;
 	int ret;
 
-	cp = kzalloc(sizeof(struct of_clk_provider), GFP_KERNEL);
+	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
 	if (!cp)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From add3151133b9ae3bcfd97eab04d9a50b6606453c Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Thu, 20 Apr 2017 09:45:04 +0200
Subject: clk: nomadik: Use seq_puts() in nomadik_src_clk_show()

A string which did not contain a data format specification should be put
into a sequence. Thus use the corresponding function "seq_puts".

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk-nomadik.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/clk-nomadik.c b/drivers/clk/clk-nomadik.c
index 71677eb12565..8d57ab8d6ed8 100644
--- a/drivers/clk/clk-nomadik.c
+++ b/drivers/clk/clk-nomadik.c
@@ -467,7 +467,7 @@ static int nomadik_src_clk_show(struct seq_file *s, void *what)
 	u32 src_pckensr0 = readl(src_base + SRC_PCKENSR0);
 	u32 src_pckensr1 = readl(src_base + SRC_PCKENSR1);
 
-	seq_printf(s, "Clock:      Boot:   Now:    Request: ASKED:\n");
+	seq_puts(s, "Clock:      Boot:   Now:    Request: ASKED:\n");
 	for (i = 0; i < ARRAY_SIZE(src_clk_names); i++) {
 		u32 pcksrb = (i < 0x20) ? src_pcksr0_boot : src_pcksr1_boot;
 		u32 pcksr = (i < 0x20) ? src_pcksr0 : src_pcksr1;
-- 
cgit v1.2.3


From 24f8186eb8d69a3a0d856ce59fb4f19ae23ff23a Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Thu, 20 Apr 2017 10:04:00 +0200
Subject: clk: nomadik: Delete error messages for a failed memory allocation in
 two functions

The script "checkpatch.pl" pointed information out like the following.

WARNING: Possible unnecessary 'out of memory' message

Thus remove such statements here.

Link: http://events.linuxfoundation.org/sites/events/files/slides/LCJ16-Refactor_Strings-WSang_0.pdf
Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk-nomadik.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/clk/clk-nomadik.c b/drivers/clk/clk-nomadik.c
index 8d57ab8d6ed8..13ad6d1e5090 100644
--- a/drivers/clk/clk-nomadik.c
+++ b/drivers/clk/clk-nomadik.c
@@ -267,10 +267,8 @@ pll_clk_register(struct device *dev, const char *name,
 	}
 
 	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
-	if (!pll) {
-		pr_err("%s: could not allocate PLL clk\n", __func__);
+	if (!pll)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	init.name = name;
 	init.ops = &pll_clk_ops;
@@ -356,11 +354,9 @@ src_clk_register(struct device *dev, const char *name,
 	struct clk_init_data init;
 
 	sclk = kzalloc(sizeof(*sclk), GFP_KERNEL);
-	if (!sclk) {
-		pr_err("could not allocate SRC clock %s\n",
-			name);
+	if (!sclk)
 		return ERR_PTR(-ENOMEM);
-	}
+
 	init.name = name;
 	init.ops = &src_clk_ops;
 	/* Do not force-disable the static SDRAM controller */
-- 
cgit v1.2.3


From 23826e240ad82727635eb84fff1211dd2ff750fb Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Wed, 19 Apr 2017 20:15:21 +0200
Subject: clk: mvebu: Use kcalloc() in of_cpu_clk_setup()

Multiplications for the size determination of memory allocations
indicated that array data structures should be processed.
Thus use the corresponding function "kcalloc".

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/mvebu/clk-cpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/mvebu/clk-cpu.c b/drivers/clk/mvebu/clk-cpu.c
index 044892b6534d..072aa38374ce 100644
--- a/drivers/clk/mvebu/clk-cpu.c
+++ b/drivers/clk/mvebu/clk-cpu.c
@@ -186,11 +186,11 @@ static void __init of_cpu_clk_setup(struct device_node *node)
 	for_each_node_by_type(dn, "cpu")
 		ncpus++;
 
-	cpuclk = kzalloc(ncpus * sizeof(*cpuclk), GFP_KERNEL);
+	cpuclk = kcalloc(ncpus, sizeof(*cpuclk), GFP_KERNEL);
 	if (WARN_ON(!cpuclk))
 		goto cpuclk_out;
 
-	clks = kzalloc(ncpus * sizeof(*clks), GFP_KERNEL);
+	clks = kcalloc(ncpus, sizeof(*clks), GFP_KERNEL);
 	if (WARN_ON(!clks))
 		goto clks_out;
 
-- 
cgit v1.2.3


From ee7d74339df71015ae5b98d91393ea80b72a4546 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Wed, 19 Apr 2017 21:08:54 +0200
Subject: clk: mvebu: Use kcalloc() in two functions

* Multiplications for the size determination of memory allocations
  indicated that array data structures should be processed.
  Thus use the corresponding function "kcalloc".

  This issue was detected by using the Coccinelle software.

* Replace the specification of data types by pointer dereferences
  to make the corresponding size determination a bit safer according to
  the Linux coding style convention.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/mvebu/common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/mvebu/common.c b/drivers/clk/mvebu/common.c
index 66be2e0c82b4..472c88b90256 100644
--- a/drivers/clk/mvebu/common.c
+++ b/drivers/clk/mvebu/common.c
@@ -126,7 +126,7 @@ void __init mvebu_coreclk_setup(struct device_node *np,
 	if (desc->get_refclk_freq)
 		clk_data.clk_num += 1;
 
-	clk_data.clks = kzalloc(clk_data.clk_num * sizeof(struct clk *),
+	clk_data.clks = kcalloc(clk_data.clk_num, sizeof(*clk_data.clks),
 				GFP_KERNEL);
 	if (WARN_ON(!clk_data.clks)) {
 		iounmap(base);
@@ -270,7 +270,7 @@ void __init mvebu_clk_gating_setup(struct device_node *np,
 		n++;
 
 	ctrl->num_gates = n;
-	ctrl->gates = kzalloc(ctrl->num_gates * sizeof(struct clk *),
+	ctrl->gates = kcalloc(ctrl->num_gates, sizeof(*ctrl->gates),
 			      GFP_KERNEL);
 	if (WARN_ON(!ctrl->gates))
 		goto gates_out;
-- 
cgit v1.2.3


From e73ef755fbb90b04f0cf494071b2be6bafcafbdc Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 12 Apr 2017 11:32:53 +0300
Subject: rtc: hid-sensor-time: remove some dead code

devm_rtc_device_register() doesn't ever return NULL so there is no need
to check.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-hid-sensor-time.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c
index c398f74234c6..2751dba850c6 100644
--- a/drivers/rtc/rtc-hid-sensor-time.c
+++ b/drivers/rtc/rtc-hid-sensor-time.c
@@ -291,9 +291,9 @@ static int hid_time_probe(struct platform_device *pdev)
 					"hid-sensor-time", &hid_time_rtc_ops,
 					THIS_MODULE);
 
-	if (IS_ERR_OR_NULL(time_state->rtc)) {
+	if (IS_ERR(time_state->rtc)) {
 		hid_device_io_stop(hsdev->hdev);
-		ret = time_state->rtc ? PTR_ERR(time_state->rtc) : -ENODEV;
+		ret = PTR_ERR(time_state->rtc);
 		time_state->rtc = NULL;
 		dev_err(&pdev->dev, "rtc device register failed!\n");
 		goto err_rtc;
-- 
cgit v1.2.3


From 5d05e81516cfe7606ee0cd8278fe225314dccfbe Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Apr 2017 19:52:43 +0200
Subject: rtc: sh: mark PM functions as unused

The sh_rtc_set_irq_wake() function is only called from the suspend/resume handlers
that may be hidden, causing a harmless warning:

drivers/rtc/rtc-sh.c:724:13: error: 'sh_rtc_set_irq_wake' defined but not used [-Werror=unused-function]
 static void sh_rtc_set_irq_wake(struct device *dev, int enabled)

The most reliable way to avoid the warning is to remove the existing #ifdef
and mark the two functions as __maybe_unused so the compiler can silently
drop all three when there is no reference.

Fixes: dab5aec64bf5 ("rtc: sh: add support for rza series")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-sh.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 00b396e96cbe..6c2d3989f967 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -734,8 +734,7 @@ static void sh_rtc_set_irq_wake(struct device *dev, int enabled)
 	}
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int sh_rtc_suspend(struct device *dev)
+static int __maybe_unused sh_rtc_suspend(struct device *dev)
 {
 	if (device_may_wakeup(dev))
 		sh_rtc_set_irq_wake(dev, 1);
@@ -743,14 +742,13 @@ static int sh_rtc_suspend(struct device *dev)
 	return 0;
 }
 
-static int sh_rtc_resume(struct device *dev)
+static int __maybe_unused sh_rtc_resume(struct device *dev)
 {
 	if (device_may_wakeup(dev))
 		sh_rtc_set_irq_wake(dev, 0);
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(sh_rtc_pm_ops, sh_rtc_suspend, sh_rtc_resume);
 
-- 
cgit v1.2.3


From 7dd47b95b0f54f2057d40af6e66d477e3fe95d13 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 21 Apr 2017 15:21:10 +0900
Subject: kbuild: consolidate redundant sed script ASM offset generation

This part ended up in redundant code after touched by multiple
people.

[1] Commit 3234282f33b2 ("x86, asm: Fix CFI macro invocations to
deal with shortcomings in gas") added parentheses for defined
expressions to support old gas for x86.

[2] Commit a22dcdb0032c ("x86, asm: Fix ancient-GAS workaround")
split the pattern into two to avoid parentheses for non-numeric
expressions.

[3] Commit 95a2f6f72d37 ("Partially revert patch that encloses
asm-offset.h numbers in brackets") removed parentheses from numeric
expressions as well because parentheses in MN10300 assembly have a
special meaning (pointer access).

Apparently, there is a conflict between [1] and [3].  After all,
[3] took precedence, and a long time has passed since then.

Now, merge the two patterns again because the first one is covered
by the other.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
---
 scripts/Makefile.lib | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index dd567e5d59e0..8109c133887d 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -415,7 +415,6 @@ cmd_xzmisc = (cat $(filter-out FORCE,$^) | \
 # Default sed regexp - multiline due to syntax constraints
 define sed-offsets
 	"/^->/{s:->#\(.*\):/* \1 */:; \
-	s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \
 	s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
 	s:->::; p;}"
 endef
-- 
cgit v1.2.3


From cf0c3e68aa81f992b0301f62e341b710d385bf68 Mon Sep 17 00:00:00 2001
From: Jeroen Hofstee <jeroen@myspectrum.nl>
Date: Fri, 21 Apr 2017 15:21:11 +0900
Subject: kbuild: fix asm-offset generation to work with clang

KBuild abuses the asm statement to write to a file and
clang chokes about these invalid asm statements. Hack it
even more by fooling this is actual valid asm code.

[masahiro:
 Import Jeroen's work for U-Boot:
 http://patchwork.ozlabs.org/patch/375026/
 Tweak sed script a little to avoid garbage '#' for GCC case, like
 #define NR_PAGEFLAGS 23 /* __NR_PAGEFLAGS       # */ ]

Signed-off-by: Jeroen Hofstee <jeroen@myspectrum.nl>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Tested-by: Matthias Kaehlcke <mka@chromium.org>
---
 include/linux/kbuild.h | 6 +++---
 scripts/Makefile.lib   | 8 ++++++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/include/linux/kbuild.h b/include/linux/kbuild.h
index 22a72198c14b..4e80f3a9ad58 100644
--- a/include/linux/kbuild.h
+++ b/include/linux/kbuild.h
@@ -2,14 +2,14 @@
 #define __LINUX_KBUILD_H
 
 #define DEFINE(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+	asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))
 
-#define BLANK() asm volatile("\n->" : : )
+#define BLANK() asm volatile("\n.ascii \"->\"" : : )
 
 #define OFFSET(sym, str, mem) \
 	DEFINE(sym, offsetof(struct str, mem))
 
 #define COMMENT(x) \
-	asm volatile("\n->#" x)
+	asm volatile("\n.ascii \"->#" x "\"")
 
 #endif
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 8109c133887d..2209ed696fb4 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -413,10 +413,14 @@ cmd_xzmisc = (cat $(filter-out FORCE,$^) | \
 # ---------------------------------------------------------------------------
 
 # Default sed regexp - multiline due to syntax constraints
+#
+# Use [:space:] because LLVM's integrated assembler inserts <tab> around
+# the .ascii directive whereas GCC keeps the <space> as-is.
 define sed-offsets
-	"/^->/{s:->#\(.*\):/* \1 */:; \
+	's:^[[:space:]]*\.ascii[[:space:]]*"\(.*\)".*:\1:; \
+	/^->/{s:->#\(.*\):/* \1 */:; \
 	s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
-	s:->::; p;}"
+	s:->::; p;}'
 endef
 
 # Use filechk to avoid rebuilds when a header changes, but the resulting file
-- 
cgit v1.2.3


From a0ae981eba8f07dbc74bce38fd3a462b69a5bc8e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 13 Apr 2017 07:25:21 +0900
Subject: kbuild: drop -Wno-unknown-warning-option from clang options

Since commit c3f0d0bc5b01 ("kbuild, LLVMLinux: Add -Werror to
cc-option to support clang"), cc-option and friends work nicely
for clang.

However, -Wno-unknown-warning-option makes clang happy with any
unknown warning options even if -Werror is specified.

Once -Wno-unknown-warning-option is added, any succeeding call of
cc-disable-warning is evaluated positive, then unknown warning
options are accepted.  This should be dropped.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile                   | 1 -
 scripts/Makefile.extrawarn | 1 -
 2 files changed, 2 deletions(-)

diff --git a/Makefile b/Makefile
index 245852f7b312..f178b9a7b56f 100644
--- a/Makefile
+++ b/Makefile
@@ -689,7 +689,6 @@ KBUILD_CFLAGS += $(stackp-flag)
 
 ifeq ($(cc-name),clang)
 KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
-KBUILD_CPPFLAGS += $(call cc-option,-Wno-unknown-warning-option,)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
 KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 7c321a603b07..fb3522fd8702 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -64,7 +64,6 @@ ifeq ($(cc-name),clang)
 KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-value)
 KBUILD_CFLAGS += $(call cc-disable-warning, format)
-KBUILD_CFLAGS += $(call cc-disable-warning, unknown-warning-option)
 KBUILD_CFLAGS += $(call cc-disable-warning, sign-compare)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-zero-length)
 KBUILD_CFLAGS += $(call cc-disable-warning, uninitialized)
-- 
cgit v1.2.3


From 785f11aa595bc3d4e74096cbd598ada54ecc0d81 Mon Sep 17 00:00:00 2001
From: Behan Webster <behanw@converseincode.com>
Date: Fri, 21 Apr 2017 11:20:01 -0700
Subject: kbuild: Add better clang cross build support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add cross target to CC if using clang. Also add custom gcc toolchain
path for fallback gcc tools.

Clang will fallback to using things like ld, as, and libgcc if
(respectively) one of the llvm linkers isn't available, the integrated
assembler is turned off, or an appropriately cross-compiled version of
compiler-rt isn't available. To this end, you can specify the path to
this fallback gcc toolchain with GCC_TOOLCHAIN.

Signed-off-by: Behan Webster <behanw@converseincode.com>
Reviewed-by: Jan-Simon Möller <dl9pf@gmx.de>
Reviewed-by: Mark Charlebois <charlebm@gmail.com>
Signed-off-by: Greg Hackmann <ghackmann@google.com>
Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Makefile b/Makefile
index f178b9a7b56f..88d607245791 100644
--- a/Makefile
+++ b/Makefile
@@ -688,6 +688,15 @@ endif
 KBUILD_CFLAGS += $(stackp-flag)
 
 ifeq ($(cc-name),clang)
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET	:= -target $(notdir $(CROSS_COMPILE:%-=%))
+GCC_TOOLCHAIN	:= $(realpath $(dir $(shell which $(LD)))/..)
+endif
+ifneq ($(GCC_TOOLCHAIN),)
+CLANG_GCC_TC	:= -gcc-toolchain $(GCC_TOOLCHAIN)
+endif
+KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
+KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
-- 
cgit v1.2.3


From cb9b323b5395d0d0cea2922d4f8ad588cf064f46 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Mon, 17 Apr 2017 23:02:01 +0200
Subject: thermal: broadcom: ns: specify myself as MODULE_AUTHOR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Just in case someone uses modinfo to find (blame) me.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/broadcom/ns-thermal.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/thermal/broadcom/ns-thermal.c b/drivers/thermal/broadcom/ns-thermal.c
index 80ad32c6b2df..322e741a2463 100644
--- a/drivers/thermal/broadcom/ns-thermal.c
+++ b/drivers/thermal/broadcom/ns-thermal.c
@@ -101,5 +101,6 @@ static struct platform_driver ns_thermal_driver = {
 };
 module_platform_driver(ns_thermal_driver);
 
+MODULE_AUTHOR("Rafał Miłecki <rafal@milecki.pl>");
 MODULE_DESCRIPTION("Northstar thermal driver");
 MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 6892cf07e7337776c0b006db3b96a96bf071acb7 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Mon, 17 Apr 2017 23:04:16 +0200
Subject: thermal: bcm2835: move to the broadcom subdirectory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We already have 2 Broadcom drivers and at least 1 more is coming. This
made us create broadcom subdirectory where bcm2835 should be moves now.

Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/Kconfig                    |   8 -
 drivers/thermal/Makefile                   |   1 -
 drivers/thermal/bcm2835_thermal.c          | 314 -----------------------------
 drivers/thermal/broadcom/Kconfig           |   8 +
 drivers/thermal/broadcom/Makefile          |   1 +
 drivers/thermal/broadcom/bcm2835_thermal.c | 314 +++++++++++++++++++++++++++++
 6 files changed, 323 insertions(+), 323 deletions(-)
 delete mode 100644 drivers/thermal/bcm2835_thermal.c
 create mode 100644 drivers/thermal/broadcom/bcm2835_thermal.c

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 6699843918fa..f786ae433032 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -468,12 +468,4 @@ config ZX2967_THERMAL
 	  the primitive temperature sensor embedded in zx2967 SoCs.
 	  This sensor generates the real time die temperature.
 
-config BCM2835_THERMAL
-	tristate "Thermal sensors on bcm2835 SoC"
-	depends on ARCH_BCM2835 || COMPILE_TEST
-	depends on HAS_IOMEM
-	depends on THERMAL_OF
-	help
-	  Support for thermal sensors on Broadcom bcm2835 SoCs.
-
 endif
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index a1e9b8b4e897..e6834061da28 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -60,4 +60,3 @@ obj-$(CONFIG_HISI_THERMAL)     += hisi_thermal.o
 obj-$(CONFIG_MTK_THERMAL)	+= mtk_thermal.o
 obj-$(CONFIG_GENERIC_ADC_THERMAL)	+= thermal-generic-adc.o
 obj-$(CONFIG_ZX2967_THERMAL)	+= zx2967_thermal.o
-obj-$(CONFIG_BCM2835_THERMAL)	+= bcm2835_thermal.o
diff --git a/drivers/thermal/bcm2835_thermal.c b/drivers/thermal/bcm2835_thermal.c
deleted file mode 100644
index 0ecf80890c84..000000000000
--- a/drivers/thermal/bcm2835_thermal.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
- * Driver for Broadcom BCM2835 SoC temperature sensor
- *
- * Copyright (C) 2016 Martin Sperl
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/clk.h>
-#include <linux/debugfs.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/platform_device.h>
-#include <linux/thermal.h>
-
-#define BCM2835_TS_TSENSCTL			0x00
-#define BCM2835_TS_TSENSSTAT			0x04
-
-#define BCM2835_TS_TSENSCTL_PRWDW		BIT(0)
-#define BCM2835_TS_TSENSCTL_RSTB		BIT(1)
-
-/*
- * bandgap reference voltage in 6 mV increments
- * 000b = 1178 mV, 001b = 1184 mV, ... 111b = 1220 mV
- */
-#define BCM2835_TS_TSENSCTL_CTRL_BITS		3
-#define BCM2835_TS_TSENSCTL_CTRL_SHIFT		2
-#define BCM2835_TS_TSENSCTL_CTRL_MASK		    \
-	GENMASK(BCM2835_TS_TSENSCTL_CTRL_BITS +     \
-		BCM2835_TS_TSENSCTL_CTRL_SHIFT - 1, \
-		BCM2835_TS_TSENSCTL_CTRL_SHIFT)
-#define BCM2835_TS_TSENSCTL_CTRL_DEFAULT	1
-#define BCM2835_TS_TSENSCTL_EN_INT		BIT(5)
-#define BCM2835_TS_TSENSCTL_DIRECT		BIT(6)
-#define BCM2835_TS_TSENSCTL_CLR_INT		BIT(7)
-#define BCM2835_TS_TSENSCTL_THOLD_SHIFT		8
-#define BCM2835_TS_TSENSCTL_THOLD_BITS		10
-#define BCM2835_TS_TSENSCTL_THOLD_MASK		     \
-	GENMASK(BCM2835_TS_TSENSCTL_THOLD_BITS +     \
-		BCM2835_TS_TSENSCTL_THOLD_SHIFT - 1, \
-		BCM2835_TS_TSENSCTL_THOLD_SHIFT)
-/*
- * time how long the block to be asserted in reset
- * which based on a clock counter (TSENS clock assumed)
- */
-#define BCM2835_TS_TSENSCTL_RSTDELAY_SHIFT	18
-#define BCM2835_TS_TSENSCTL_RSTDELAY_BITS	8
-#define BCM2835_TS_TSENSCTL_REGULEN		BIT(26)
-
-#define BCM2835_TS_TSENSSTAT_DATA_BITS		10
-#define BCM2835_TS_TSENSSTAT_DATA_SHIFT		0
-#define BCM2835_TS_TSENSSTAT_DATA_MASK		     \
-	GENMASK(BCM2835_TS_TSENSSTAT_DATA_BITS +     \
-		BCM2835_TS_TSENSSTAT_DATA_SHIFT - 1, \
-		BCM2835_TS_TSENSSTAT_DATA_SHIFT)
-#define BCM2835_TS_TSENSSTAT_VALID		BIT(10)
-#define BCM2835_TS_TSENSSTAT_INTERRUPT		BIT(11)
-
-struct bcm2835_thermal_data {
-	struct thermal_zone_device *tz;
-	void __iomem *regs;
-	struct clk *clk;
-	struct dentry *debugfsdir;
-};
-
-static int bcm2835_thermal_adc2temp(u32 adc, int offset, int slope)
-{
-	return offset + slope * adc;
-}
-
-static int bcm2835_thermal_temp2adc(int temp, int offset, int slope)
-{
-	temp -= offset;
-	temp /= slope;
-
-	if (temp < 0)
-		temp = 0;
-	if (temp >= BIT(BCM2835_TS_TSENSSTAT_DATA_BITS))
-		temp = BIT(BCM2835_TS_TSENSSTAT_DATA_BITS) - 1;
-
-	return temp;
-}
-
-static int bcm2835_thermal_get_temp(void *d, int *temp)
-{
-	struct bcm2835_thermal_data *data = d;
-	u32 val = readl(data->regs + BCM2835_TS_TSENSSTAT);
-
-	if (!(val & BCM2835_TS_TSENSSTAT_VALID))
-		return -EIO;
-
-	val &= BCM2835_TS_TSENSSTAT_DATA_MASK;
-
-	*temp = bcm2835_thermal_adc2temp(
-		val,
-		thermal_zone_get_offset(data->tz),
-		thermal_zone_get_slope(data->tz));
-
-	return 0;
-}
-
-static const struct debugfs_reg32 bcm2835_thermal_regs[] = {
-	{
-		.name = "ctl",
-		.offset = 0
-	},
-	{
-		.name = "stat",
-		.offset = 4
-	}
-};
-
-static void bcm2835_thermal_debugfs(struct platform_device *pdev)
-{
-	struct thermal_zone_device *tz = platform_get_drvdata(pdev);
-	struct bcm2835_thermal_data *data = tz->devdata;
-	struct debugfs_regset32 *regset;
-
-	data->debugfsdir = debugfs_create_dir("bcm2835_thermal", NULL);
-	if (!data->debugfsdir)
-		return;
-
-	regset = devm_kzalloc(&pdev->dev, sizeof(*regset), GFP_KERNEL);
-	if (!regset)
-		return;
-
-	regset->regs = bcm2835_thermal_regs;
-	regset->nregs = ARRAY_SIZE(bcm2835_thermal_regs);
-	regset->base = data->regs;
-
-	debugfs_create_regset32("regset", 0444, data->debugfsdir, regset);
-}
-
-static struct thermal_zone_of_device_ops bcm2835_thermal_ops = {
-	.get_temp = bcm2835_thermal_get_temp,
-};
-
-/*
- * Note: as per Raspberry Foundation FAQ
- * (https://www.raspberrypi.org/help/faqs/#performanceOperatingTemperature)
- * the recommended temperature range for the SoC -40C to +85C
- * so the trip limit is set to 80C.
- * this applies to all the BCM283X SoC
- */
-
-static const struct of_device_id bcm2835_thermal_of_match_table[] = {
-	{
-		.compatible = "brcm,bcm2835-thermal",
-	},
-	{
-		.compatible = "brcm,bcm2836-thermal",
-	},
-	{
-		.compatible = "brcm,bcm2837-thermal",
-	},
-	{},
-};
-MODULE_DEVICE_TABLE(of, bcm2835_thermal_of_match_table);
-
-static int bcm2835_thermal_probe(struct platform_device *pdev)
-{
-	const struct of_device_id *match;
-	struct thermal_zone_device *tz;
-	struct bcm2835_thermal_data *data;
-	struct resource *res;
-	int err = 0;
-	u32 val;
-	unsigned long rate;
-
-	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	match = of_match_device(bcm2835_thermal_of_match_table,
-				&pdev->dev);
-	if (!match)
-		return -EINVAL;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	data->regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(data->regs)) {
-		err = PTR_ERR(data->regs);
-		dev_err(&pdev->dev, "Could not get registers: %d\n", err);
-		return err;
-	}
-
-	data->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(data->clk)) {
-		err = PTR_ERR(data->clk);
-		if (err != -EPROBE_DEFER)
-			dev_err(&pdev->dev, "Could not get clk: %d\n", err);
-		return err;
-	}
-
-	err = clk_prepare_enable(data->clk);
-	if (err)
-		return err;
-
-	rate = clk_get_rate(data->clk);
-	if ((rate < 1920000) || (rate > 5000000))
-		dev_warn(&pdev->dev,
-			 "Clock %pCn running at %pCr Hz is outside of the recommended range: 1.92 to 5MHz\n",
-			 data->clk, data->clk);
-
-	/* register of thermal sensor and get info from DT */
-	tz = thermal_zone_of_sensor_register(&pdev->dev, 0, data,
-					     &bcm2835_thermal_ops);
-	if (IS_ERR(tz)) {
-		err = PTR_ERR(tz);
-		dev_err(&pdev->dev,
-			"Failed to register the thermal device: %d\n",
-			err);
-		goto err_clk;
-	}
-
-	/*
-	 * right now the FW does set up the HW-block, so we are not
-	 * touching the configuration registers.
-	 * But if the HW is not enabled, then set it up
-	 * using "sane" values used by the firmware right now.
-	 */
-	val = readl(data->regs + BCM2835_TS_TSENSCTL);
-	if (!(val & BCM2835_TS_TSENSCTL_RSTB)) {
-		int trip_temp, offset, slope;
-
-		slope = thermal_zone_get_slope(tz);
-		offset = thermal_zone_get_offset(tz);
-		/*
-		 * For now we deal only with critical, otherwise
-		 * would need to iterate
-		 */
-		err = tz->ops->get_trip_temp(tz, 0, &trip_temp);
-		if (err < 0) {
-			err = PTR_ERR(tz);
-			dev_err(&pdev->dev,
-				"Not able to read trip_temp: %d\n",
-				err);
-			goto err_tz;
-		}
-
-		/* set bandgap reference voltage and enable voltage regulator */
-		val = (BCM2835_TS_TSENSCTL_CTRL_DEFAULT <<
-		       BCM2835_TS_TSENSCTL_CTRL_SHIFT) |
-		      BCM2835_TS_TSENSCTL_REGULEN;
-
-		/* use the recommended reset duration */
-		val |= (0xFE << BCM2835_TS_TSENSCTL_RSTDELAY_SHIFT);
-
-		/*  trip_adc value from info */
-		val |= bcm2835_thermal_temp2adc(trip_temp,
-						offset,
-						slope)
-			<< BCM2835_TS_TSENSCTL_THOLD_SHIFT;
-
-		/* write the value back to the register as 2 steps */
-		writel(val, data->regs + BCM2835_TS_TSENSCTL);
-		val |= BCM2835_TS_TSENSCTL_RSTB;
-		writel(val, data->regs + BCM2835_TS_TSENSCTL);
-	}
-
-	data->tz = tz;
-
-	platform_set_drvdata(pdev, tz);
-
-	bcm2835_thermal_debugfs(pdev);
-
-	return 0;
-err_tz:
-	thermal_zone_of_sensor_unregister(&pdev->dev, tz);
-err_clk:
-	clk_disable_unprepare(data->clk);
-
-	return err;
-}
-
-static int bcm2835_thermal_remove(struct platform_device *pdev)
-{
-	struct thermal_zone_device *tz = platform_get_drvdata(pdev);
-	struct bcm2835_thermal_data *data = tz->devdata;
-
-	debugfs_remove_recursive(data->debugfsdir);
-	thermal_zone_of_sensor_unregister(&pdev->dev, tz);
-	clk_disable_unprepare(data->clk);
-
-	return 0;
-}
-
-static struct platform_driver bcm2835_thermal_driver = {
-	.probe = bcm2835_thermal_probe,
-	.remove = bcm2835_thermal_remove,
-	.driver = {
-		.name = "bcm2835_thermal",
-		.of_match_table = bcm2835_thermal_of_match_table,
-	},
-};
-module_platform_driver(bcm2835_thermal_driver);
-
-MODULE_AUTHOR("Martin Sperl");
-MODULE_DESCRIPTION("Thermal driver for bcm2835 chip");
-MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/broadcom/Kconfig b/drivers/thermal/broadcom/Kconfig
index f0dea8a8e002..ab08af4654ef 100644
--- a/drivers/thermal/broadcom/Kconfig
+++ b/drivers/thermal/broadcom/Kconfig
@@ -1,3 +1,11 @@
+config BCM2835_THERMAL
+	tristate "Thermal sensors on bcm2835 SoC"
+	depends on ARCH_BCM2835 || COMPILE_TEST
+	depends on HAS_IOMEM
+	depends on THERMAL_OF
+	help
+	  Support for thermal sensors on Broadcom bcm2835 SoCs.
+
 config BCM_NS_THERMAL
 	tristate "Northstar thermal driver"
 	depends on ARCH_BCM_IPROC || COMPILE_TEST
diff --git a/drivers/thermal/broadcom/Makefile b/drivers/thermal/broadcom/Makefile
index 059df9a0ed69..c6f62e4fd0ee 100644
--- a/drivers/thermal/broadcom/Makefile
+++ b/drivers/thermal/broadcom/Makefile
@@ -1 +1,2 @@
+obj-$(CONFIG_BCM2835_THERMAL)		+= bcm2835_thermal.o
 obj-$(CONFIG_BCM_NS_THERMAL)		+= ns-thermal.o
diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c
new file mode 100644
index 000000000000..0ecf80890c84
--- /dev/null
+++ b/drivers/thermal/broadcom/bcm2835_thermal.c
@@ -0,0 +1,314 @@
+/*
+ * Driver for Broadcom BCM2835 SoC temperature sensor
+ *
+ * Copyright (C) 2016 Martin Sperl
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#define BCM2835_TS_TSENSCTL			0x00
+#define BCM2835_TS_TSENSSTAT			0x04
+
+#define BCM2835_TS_TSENSCTL_PRWDW		BIT(0)
+#define BCM2835_TS_TSENSCTL_RSTB		BIT(1)
+
+/*
+ * bandgap reference voltage in 6 mV increments
+ * 000b = 1178 mV, 001b = 1184 mV, ... 111b = 1220 mV
+ */
+#define BCM2835_TS_TSENSCTL_CTRL_BITS		3
+#define BCM2835_TS_TSENSCTL_CTRL_SHIFT		2
+#define BCM2835_TS_TSENSCTL_CTRL_MASK		    \
+	GENMASK(BCM2835_TS_TSENSCTL_CTRL_BITS +     \
+		BCM2835_TS_TSENSCTL_CTRL_SHIFT - 1, \
+		BCM2835_TS_TSENSCTL_CTRL_SHIFT)
+#define BCM2835_TS_TSENSCTL_CTRL_DEFAULT	1
+#define BCM2835_TS_TSENSCTL_EN_INT		BIT(5)
+#define BCM2835_TS_TSENSCTL_DIRECT		BIT(6)
+#define BCM2835_TS_TSENSCTL_CLR_INT		BIT(7)
+#define BCM2835_TS_TSENSCTL_THOLD_SHIFT		8
+#define BCM2835_TS_TSENSCTL_THOLD_BITS		10
+#define BCM2835_TS_TSENSCTL_THOLD_MASK		     \
+	GENMASK(BCM2835_TS_TSENSCTL_THOLD_BITS +     \
+		BCM2835_TS_TSENSCTL_THOLD_SHIFT - 1, \
+		BCM2835_TS_TSENSCTL_THOLD_SHIFT)
+/*
+ * time how long the block to be asserted in reset
+ * which based on a clock counter (TSENS clock assumed)
+ */
+#define BCM2835_TS_TSENSCTL_RSTDELAY_SHIFT	18
+#define BCM2835_TS_TSENSCTL_RSTDELAY_BITS	8
+#define BCM2835_TS_TSENSCTL_REGULEN		BIT(26)
+
+#define BCM2835_TS_TSENSSTAT_DATA_BITS		10
+#define BCM2835_TS_TSENSSTAT_DATA_SHIFT		0
+#define BCM2835_TS_TSENSSTAT_DATA_MASK		     \
+	GENMASK(BCM2835_TS_TSENSSTAT_DATA_BITS +     \
+		BCM2835_TS_TSENSSTAT_DATA_SHIFT - 1, \
+		BCM2835_TS_TSENSSTAT_DATA_SHIFT)
+#define BCM2835_TS_TSENSSTAT_VALID		BIT(10)
+#define BCM2835_TS_TSENSSTAT_INTERRUPT		BIT(11)
+
+struct bcm2835_thermal_data {
+	struct thermal_zone_device *tz;
+	void __iomem *regs;
+	struct clk *clk;
+	struct dentry *debugfsdir;
+};
+
+static int bcm2835_thermal_adc2temp(u32 adc, int offset, int slope)
+{
+	return offset + slope * adc;
+}
+
+static int bcm2835_thermal_temp2adc(int temp, int offset, int slope)
+{
+	temp -= offset;
+	temp /= slope;
+
+	if (temp < 0)
+		temp = 0;
+	if (temp >= BIT(BCM2835_TS_TSENSSTAT_DATA_BITS))
+		temp = BIT(BCM2835_TS_TSENSSTAT_DATA_BITS) - 1;
+
+	return temp;
+}
+
+static int bcm2835_thermal_get_temp(void *d, int *temp)
+{
+	struct bcm2835_thermal_data *data = d;
+	u32 val = readl(data->regs + BCM2835_TS_TSENSSTAT);
+
+	if (!(val & BCM2835_TS_TSENSSTAT_VALID))
+		return -EIO;
+
+	val &= BCM2835_TS_TSENSSTAT_DATA_MASK;
+
+	*temp = bcm2835_thermal_adc2temp(
+		val,
+		thermal_zone_get_offset(data->tz),
+		thermal_zone_get_slope(data->tz));
+
+	return 0;
+}
+
+static const struct debugfs_reg32 bcm2835_thermal_regs[] = {
+	{
+		.name = "ctl",
+		.offset = 0
+	},
+	{
+		.name = "stat",
+		.offset = 4
+	}
+};
+
+static void bcm2835_thermal_debugfs(struct platform_device *pdev)
+{
+	struct thermal_zone_device *tz = platform_get_drvdata(pdev);
+	struct bcm2835_thermal_data *data = tz->devdata;
+	struct debugfs_regset32 *regset;
+
+	data->debugfsdir = debugfs_create_dir("bcm2835_thermal", NULL);
+	if (!data->debugfsdir)
+		return;
+
+	regset = devm_kzalloc(&pdev->dev, sizeof(*regset), GFP_KERNEL);
+	if (!regset)
+		return;
+
+	regset->regs = bcm2835_thermal_regs;
+	regset->nregs = ARRAY_SIZE(bcm2835_thermal_regs);
+	regset->base = data->regs;
+
+	debugfs_create_regset32("regset", 0444, data->debugfsdir, regset);
+}
+
+static struct thermal_zone_of_device_ops bcm2835_thermal_ops = {
+	.get_temp = bcm2835_thermal_get_temp,
+};
+
+/*
+ * Note: as per Raspberry Foundation FAQ
+ * (https://www.raspberrypi.org/help/faqs/#performanceOperatingTemperature)
+ * the recommended temperature range for the SoC -40C to +85C
+ * so the trip limit is set to 80C.
+ * this applies to all the BCM283X SoC
+ */
+
+static const struct of_device_id bcm2835_thermal_of_match_table[] = {
+	{
+		.compatible = "brcm,bcm2835-thermal",
+	},
+	{
+		.compatible = "brcm,bcm2836-thermal",
+	},
+	{
+		.compatible = "brcm,bcm2837-thermal",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, bcm2835_thermal_of_match_table);
+
+static int bcm2835_thermal_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	struct thermal_zone_device *tz;
+	struct bcm2835_thermal_data *data;
+	struct resource *res;
+	int err = 0;
+	u32 val;
+	unsigned long rate;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	match = of_match_device(bcm2835_thermal_of_match_table,
+				&pdev->dev);
+	if (!match)
+		return -EINVAL;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	data->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(data->regs)) {
+		err = PTR_ERR(data->regs);
+		dev_err(&pdev->dev, "Could not get registers: %d\n", err);
+		return err;
+	}
+
+	data->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(data->clk)) {
+		err = PTR_ERR(data->clk);
+		if (err != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "Could not get clk: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(data->clk);
+	if (err)
+		return err;
+
+	rate = clk_get_rate(data->clk);
+	if ((rate < 1920000) || (rate > 5000000))
+		dev_warn(&pdev->dev,
+			 "Clock %pCn running at %pCr Hz is outside of the recommended range: 1.92 to 5MHz\n",
+			 data->clk, data->clk);
+
+	/* register of thermal sensor and get info from DT */
+	tz = thermal_zone_of_sensor_register(&pdev->dev, 0, data,
+					     &bcm2835_thermal_ops);
+	if (IS_ERR(tz)) {
+		err = PTR_ERR(tz);
+		dev_err(&pdev->dev,
+			"Failed to register the thermal device: %d\n",
+			err);
+		goto err_clk;
+	}
+
+	/*
+	 * right now the FW does set up the HW-block, so we are not
+	 * touching the configuration registers.
+	 * But if the HW is not enabled, then set it up
+	 * using "sane" values used by the firmware right now.
+	 */
+	val = readl(data->regs + BCM2835_TS_TSENSCTL);
+	if (!(val & BCM2835_TS_TSENSCTL_RSTB)) {
+		int trip_temp, offset, slope;
+
+		slope = thermal_zone_get_slope(tz);
+		offset = thermal_zone_get_offset(tz);
+		/*
+		 * For now we deal only with critical, otherwise
+		 * would need to iterate
+		 */
+		err = tz->ops->get_trip_temp(tz, 0, &trip_temp);
+		if (err < 0) {
+			err = PTR_ERR(tz);
+			dev_err(&pdev->dev,
+				"Not able to read trip_temp: %d\n",
+				err);
+			goto err_tz;
+		}
+
+		/* set bandgap reference voltage and enable voltage regulator */
+		val = (BCM2835_TS_TSENSCTL_CTRL_DEFAULT <<
+		       BCM2835_TS_TSENSCTL_CTRL_SHIFT) |
+		      BCM2835_TS_TSENSCTL_REGULEN;
+
+		/* use the recommended reset duration */
+		val |= (0xFE << BCM2835_TS_TSENSCTL_RSTDELAY_SHIFT);
+
+		/*  trip_adc value from info */
+		val |= bcm2835_thermal_temp2adc(trip_temp,
+						offset,
+						slope)
+			<< BCM2835_TS_TSENSCTL_THOLD_SHIFT;
+
+		/* write the value back to the register as 2 steps */
+		writel(val, data->regs + BCM2835_TS_TSENSCTL);
+		val |= BCM2835_TS_TSENSCTL_RSTB;
+		writel(val, data->regs + BCM2835_TS_TSENSCTL);
+	}
+
+	data->tz = tz;
+
+	platform_set_drvdata(pdev, tz);
+
+	bcm2835_thermal_debugfs(pdev);
+
+	return 0;
+err_tz:
+	thermal_zone_of_sensor_unregister(&pdev->dev, tz);
+err_clk:
+	clk_disable_unprepare(data->clk);
+
+	return err;
+}
+
+static int bcm2835_thermal_remove(struct platform_device *pdev)
+{
+	struct thermal_zone_device *tz = platform_get_drvdata(pdev);
+	struct bcm2835_thermal_data *data = tz->devdata;
+
+	debugfs_remove_recursive(data->debugfsdir);
+	thermal_zone_of_sensor_unregister(&pdev->dev, tz);
+	clk_disable_unprepare(data->clk);
+
+	return 0;
+}
+
+static struct platform_driver bcm2835_thermal_driver = {
+	.probe = bcm2835_thermal_probe,
+	.remove = bcm2835_thermal_remove,
+	.driver = {
+		.name = "bcm2835_thermal",
+		.of_match_table = bcm2835_thermal_of_match_table,
+	},
+};
+module_platform_driver(bcm2835_thermal_driver);
+
+MODULE_AUTHOR("Martin Sperl");
+MODULE_DESCRIPTION("Thermal driver for bcm2835 chip");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 05d7839aa290901429d8edcd8f7974c9df2bcaa5 Mon Sep 17 00:00:00 2001
From: Dawei Chien <dawei.chien@mediatek.com>
Date: Tue, 21 Feb 2017 20:26:52 +0800
Subject: thermal: mt8173: minor mtk_thermal.c cleanups

If thermal bank with 4 sensors, thermal driver should read TEMP_MSR3.

However, currently thermal driver would not read TEMP_MSR3 since mt8173
thermal driver only use 3 sensors on each thermal bank at the same time,
so this patch would not effect temperature.
Only if mt mt8173 thermal driver use 4 sensors on any thermal bank, would
read third sensor two times, and lose fourth sensor of vale.

cc: stable@vger.kernel.org
Fixes: b7cf0053738c ("thermal: Add Mediatek thermal driver for mt2701.")
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Dawei Chien <dawei.chien@mediatek.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/mtk_thermal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c
index 1aff7fde54b1..7737f14846f9 100644
--- a/drivers/thermal/mtk_thermal.c
+++ b/drivers/thermal/mtk_thermal.c
@@ -191,7 +191,7 @@ static const int mt8173_bank_data[MT8173_NUM_ZONES][3] = {
 };
 
 static const int mt8173_msr[MT8173_NUM_SENSORS_PER_ZONE] = {
-	TEMP_MSR0, TEMP_MSR1, TEMP_MSR2, TEMP_MSR2
+	TEMP_MSR0, TEMP_MSR1, TEMP_MSR2, TEMP_MSR3
 };
 
 static const int mt8173_adcpnp[MT8173_NUM_SENSORS_PER_ZONE] = {
-- 
cgit v1.2.3


From 68747c5f3d81771cfc8be2ae98f70a1aea9175c3 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Sun, 9 Apr 2017 09:41:30 +0800
Subject: dmaengine: ioat: use setup_timer

Use setup_timer() instead of init_timer() to simplify the code.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ioat/init.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index cc5259b881d4..6ad4384b3fa8 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -760,9 +760,7 @@ ioat_init_channel(struct ioatdma_device *ioat_dma,
 	dma_cookie_init(&ioat_chan->dma_chan);
 	list_add_tail(&ioat_chan->dma_chan.device_node, &dma->channels);
 	ioat_dma->idx[idx] = ioat_chan;
-	init_timer(&ioat_chan->timer);
-	ioat_chan->timer.function = ioat_timer_event;
-	ioat_chan->timer.data = data;
+	setup_timer(&ioat_chan->timer, ioat_timer_event, data);
 	tasklet_init(&ioat_chan->cleanup_task, ioat_cleanup_event, data);
 }
 
-- 
cgit v1.2.3


From 57192245bc074710ea1a128d39ecc429455ac815 Mon Sep 17 00:00:00 2001
From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Date: Fri, 21 Apr 2017 10:43:20 +0200
Subject: dmaengine: sun4i: fix invalid argument

The "pchans_used" field is an unsigned long array.

for_each_clear_bit_from() expects an unsigned long pointer,
not an array address.

$ make C=2 drivers/dma/sun4i-dma.o
  CHECK   drivers/dma/sun4i-dma.c
drivers/dma/sun4i-dma.c:241:9: warning: incorrect type in argument 1 (different base types)
drivers/dma/sun4i-dma.c:241:9:    expected unsigned long const *p
drivers/dma/sun4i-dma.c:241:9:    got unsigned long ( *<noident> )[1]

Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/sun4i-dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c
index 57aa227bfadb..f4ed3f17607c 100644
--- a/drivers/dma/sun4i-dma.c
+++ b/drivers/dma/sun4i-dma.c
@@ -238,7 +238,7 @@ static struct sun4i_dma_pchan *find_and_use_pchan(struct sun4i_dma_dev *priv,
 	}
 
 	spin_lock_irqsave(&priv->lock, flags);
-	for_each_clear_bit_from(i, &priv->pchans_used, max) {
+	for_each_clear_bit_from(i, priv->pchans_used, max) {
 		pchan = &pchans[i];
 		pchan->vchan = vchan;
 		set_bit(i, priv->pchans_used);
-- 
cgit v1.2.3


From 677df9d4615a2db6774cd0e8951bf7404b858b3b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 23 Apr 2017 09:22:05 -0700
Subject: srcu: Fix Kconfig botch when SRCU not selected

If the CONFIG_SRCU option is not selected, for example, when building
arch/tile allnoconfig, the following build errors appear:

	kernel/rcu/tree.o: In function `srcu_online_cpu':
	tree.c:(.text+0x4248): multiple definition of `srcu_online_cpu'
	kernel/rcu/srcutree.o:srcutree.c:(.text+0x2120): first defined here
	kernel/rcu/tree.o: In function `srcu_offline_cpu':
	tree.c:(.text+0x4250): multiple definition of `srcu_offline_cpu'
	kernel/rcu/srcutree.o:srcutree.c:(.text+0x2160): first defined here

The corresponding .config file shows CONFIG_TREE_SRCU=y, but no sign
of CONFIG_SRCU, which fatally confuses SRCU's #ifdefs, resulting in
the above errors.  The reason this occurs is the folowing line in
init/Kconfig's definition for TREE_SRCU:

	default y if !TINY_RCU && !CLASSIC_SRCU

If CONFIG_CLASSIC_SRCU=n, as it will be in for allnoconfig, and if
CONFIG_SMP=y, then we will get CONFIG_TREE_SRCU=y but no CONFIG_SRCU,
as seen in the .config file, and which will result in the above errors.
This error did not show up during rcutorture testing because rcutorture
forces CONFIG_SRCU=y, as it must to prevent build errors in rcutorture.c.

This commit therefore conditions TREE_SRCU (and TINY_SRCU, while it is
at it) with SRCU, like this:

	default y if SRCU && !TINY_RCU && !CLASSIC_SRCU

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20170423162205.GP3956@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 init/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index 4119a44e4157..fe72c12e06a5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -545,13 +545,13 @@ config CLASSIC_SRCU
 
 config TINY_SRCU
 	bool
-	default y if TINY_RCU && !CLASSIC_SRCU
+	default y if SRCU && TINY_RCU && !CLASSIC_SRCU
 	help
 	  This option selects the single-CPU non-preemptible version of SRCU.
 
 config TREE_SRCU
 	bool
-	default y if !TINY_RCU && !CLASSIC_SRCU
+	default y if SRCU && !TINY_RCU && !CLASSIC_SRCU
 	help
 	  This option selects the full-fledged version of SRCU.
 
-- 
cgit v1.2.3


From 73dbd4a4230216b6a5540a362edceae0c9b4876b Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sun, 23 Apr 2017 18:23:21 +0800
Subject: iommu/amd: Fix incorrect error handling in amd_iommu_bind_pasid()

In function amd_iommu_bind_pasid(), the control flow jumps
to label out_free when pasid_state->mm and mm is NULL. And
mmput(mm) is called.  In function mmput(mm), mm is
referenced without validation. This will result in a NULL
dereference bug. This patch fixes the bug.

Signed-off-by: Pan Bian <bianpan2016@163.com>
Fixes: f0aac63b873b ('iommu/amd: Don't hold a reference to mm_struct')
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu_v2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 063343909b0d..6629c472eafd 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -696,9 +696,9 @@ out_clear_state:
 
 out_unregister:
 	mmu_notifier_unregister(&pasid_state->mn, mm);
+	mmput(mm);
 
 out_free:
-	mmput(mm);
 	free_pasid_state(pasid_state);
 
 out:
-- 
cgit v1.2.3


From f62e5f613e124911b955e4a804a5fceb37c82993 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Sat, 22 Apr 2017 09:18:03 +0800
Subject: dmaengine: dmatest: use offset_in_page() macro

Use offset_in_page() macro instead of open-coding.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/dmatest.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 54d581d407aa..d042b2b4965e 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -585,7 +585,7 @@ static int dmatest_func(void *data)
 		for (i = 0; i < src_cnt; i++) {
 			void *buf = thread->srcs[i];
 			struct page *pg = virt_to_page(buf);
-			unsigned pg_off = (unsigned long) buf & ~PAGE_MASK;
+			unsigned long pg_off = offset_in_page(buf);
 
 			um->addr[i] = dma_map_page(dev->dev, pg, pg_off,
 						   um->len, DMA_TO_DEVICE);
@@ -605,7 +605,7 @@ static int dmatest_func(void *data)
 		for (i = 0; i < dst_cnt; i++) {
 			void *buf = thread->dsts[i];
 			struct page *pg = virt_to_page(buf);
-			unsigned pg_off = (unsigned long) buf & ~PAGE_MASK;
+			unsigned long pg_off = offset_in_page(buf);
 
 			dsts[i] = dma_map_page(dev->dev, pg, pg_off, um->len,
 					       DMA_BIDIRECTIONAL);
-- 
cgit v1.2.3


From b70e52cacb3bff1cc6fae36a89e0fa9d33fe31a7 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Sat, 22 Apr 2017 09:18:04 +0800
Subject: dmaengine: mv_xor: use offset_in_page() macro

Use offset_in_page() macro instead of open-coding.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/mv_xor.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index ea53b879859c..25bc5b103aa2 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -960,7 +960,7 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
 	}
 
 	src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src),
-			       (size_t)src & ~PAGE_MASK, PAGE_SIZE,
+			       offset_in_page(src), PAGE_SIZE,
 			       DMA_TO_DEVICE);
 	unmap->addr[0] = src_dma;
 
@@ -972,7 +972,7 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
 	unmap->to_cnt = 1;
 
 	dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest),
-				(size_t)dest & ~PAGE_MASK, PAGE_SIZE,
+				offset_in_page(dest), PAGE_SIZE,
 				DMA_FROM_DEVICE);
 	unmap->addr[1] = dest_dma;
 
-- 
cgit v1.2.3


From e57d05520f9c89cf7616fc8c65dafb1ceb7a4c63 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Sat, 22 Apr 2017 09:18:05 +0800
Subject: dma-debug: use offset_in_page() macro

Use offset_in_page() macro instead of open-coding.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 lib/dma-debug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index b157b46cc9a6..cd5a5a426ef1 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -1502,7 +1502,7 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
 	entry->type      = dma_debug_coherent;
 	entry->dev       = dev;
 	entry->pfn	 = page_to_pfn(virt_to_page(virt));
-	entry->offset	 = (size_t) virt & ~PAGE_MASK;
+	entry->offset	 = offset_in_page(virt);
 	entry->size      = size;
 	entry->dev_addr  = dma_addr;
 	entry->direction = DMA_BIDIRECTIONAL;
@@ -1518,7 +1518,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
 		.type           = dma_debug_coherent,
 		.dev            = dev,
 		.pfn		= page_to_pfn(virt_to_page(virt)),
-		.offset		= (size_t) virt & ~PAGE_MASK,
+		.offset		= offset_in_page(virt),
 		.dev_addr       = addr,
 		.size           = size,
 		.direction      = DMA_BIDIRECTIONAL,
-- 
cgit v1.2.3


From d160a727c40e7175aa642137910a3fda46262fc8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 23 Apr 2017 12:50:59 -0700
Subject: srcu: Make SRCU be built by default

SRCU is optional, and included only if there is a "select SRCU" in effect.
However, we now have Tiny SRCU, so this commit defaults CONFIG_SRCU=y.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 init/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/init/Kconfig b/init/Kconfig
index fe72c12e06a5..42a346b0df43 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -521,6 +521,7 @@ config RCU_EXPERT
 
 config SRCU
 	bool
+	default y
 	help
 	  This option selects the sleepable version of RCU. This version
 	  permits arbitrary sleeping or blocking within RCU read-side critical
-- 
cgit v1.2.3


From 1d349923857d7b4e1892d48b7c4e00ebaf635df9 Mon Sep 17 00:00:00 2001
From: Riku Voipio <riku.voipio@linaro.org>
Date: Mon, 24 Apr 2017 14:07:40 +0300
Subject: builddeb: Update a few outdated and hardcoded strings

The builddeb script has some hardcoded references to Linux version 2.6
which is ancient. Drop Provides as the virtual packages provided are not
useful anymore. Leave the Provides for linux-kernel-headers, as someone
might still be referring to it.

While at it, updated copyright date and drop Standards-Version: since
the package isn't Debian Standards compliant anyways.

Cc: Timo Sigurdsson <public_timo.s@silentcreek.de>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/package/builddeb | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index f04e91f0845d..c0107f581c5d 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -260,7 +260,7 @@ This is a packacked upstream version of the Linux kernel.
 The sources may be found at most Linux ftp sites, including:
 ftp://ftp.kernel.org/pub/linux/kernel
 
-Copyright: 1991 - 2015 Linus Torvalds and others.
+Copyright: 1991 - 2017 Linus Torvalds and others.
 
 The git repository for mainline kernel development is at:
 git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
@@ -283,7 +283,6 @@ Section: kernel
 Priority: optional
 Maintainer: $maintainer
 Build-Depends: $build_depends
-Standards-Version: 3.8.4
 Homepage: http://www.kernel.org/
 EOF
 
@@ -291,7 +290,6 @@ if [ "$ARCH" = "um" ]; then
 	cat <<EOF >> debian/control
 
 Package: $packagename
-Provides: linux-image, linux-image-2.6, linux-modules-$version
 Architecture: any
 Description: User Mode Linux kernel, version $version
  User-mode Linux is a port of the Linux kernel to its own system call
@@ -308,7 +306,6 @@ else
 	cat <<EOF >> debian/control
 
 Package: $packagename
-Provides: linux-image, linux-image-2.6, linux-modules-$version
 Suggests: $fwpackagename
 Architecture: any
 Description: Linux kernel, version $version
@@ -341,7 +338,6 @@ rm -f "$objtree/debian/hdrsrcfiles" "$objtree/debian/hdrobjfiles"
 cat <<EOF >> debian/control
 
 Package: $kernel_headers_packagename
-Provides: linux-headers, linux-headers-2.6
 Architecture: any
 Description: Linux kernel headers for $KERNELRELEASE on \${kernel:debarch}
  This package provides kernel header files for $KERNELRELEASE on \${kernel:debarch}
@@ -399,7 +395,6 @@ if [ -n "$BUILD_DEBUG" ] ; then
 
 Package: $dbg_packagename
 Section: debug
-Provides: linux-debug, linux-debug-$version
 Architecture: any
 Description: Linux kernel debugging symbols for $version
  This package will come in handy if you need to debug the kernel. It provides
-- 
cgit v1.2.3


From 9eb3c95896a337256489124857157f49616d2c6b Mon Sep 17 00:00:00 2001
From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Date: Mon, 24 Apr 2017 14:07:41 +0300
Subject: builddeb: fix typo

Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/package/builddeb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index c0107f581c5d..3407826f2a99 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -69,7 +69,7 @@ set_debarch() {
 		echo "" >&2
 		echo "** ** **  WARNING  ** ** **" >&2
 		echo "" >&2
-		echo "Your architecture doesn't have it's equivalent" >&2
+		echo "Your architecture doesn't have its equivalent" >&2
 		echo "Debian userspace architecture defined!" >&2
 		echo "Falling back to using your current userspace instead!" >&2
 		echo "Please add support for $UTS_MACHINE to ${0} ..." >&2
-- 
cgit v1.2.3


From 433db3e260bc8134d4a46ddf20b3668937e12556 Mon Sep 17 00:00:00 2001
From: Vinícius Tinti <viniciustinti@gmail.com>
Date: Mon, 24 Apr 2017 13:04:58 -0700
Subject: kbuild: Add support to generate LLVM assembly files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add rules to kbuild in order to generate LLVM assembly files with the .ll
extension when using clang.

  # from c code
  make CC=clang kernel/pid.ll

Signed-off-by: Vinícius Tinti <viniciustinti@gmail.com>
Signed-off-by: Behan Webster <behanw@converseincode.com>
Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 .gitignore             | 1 +
 Makefile               | 5 +++++
 scripts/Makefile.build | 8 ++++++++
 3 files changed, 14 insertions(+)

diff --git a/.gitignore b/.gitignore
index c2ed4ecb0acd..0c39aa20b6ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,6 +33,7 @@
 *.lzo
 *.patch
 *.gcno
+*.ll
 modules.builtin
 Module.symvers
 *.dwo
diff --git a/Makefile b/Makefile
index 88d607245791..439443176161 100644
--- a/Makefile
+++ b/Makefile
@@ -1373,6 +1373,8 @@ help:
 	@echo  '                    (default: $$(INSTALL_MOD_PATH)/lib/firmware)'
 	@echo  '  dir/            - Build all files in dir and below'
 	@echo  '  dir/file.[ois]  - Build specified target only'
+	@echo  '  dir/file.ll     - Build the LLVM assembly file'
+	@echo  '                    (requires compiler support for LLVM assembly generation)'
 	@echo  '  dir/file.lst    - Build specified mixed source/assembly target only'
 	@echo  '                    (requires a recent binutils and recent build (System.map))'
 	@echo  '  dir/file.ko     - Build module including final link'
@@ -1557,6 +1559,7 @@ clean: $(clean-dirs)
 		-o -name '*.symtypes' -o -name 'modules.order' \
 		-o -name modules.builtin -o -name '.tmp_*.o.*' \
 		-o -name '*.c.[012]*.*' \
+		-o -name '*.ll' \
 		-o -name '*.gcno' \) -type f -print | xargs rm -f
 
 # Generate tags for editors
@@ -1660,6 +1663,8 @@ endif
 	$(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
 %.symtypes: %.c prepare scripts FORCE
 	$(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+%.ll: %.c prepare scripts FORCE
+	$(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
 
 # Modules
 /: prepare scripts FORCE
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index d883116ebaa4..e5f1425c601f 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -177,6 +177,14 @@ cmd_cc_symtypes_c =                                                         \
 $(obj)/%.symtypes : $(src)/%.c FORCE
 	$(call cmd,cc_symtypes_c)
 
+# LLVM assembly
+# Generate .ll files from .c
+quiet_cmd_cc_ll_c = CC $(quiet_modtag)  $@
+      cmd_cc_ll_c = $(CC) $(c_flags) -emit-llvm -S -o $@ $<
+
+$(obj)/%.ll: $(src)/%.c FORCE
+	$(call if_changed_dep,cc_ll_c)
+
 # C (.c) files
 # The C file is compiled and updated dependency information is generated.
 # (See cmd_cc_o_c + relevant part of rule_cc_o_c)
-- 
cgit v1.2.3


From f88fc122cc34c2545dec9562eaab121494e401ef Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 16 Mar 2017 09:02:40 +0100
Subject: mtd: nand: Cleanup/rework the atmel_nand driver

This is a complete rewrite of the driver whose main purpose is to
support the new DT representation where the NAND controller node is now
really visible in the DT and appears under the EBI bus. With this new
representation, we can add other devices under the EBI bus without
risking pinmuxing conflicts (the NAND controller is under the EBI
bus logic and as such, share some of its pins with other devices
connected on this bus).

Even though the goal of this rework was not necessarily to add new
features, the new driver has been designed with this in mind. With a
clearer separation between the different blocks and different IP
revisions, adding new functionalities should be easier (we already
have plans to support SMC timing configuration so that we no longer
have to rely on the configuration done by the bootloader/bootstrap).

Also note that we no longer have a custom ->cmdfunc() implementation,
which means we can now benefit from new features added in the core
implementation for free (support for new NAND operations for example).

The last thing that we gain with this rework is support for multi-chips
and multi-dies chips, thanks to the clean NAND controller <-> NAND
devices representation.

During this transition we also dropped support for AVR32 SoCs which
should soon disappear from mainline (removal of the AVR32 arch is
planned for 4.12).

This new driver has been tested on several platforms (at91sam9261,
at91sam9g45, at91sam9x5, sama5d3 and sama5d4) to make sure it did not
introduce regressions, and it's worth mentioning that old bindings are
still supported (which partly explain the positive diffstat).

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
---
 MAINTAINERS                              |    2 +-
 drivers/mtd/nand/Kconfig                 |    6 +-
 drivers/mtd/nand/Makefile                |    2 +-
 drivers/mtd/nand/atmel/Makefile          |    4 +
 drivers/mtd/nand/atmel/nand-controller.c | 2196 ++++++++++++++++++++++++++
 drivers/mtd/nand/atmel/pmecc.c           | 1020 ++++++++++++
 drivers/mtd/nand/atmel/pmecc.h           |   73 +
 drivers/mtd/nand/atmel_nand.c            | 2479 ------------------------------
 drivers/mtd/nand/atmel_nand_ecc.h        |  163 --
 drivers/mtd/nand/atmel_nand_nfc.h        |  103 --
 10 files changed, 3298 insertions(+), 2750 deletions(-)
 create mode 100644 drivers/mtd/nand/atmel/Makefile
 create mode 100644 drivers/mtd/nand/atmel/nand-controller.c
 create mode 100644 drivers/mtd/nand/atmel/pmecc.c
 create mode 100644 drivers/mtd/nand/atmel/pmecc.h
 delete mode 100644 drivers/mtd/nand/atmel_nand.c
 delete mode 100644 drivers/mtd/nand/atmel_nand_ecc.h
 delete mode 100644 drivers/mtd/nand/atmel_nand_nfc.h

diff --git a/MAINTAINERS b/MAINTAINERS
index c265a5fe4848..9a84d0022d57 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2244,7 +2244,7 @@ M:	Wenyou Yang <wenyou.yang@atmel.com>
 M:	Josh Wu <rainyfeeling@outlook.com>
 L:	linux-mtd@lists.infradead.org
 S:	Supported
-F:	drivers/mtd/nand/atmel_nand*
+F:	drivers/mtd/nand/atmel/*
 
 ATMEL SDMMC DRIVER
 M:	Ludovic Desroches <ludovic.desroches@microchip.com>
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index aa44fb0a1b18..c3029528063b 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -306,11 +306,11 @@ config MTD_NAND_CS553X
 	  If you say "m", the module will be called cs553x_nand.
 
 config MTD_NAND_ATMEL
-	tristate "Support for NAND Flash / SmartMedia on AT91 and AVR32"
-	depends on ARCH_AT91 || AVR32
+	tristate "Support for NAND Flash / SmartMedia on AT91"
+	depends on ARCH_AT91
 	help
 	  Enables support for NAND Flash / Smart Media Card interface
-	  on Atmel AT91 and AVR32 processors.
+	  on Atmel AT91 processors.
 
 config MTD_NAND_PXA3xx
 	tristate "NAND support on PXA3xx and Armada 370/XP"
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 098b8791f10a..ade5fc4c3819 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -24,7 +24,7 @@ obj-$(CONFIG_MTD_NAND_SHARPSL)		+= sharpsl.o
 obj-$(CONFIG_MTD_NAND_NANDSIM)		+= nandsim.o
 obj-$(CONFIG_MTD_NAND_CS553X)		+= cs553x_nand.o
 obj-$(CONFIG_MTD_NAND_NDFC)		+= ndfc.o
-obj-$(CONFIG_MTD_NAND_ATMEL)		+= atmel_nand.o
+obj-$(CONFIG_MTD_NAND_ATMEL)		+= atmel/
 obj-$(CONFIG_MTD_NAND_GPIO)		+= gpio.o
 omap2_nand-objs := omap2.o
 obj-$(CONFIG_MTD_NAND_OMAP2) 		+= omap2_nand.o
diff --git a/drivers/mtd/nand/atmel/Makefile b/drivers/mtd/nand/atmel/Makefile
new file mode 100644
index 000000000000..288db4f38a8f
--- /dev/null
+++ b/drivers/mtd/nand/atmel/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_MTD_NAND_ATMEL)	+= atmel-nand-controller.o atmel-pmecc.o
+
+atmel-nand-controller-objs	:= nand-controller.o
+atmel-pmecc-objs		:= pmecc.o
diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c
new file mode 100644
index 000000000000..80e2459f92f8
--- /dev/null
+++ b/drivers/mtd/nand/atmel/nand-controller.c
@@ -0,0 +1,2196 @@
+/*
+ * Copyright 2017 ATMEL
+ * Copyright 2017 Free Electrons
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * Derived from the atmel_nand.c driver which contained the following
+ * copyrights:
+ *
+ *   Copyright 2003 Rick Bronson
+ *
+ *   Derived from drivers/mtd/nand/autcpu12.c
+ *	Copyright 2001 Thomas Gleixner (gleixner@autronix.de)
+ *
+ *   Derived from drivers/mtd/spia.c
+ *	Copyright 2000 Steven J. Hill (sjhill@cotw.com)
+ *
+ *
+ *   Add Hardware ECC support for AT91SAM9260 / AT91SAM9263
+ *	Richard Genoud (richard.genoud@gmail.com), Adeneo Copyright 2007
+ *
+ *   Derived from Das U-Boot source code
+ *	(u-boot-1.1.5/board/atmel/at91sam9263ek/nand.c)
+ *	Copyright 2006 ATMEL Rousset, Lacressonniere Nicolas
+ *
+ *   Add Programmable Multibit ECC support for various AT91 SoC
+ *	Copyright 2012 ATMEL, Hong Xu
+ *
+ *   Add Nand Flash Controller support for SAMA5 SoC
+ *	Copyright 2013 ATMEL, Josh Wu (josh.wu@atmel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * A few words about the naming convention in this file. This convention
+ * applies to structure and function names.
+ *
+ * Prefixes:
+ *
+ * - atmel_nand_: all generic structures/functions
+ * - atmel_smc_nand_: all structures/functions specific to the SMC interface
+ *		      (at91sam9 and avr32 SoCs)
+ * - atmel_hsmc_nand_: all structures/functions specific to the HSMC interface
+ *		       (sama5 SoCs and later)
+ * - atmel_nfc_: all structures/functions used to manipulate the NFC sub-block
+ *		 that is available in the HSMC block
+ * - <soc>_nand_: all SoC specific structures/functions
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/genalloc.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mfd/syscon/atmel-matrix.h>
+#include <linux/module.h>
+#include <linux/mtd/nand.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/iopoll.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/atmel.h>
+#include <linux/regmap.h>
+
+#include "pmecc.h"
+
+#define ATMEL_HSMC_NFC_CFG			0x0
+#define ATMEL_HSMC_NFC_CFG_SPARESIZE(x)		(((x) / 4) << 24)
+#define ATMEL_HSMC_NFC_CFG_SPARESIZE_MASK	GENMASK(30, 24)
+#define ATMEL_HSMC_NFC_CFG_DTO(cyc, mul)	(((cyc) << 16) | ((mul) << 20))
+#define ATMEL_HSMC_NFC_CFG_DTO_MAX		GENMASK(22, 16)
+#define ATMEL_HSMC_NFC_CFG_RBEDGE		BIT(13)
+#define ATMEL_HSMC_NFC_CFG_FALLING_EDGE		BIT(12)
+#define ATMEL_HSMC_NFC_CFG_RSPARE		BIT(9)
+#define ATMEL_HSMC_NFC_CFG_WSPARE		BIT(8)
+#define ATMEL_HSMC_NFC_CFG_PAGESIZE_MASK	GENMASK(2, 0)
+#define ATMEL_HSMC_NFC_CFG_PAGESIZE(x)		(fls((x) / 512) - 1)
+
+#define ATMEL_HSMC_NFC_CTRL			0x4
+#define ATMEL_HSMC_NFC_CTRL_EN			BIT(0)
+#define ATMEL_HSMC_NFC_CTRL_DIS			BIT(1)
+
+#define ATMEL_HSMC_NFC_SR			0x8
+#define ATMEL_HSMC_NFC_IER			0xc
+#define ATMEL_HSMC_NFC_IDR			0x10
+#define ATMEL_HSMC_NFC_IMR			0x14
+#define ATMEL_HSMC_NFC_SR_ENABLED		BIT(1)
+#define ATMEL_HSMC_NFC_SR_RB_RISE		BIT(4)
+#define ATMEL_HSMC_NFC_SR_RB_FALL		BIT(5)
+#define ATMEL_HSMC_NFC_SR_BUSY			BIT(8)
+#define ATMEL_HSMC_NFC_SR_WR			BIT(11)
+#define ATMEL_HSMC_NFC_SR_CSID			GENMASK(14, 12)
+#define ATMEL_HSMC_NFC_SR_XFRDONE		BIT(16)
+#define ATMEL_HSMC_NFC_SR_CMDDONE		BIT(17)
+#define ATMEL_HSMC_NFC_SR_DTOE			BIT(20)
+#define ATMEL_HSMC_NFC_SR_UNDEF			BIT(21)
+#define ATMEL_HSMC_NFC_SR_AWB			BIT(22)
+#define ATMEL_HSMC_NFC_SR_NFCASE		BIT(23)
+#define ATMEL_HSMC_NFC_SR_ERRORS		(ATMEL_HSMC_NFC_SR_DTOE | \
+						 ATMEL_HSMC_NFC_SR_UNDEF | \
+						 ATMEL_HSMC_NFC_SR_AWB | \
+						 ATMEL_HSMC_NFC_SR_NFCASE)
+#define ATMEL_HSMC_NFC_SR_RBEDGE(x)		BIT((x) + 24)
+
+#define ATMEL_HSMC_NFC_ADDR			0x18
+#define ATMEL_HSMC_NFC_BANK			0x1c
+
+#define ATMEL_NFC_MAX_RB_ID			7
+
+#define ATMEL_NFC_SRAM_SIZE			0x2400
+
+#define ATMEL_NFC_CMD(pos, cmd)			((cmd) << (((pos) * 8) + 2))
+#define ATMEL_NFC_VCMD2				BIT(18)
+#define ATMEL_NFC_ACYCLE(naddrs)		((naddrs) << 19)
+#define ATMEL_NFC_CSID(cs)			((cs) << 22)
+#define ATMEL_NFC_DATAEN			BIT(25)
+#define ATMEL_NFC_NFCWR				BIT(26)
+
+#define ATMEL_NFC_MAX_ADDR_CYCLES		5
+
+#define ATMEL_NAND_ALE_OFFSET			BIT(21)
+#define ATMEL_NAND_CLE_OFFSET			BIT(22)
+
+#define DEFAULT_TIMEOUT_MS			1000
+#define MIN_DMA_LEN				128
+
+enum atmel_nand_rb_type {
+	ATMEL_NAND_NO_RB,
+	ATMEL_NAND_NATIVE_RB,
+	ATMEL_NAND_GPIO_RB,
+};
+
+struct atmel_nand_rb {
+	enum atmel_nand_rb_type type;
+	union {
+		struct gpio_desc *gpio;
+		int id;
+	};
+};
+
+struct atmel_nand_cs {
+	int id;
+	struct atmel_nand_rb rb;
+	struct gpio_desc *csgpio;
+	struct {
+		void __iomem *virt;
+		dma_addr_t dma;
+	} io;
+};
+
+struct atmel_nand {
+	struct list_head node;
+	struct device *dev;
+	struct nand_chip base;
+	struct atmel_nand_cs *activecs;
+	struct atmel_pmecc_user *pmecc;
+	struct gpio_desc *cdgpio;
+	int numcs;
+	struct atmel_nand_cs cs[];
+};
+
+static inline struct atmel_nand *to_atmel_nand(struct nand_chip *chip)
+{
+	return container_of(chip, struct atmel_nand, base);
+}
+
+enum atmel_nfc_data_xfer {
+	ATMEL_NFC_NO_DATA,
+	ATMEL_NFC_READ_DATA,
+	ATMEL_NFC_WRITE_DATA,
+};
+
+struct atmel_nfc_op {
+	u8 cs;
+	u8 ncmds;
+	u8 cmds[2];
+	u8 naddrs;
+	u8 addrs[5];
+	enum atmel_nfc_data_xfer data;
+	u32 wait;
+	u32 errors;
+};
+
+struct atmel_nand_controller;
+struct atmel_nand_controller_caps;
+
+struct atmel_nand_controller_ops {
+	int (*probe)(struct platform_device *pdev,
+		     const struct atmel_nand_controller_caps *caps);
+	int (*remove)(struct atmel_nand_controller *nc);
+	void (*nand_init)(struct atmel_nand_controller *nc,
+			  struct atmel_nand *nand);
+	int (*ecc_init)(struct atmel_nand *nand);
+};
+
+struct atmel_nand_controller_caps {
+	bool has_dma;
+	bool legacy_of_bindings;
+	u32 ale_offs;
+	u32 cle_offs;
+	const struct atmel_nand_controller_ops *ops;
+};
+
+struct atmel_nand_controller {
+	struct nand_hw_control base;
+	const struct atmel_nand_controller_caps *caps;
+	struct device *dev;
+	struct regmap *smc;
+	struct dma_chan *dmac;
+	struct atmel_pmecc *pmecc;
+	struct list_head chips;
+	struct clk *mck;
+};
+
+static inline struct atmel_nand_controller *
+to_nand_controller(struct nand_hw_control *ctl)
+{
+	return container_of(ctl, struct atmel_nand_controller, base);
+}
+
+struct atmel_smc_nand_controller {
+	struct atmel_nand_controller base;
+	struct regmap *matrix;
+	unsigned int ebi_csa_offs;
+};
+
+static inline struct atmel_smc_nand_controller *
+to_smc_nand_controller(struct nand_hw_control *ctl)
+{
+	return container_of(to_nand_controller(ctl),
+			    struct atmel_smc_nand_controller, base);
+}
+
+struct atmel_hsmc_nand_controller {
+	struct atmel_nand_controller base;
+	struct {
+		struct gen_pool *pool;
+		void __iomem *virt;
+		dma_addr_t dma;
+	} sram;
+	struct regmap *io;
+	struct atmel_nfc_op op;
+	struct completion complete;
+	int irq;
+
+	/* Only used when instantiating from legacy DT bindings. */
+	struct clk *clk;
+};
+
+static inline struct atmel_hsmc_nand_controller *
+to_hsmc_nand_controller(struct nand_hw_control *ctl)
+{
+	return container_of(to_nand_controller(ctl),
+			    struct atmel_hsmc_nand_controller, base);
+}
+
+static bool atmel_nfc_op_done(struct atmel_nfc_op *op, u32 status)
+{
+	op->errors |= status & ATMEL_HSMC_NFC_SR_ERRORS;
+	op->wait ^= status & op->wait;
+
+	return !op->wait || op->errors;
+}
+
+static irqreturn_t atmel_nfc_interrupt(int irq, void *data)
+{
+	struct atmel_hsmc_nand_controller *nc = data;
+	u32 sr, rcvd;
+	bool done;
+
+	regmap_read(nc->base.smc, ATMEL_HSMC_NFC_SR, &sr);
+
+	rcvd = sr & (nc->op.wait | ATMEL_HSMC_NFC_SR_ERRORS);
+	done = atmel_nfc_op_done(&nc->op, sr);
+
+	if (rcvd)
+		regmap_write(nc->base.smc, ATMEL_HSMC_NFC_IDR, rcvd);
+
+	if (done)
+		complete(&nc->complete);
+
+	return rcvd ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static int atmel_nfc_wait(struct atmel_hsmc_nand_controller *nc, bool poll,
+			  unsigned int timeout_ms)
+{
+	int ret;
+
+	if (!timeout_ms)
+		timeout_ms = DEFAULT_TIMEOUT_MS;
+
+	if (poll) {
+		u32 status;
+
+		ret = regmap_read_poll_timeout(nc->base.smc,
+					       ATMEL_HSMC_NFC_SR, status,
+					       atmel_nfc_op_done(&nc->op,
+								 status),
+					       0, timeout_ms * 1000);
+	} else {
+		init_completion(&nc->complete);
+		regmap_write(nc->base.smc, ATMEL_HSMC_NFC_IER,
+			     nc->op.wait | ATMEL_HSMC_NFC_SR_ERRORS);
+		ret = wait_for_completion_timeout(&nc->complete,
+						msecs_to_jiffies(timeout_ms));
+		if (!ret)
+			ret = -ETIMEDOUT;
+		else
+			ret = 0;
+
+		regmap_write(nc->base.smc, ATMEL_HSMC_NFC_IDR, 0xffffffff);
+	}
+
+	if (nc->op.errors & ATMEL_HSMC_NFC_SR_DTOE) {
+		dev_err(nc->base.dev, "Waiting NAND R/B Timeout\n");
+		ret = -ETIMEDOUT;
+	}
+
+	if (nc->op.errors & ATMEL_HSMC_NFC_SR_UNDEF) {
+		dev_err(nc->base.dev, "Access to an undefined area\n");
+		ret = -EIO;
+	}
+
+	if (nc->op.errors & ATMEL_HSMC_NFC_SR_AWB) {
+		dev_err(nc->base.dev, "Access while busy\n");
+		ret = -EIO;
+	}
+
+	if (nc->op.errors & ATMEL_HSMC_NFC_SR_NFCASE) {
+		dev_err(nc->base.dev, "Wrong access size\n");
+		ret = -EIO;
+	}
+
+	return ret;
+}
+
+static void atmel_nand_dma_transfer_finished(void *data)
+{
+	struct completion *finished = data;
+
+	complete(finished);
+}
+
+static int atmel_nand_dma_transfer(struct atmel_nand_controller *nc,
+				   void *buf, dma_addr_t dev_dma, size_t len,
+				   enum dma_data_direction dir)
+{
+	DECLARE_COMPLETION_ONSTACK(finished);
+	dma_addr_t src_dma, dst_dma, buf_dma;
+	struct dma_async_tx_descriptor *tx;
+	dma_cookie_t cookie;
+
+	buf_dma = dma_map_single(nc->dev, buf, len, dir);
+	if (dma_mapping_error(nc->dev, dev_dma)) {
+		dev_err(nc->dev,
+			"Failed to prepare a buffer for DMA access\n");
+		goto err;
+	}
+
+	if (dir == DMA_FROM_DEVICE) {
+		src_dma = dev_dma;
+		dst_dma = buf_dma;
+	} else {
+		src_dma = buf_dma;
+		dst_dma = dev_dma;
+	}
+
+	tx = dmaengine_prep_dma_memcpy(nc->dmac, dst_dma, src_dma, len,
+				       DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+	if (!tx) {
+		dev_err(nc->dev, "Failed to prepare DMA memcpy\n");
+		goto err_unmap;
+	}
+
+	tx->callback = atmel_nand_dma_transfer_finished;
+	tx->callback_param = &finished;
+
+	cookie = dmaengine_submit(tx);
+	if (dma_submit_error(cookie)) {
+		dev_err(nc->dev, "Failed to do DMA tx_submit\n");
+		goto err_unmap;
+	}
+
+	dma_async_issue_pending(nc->dmac);
+	wait_for_completion(&finished);
+
+	return 0;
+
+err_unmap:
+	dma_unmap_single(nc->dev, buf_dma, len, dir);
+
+err:
+	dev_dbg(nc->dev, "Fall back to CPU I/O\n");
+
+	return -EIO;
+}
+
+static u8 atmel_nand_read_byte(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+
+	return ioread8(nand->activecs->io.virt);
+}
+
+static u16 atmel_nand_read_word(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+
+	return ioread16(nand->activecs->io.virt);
+}
+
+static void atmel_nand_write_byte(struct mtd_info *mtd, u8 byte)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+
+	if (chip->options & NAND_BUSWIDTH_16)
+		iowrite16(byte | (byte << 8), nand->activecs->io.virt);
+	else
+		iowrite8(byte, nand->activecs->io.virt);
+}
+
+static void atmel_nand_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_nand_controller *nc;
+
+	nc = to_nand_controller(chip->controller);
+
+	/*
+	 * If the controller supports DMA, the buffer address is DMA-able and
+	 * len is long enough to make DMA transfers profitable, let's trigger
+	 * a DMA transfer. If it fails, fallback to PIO mode.
+	 */
+	if (nc->dmac && virt_addr_valid(buf) &&
+	    len >= MIN_DMA_LEN &&
+	    !atmel_nand_dma_transfer(nc, buf, nand->activecs->io.dma, len,
+				     DMA_FROM_DEVICE))
+		return;
+
+	if (chip->options & NAND_BUSWIDTH_16)
+		ioread16_rep(nand->activecs->io.virt, buf, len / 2);
+	else
+		ioread8_rep(nand->activecs->io.virt, buf, len);
+}
+
+static void atmel_nand_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_nand_controller *nc;
+
+	nc = to_nand_controller(chip->controller);
+
+	/*
+	 * If the controller supports DMA, the buffer address is DMA-able and
+	 * len is long enough to make DMA transfers profitable, let's trigger
+	 * a DMA transfer. If it fails, fallback to PIO mode.
+	 */
+	if (nc->dmac && virt_addr_valid(buf) &&
+	    len >= MIN_DMA_LEN &&
+	    !atmel_nand_dma_transfer(nc, (void *)buf, nand->activecs->io.dma,
+				     len, DMA_TO_DEVICE))
+		return;
+
+	if (chip->options & NAND_BUSWIDTH_16)
+		iowrite16_rep(nand->activecs->io.virt, buf, len / 2);
+	else
+		iowrite8_rep(nand->activecs->io.virt, buf, len);
+}
+
+static int atmel_nand_dev_ready(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+
+	return gpiod_get_value(nand->activecs->rb.gpio);
+}
+
+static void atmel_nand_select_chip(struct mtd_info *mtd, int cs)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+
+	if (cs < 0 || cs >= nand->numcs) {
+		nand->activecs = NULL;
+		chip->dev_ready = NULL;
+		return;
+	}
+
+	nand->activecs = &nand->cs[cs];
+
+	if (nand->activecs->rb.type == ATMEL_NAND_GPIO_RB)
+		chip->dev_ready = atmel_nand_dev_ready;
+}
+
+static int atmel_hsmc_nand_dev_ready(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_hsmc_nand_controller *nc;
+	u32 status;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	regmap_read(nc->base.smc, ATMEL_HSMC_NFC_SR, &status);
+
+	return status & ATMEL_HSMC_NFC_SR_RBEDGE(nand->activecs->rb.id);
+}
+
+static void atmel_hsmc_nand_select_chip(struct mtd_info *mtd, int cs)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_hsmc_nand_controller *nc;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	atmel_nand_select_chip(mtd, cs);
+
+	if (!nand->activecs) {
+		regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CTRL,
+			     ATMEL_HSMC_NFC_CTRL_DIS);
+		return;
+	}
+
+	if (nand->activecs->rb.type == ATMEL_NAND_NATIVE_RB)
+		chip->dev_ready = atmel_hsmc_nand_dev_ready;
+
+	regmap_update_bits(nc->base.smc, ATMEL_HSMC_NFC_CFG,
+			   ATMEL_HSMC_NFC_CFG_PAGESIZE_MASK |
+			   ATMEL_HSMC_NFC_CFG_SPARESIZE_MASK |
+			   ATMEL_HSMC_NFC_CFG_RSPARE |
+			   ATMEL_HSMC_NFC_CFG_WSPARE,
+			   ATMEL_HSMC_NFC_CFG_PAGESIZE(mtd->writesize) |
+			   ATMEL_HSMC_NFC_CFG_SPARESIZE(mtd->oobsize) |
+			   ATMEL_HSMC_NFC_CFG_RSPARE);
+	regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CTRL,
+		     ATMEL_HSMC_NFC_CTRL_EN);
+}
+
+static int atmel_nfc_exec_op(struct atmel_hsmc_nand_controller *nc, bool poll)
+{
+	u8 *addrs = nc->op.addrs;
+	unsigned int op = 0;
+	u32 addr, val;
+	int i, ret;
+
+	nc->op.wait = ATMEL_HSMC_NFC_SR_CMDDONE;
+
+	for (i = 0; i < nc->op.ncmds; i++)
+		op |= ATMEL_NFC_CMD(i, nc->op.cmds[i]);
+
+	if (nc->op.naddrs == ATMEL_NFC_MAX_ADDR_CYCLES)
+		regmap_write(nc->base.smc, ATMEL_HSMC_NFC_ADDR, *addrs++);
+
+	op |= ATMEL_NFC_CSID(nc->op.cs) |
+	      ATMEL_NFC_ACYCLE(nc->op.naddrs);
+
+	if (nc->op.ncmds > 1)
+		op |= ATMEL_NFC_VCMD2;
+
+	addr = addrs[0] | (addrs[1] << 8) | (addrs[2] << 16) |
+	       (addrs[3] << 24);
+
+	if (nc->op.data != ATMEL_NFC_NO_DATA) {
+		op |= ATMEL_NFC_DATAEN;
+		nc->op.wait |= ATMEL_HSMC_NFC_SR_XFRDONE;
+
+		if (nc->op.data == ATMEL_NFC_WRITE_DATA)
+			op |= ATMEL_NFC_NFCWR;
+	}
+
+	/* Clear all flags. */
+	regmap_read(nc->base.smc, ATMEL_HSMC_NFC_SR, &val);
+
+	/* Send the command. */
+	regmap_write(nc->io, op, addr);
+
+	ret = atmel_nfc_wait(nc, poll, 0);
+	if (ret)
+		dev_err(nc->base.dev,
+			"Failed to send NAND command (err = %d)!",
+			ret);
+
+	/* Reset the op state. */
+	memset(&nc->op, 0, sizeof(nc->op));
+
+	return ret;
+}
+
+static void atmel_hsmc_nand_cmd_ctrl(struct mtd_info *mtd, int dat,
+				     unsigned int ctrl)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_hsmc_nand_controller *nc;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	if (ctrl & NAND_ALE) {
+		if (nc->op.naddrs == ATMEL_NFC_MAX_ADDR_CYCLES)
+			return;
+
+		nc->op.addrs[nc->op.naddrs++] = dat;
+	} else if (ctrl & NAND_CLE) {
+		if (nc->op.ncmds > 1)
+			return;
+
+		nc->op.cmds[nc->op.ncmds++] = dat;
+	}
+
+	if (dat == NAND_CMD_NONE) {
+		nc->op.cs = nand->activecs->id;
+		atmel_nfc_exec_op(nc, true);
+	}
+}
+
+static void atmel_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+				unsigned int ctrl)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_nand_controller *nc;
+
+	nc = to_nand_controller(chip->controller);
+
+	if ((ctrl & NAND_CTRL_CHANGE) && nand->activecs->csgpio) {
+		if (ctrl & NAND_NCE)
+			gpiod_set_value(nand->activecs->csgpio, 0);
+		else
+			gpiod_set_value(nand->activecs->csgpio, 1);
+	}
+
+	if (ctrl & NAND_ALE)
+		writeb(cmd, nand->activecs->io.virt + nc->caps->ale_offs);
+	else if (ctrl & NAND_CLE)
+		writeb(cmd, nand->activecs->io.virt + nc->caps->cle_offs);
+}
+
+static void atmel_nfc_copy_to_sram(struct nand_chip *chip, const u8 *buf,
+				   bool oob_required)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_hsmc_nand_controller *nc;
+	int ret = -EIO;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	if (nc->base.dmac)
+		ret = atmel_nand_dma_transfer(&nc->base, (void *)buf,
+					      nc->sram.dma, mtd->writesize,
+					      DMA_TO_DEVICE);
+
+	/* Falling back to CPU copy. */
+	if (ret)
+		memcpy_toio(nc->sram.virt, buf, mtd->writesize);
+
+	if (oob_required)
+		memcpy_toio(nc->sram.virt + mtd->writesize, chip->oob_poi,
+			    mtd->oobsize);
+}
+
+static void atmel_nfc_copy_from_sram(struct nand_chip *chip, u8 *buf,
+				     bool oob_required)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_hsmc_nand_controller *nc;
+	int ret = -EIO;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	if (nc->base.dmac)
+		ret = atmel_nand_dma_transfer(&nc->base, buf, nc->sram.dma,
+					      mtd->writesize, DMA_FROM_DEVICE);
+
+	/* Falling back to CPU copy. */
+	if (ret)
+		memcpy_fromio(buf, nc->sram.virt, mtd->writesize);
+
+	if (oob_required)
+		memcpy_fromio(chip->oob_poi, nc->sram.virt + mtd->writesize,
+			      mtd->oobsize);
+}
+
+static void atmel_nfc_set_op_addr(struct nand_chip *chip, int page, int column)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_hsmc_nand_controller *nc;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	if (column >= 0) {
+		nc->op.addrs[nc->op.naddrs++] = column;
+
+		/*
+		 * 2 address cycles for the column offset on large page NANDs.
+		 */
+		if (mtd->writesize > 512)
+			nc->op.addrs[nc->op.naddrs++] = column >> 8;
+	}
+
+	if (page >= 0) {
+		nc->op.addrs[nc->op.naddrs++] = page;
+		nc->op.addrs[nc->op.naddrs++] = page >> 8;
+
+		if ((mtd->writesize > 512 && chip->chipsize > SZ_128M) ||
+		    (mtd->writesize <= 512 && chip->chipsize > SZ_32M))
+			nc->op.addrs[nc->op.naddrs++] = page >> 16;
+	}
+}
+
+static int atmel_nand_pmecc_enable(struct nand_chip *chip, int op, bool raw)
+{
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_nand_controller *nc;
+	int ret;
+
+	nc = to_nand_controller(chip->controller);
+
+	if (raw)
+		return 0;
+
+	ret = atmel_pmecc_enable(nand->pmecc, op);
+	if (ret)
+		dev_err(nc->dev,
+			"Failed to enable ECC engine (err = %d)\n", ret);
+
+	return ret;
+}
+
+static void atmel_nand_pmecc_disable(struct nand_chip *chip, bool raw)
+{
+	struct atmel_nand *nand = to_atmel_nand(chip);
+
+	if (!raw)
+		atmel_pmecc_disable(nand->pmecc);
+}
+
+static int atmel_nand_pmecc_generate_eccbytes(struct nand_chip *chip, bool raw)
+{
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand_controller *nc;
+	struct mtd_oob_region oobregion;
+	void *eccbuf;
+	int ret, i;
+
+	nc = to_nand_controller(chip->controller);
+
+	if (raw)
+		return 0;
+
+	ret = atmel_pmecc_wait_rdy(nand->pmecc);
+	if (ret) {
+		dev_err(nc->dev,
+			"Failed to transfer NAND page data (err = %d)\n",
+			ret);
+		return ret;
+	}
+
+	mtd_ooblayout_ecc(mtd, 0, &oobregion);
+	eccbuf = chip->oob_poi + oobregion.offset;
+
+	for (i = 0; i < chip->ecc.steps; i++) {
+		atmel_pmecc_get_generated_eccbytes(nand->pmecc, i,
+						   eccbuf);
+		eccbuf += chip->ecc.bytes;
+	}
+
+	return 0;
+}
+
+static int atmel_nand_pmecc_correct_data(struct nand_chip *chip, void *buf,
+					 bool raw)
+{
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand_controller *nc;
+	struct mtd_oob_region oobregion;
+	int ret, i, max_bitflips = 0;
+	void *databuf, *eccbuf;
+
+	nc = to_nand_controller(chip->controller);
+
+	if (raw)
+		return 0;
+
+	ret = atmel_pmecc_wait_rdy(nand->pmecc);
+	if (ret) {
+		dev_err(nc->dev,
+			"Failed to read NAND page data (err = %d)\n",
+			ret);
+		return ret;
+	}
+
+	mtd_ooblayout_ecc(mtd, 0, &oobregion);
+	eccbuf = chip->oob_poi + oobregion.offset;
+	databuf = buf;
+
+	for (i = 0; i < chip->ecc.steps; i++) {
+		ret = atmel_pmecc_correct_sector(nand->pmecc, i, databuf,
+						 eccbuf);
+		if (ret < 0 && !atmel_pmecc_correct_erased_chunks(nand->pmecc))
+			ret = nand_check_erased_ecc_chunk(databuf,
+							  chip->ecc.size,
+							  eccbuf,
+							  chip->ecc.bytes,
+							  NULL, 0,
+							  chip->ecc.strength);
+
+		if (ret >= 0)
+			max_bitflips = max(ret, max_bitflips);
+		else
+			mtd->ecc_stats.failed++;
+
+		databuf += chip->ecc.size;
+		eccbuf += chip->ecc.bytes;
+	}
+
+	return max_bitflips;
+}
+
+static int atmel_nand_pmecc_write_pg(struct nand_chip *chip, const u8 *buf,
+				     bool oob_required, int page, bool raw)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	int ret;
+
+	ret = atmel_nand_pmecc_enable(chip, NAND_ECC_WRITE, raw);
+	if (ret)
+		return ret;
+
+	atmel_nand_write_buf(mtd, buf, mtd->writesize);
+
+	ret = atmel_nand_pmecc_generate_eccbytes(chip, raw);
+	if (ret) {
+		atmel_pmecc_disable(nand->pmecc);
+		return ret;
+	}
+
+	atmel_nand_pmecc_disable(chip, raw);
+
+	atmel_nand_write_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+	return 0;
+}
+
+static int atmel_nand_pmecc_write_page(struct mtd_info *mtd,
+				       struct nand_chip *chip, const u8 *buf,
+				       int oob_required, int page)
+{
+	return atmel_nand_pmecc_write_pg(chip, buf, oob_required, page, false);
+}
+
+static int atmel_nand_pmecc_write_page_raw(struct mtd_info *mtd,
+					   struct nand_chip *chip,
+					   const u8 *buf, int oob_required,
+					   int page)
+{
+	return atmel_nand_pmecc_write_pg(chip, buf, oob_required, page, true);
+}
+
+static int atmel_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf,
+				    bool oob_required, int page, bool raw)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	int ret;
+
+	ret = atmel_nand_pmecc_enable(chip, NAND_ECC_READ, raw);
+	if (ret)
+		return ret;
+
+	atmel_nand_read_buf(mtd, buf, mtd->writesize);
+	atmel_nand_read_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+	ret = atmel_nand_pmecc_correct_data(chip, buf, raw);
+
+	atmel_nand_pmecc_disable(chip, raw);
+
+	return ret;
+}
+
+static int atmel_nand_pmecc_read_page(struct mtd_info *mtd,
+				      struct nand_chip *chip, u8 *buf,
+				      int oob_required, int page)
+{
+	return atmel_nand_pmecc_read_pg(chip, buf, oob_required, page, false);
+}
+
+static int atmel_nand_pmecc_read_page_raw(struct mtd_info *mtd,
+					  struct nand_chip *chip, u8 *buf,
+					  int oob_required, int page)
+{
+	return atmel_nand_pmecc_read_pg(chip, buf, oob_required, page, true);
+}
+
+static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip,
+					  const u8 *buf, bool oob_required,
+					  int page, bool raw)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_hsmc_nand_controller *nc;
+	int ret;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	atmel_nfc_copy_to_sram(chip, buf, false);
+
+	nc->op.cmds[0] = NAND_CMD_SEQIN;
+	nc->op.ncmds = 1;
+	atmel_nfc_set_op_addr(chip, page, 0x0);
+	nc->op.cs = nand->activecs->id;
+	nc->op.data = ATMEL_NFC_WRITE_DATA;
+
+	ret = atmel_nand_pmecc_enable(chip, NAND_ECC_WRITE, raw);
+	if (ret)
+		return ret;
+
+	ret = atmel_nfc_exec_op(nc, false);
+	if (ret) {
+		atmel_nand_pmecc_disable(chip, raw);
+		dev_err(nc->base.dev,
+			"Failed to transfer NAND page data (err = %d)\n",
+			ret);
+		return ret;
+	}
+
+	ret = atmel_nand_pmecc_generate_eccbytes(chip, raw);
+
+	atmel_nand_pmecc_disable(chip, raw);
+
+	if (ret)
+		return ret;
+
+	atmel_nand_write_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+	nc->op.cmds[0] = NAND_CMD_PAGEPROG;
+	nc->op.ncmds = 1;
+	nc->op.cs = nand->activecs->id;
+	ret = atmel_nfc_exec_op(nc, false);
+	if (ret)
+		dev_err(nc->base.dev, "Failed to program NAND page (err = %d)\n",
+			ret);
+
+	return ret;
+}
+
+static int atmel_hsmc_nand_pmecc_write_page(struct mtd_info *mtd,
+					    struct nand_chip *chip,
+					    const u8 *buf, int oob_required,
+					    int page)
+{
+	return atmel_hsmc_nand_pmecc_write_pg(chip, buf, oob_required, page,
+					      false);
+}
+
+static int atmel_hsmc_nand_pmecc_write_page_raw(struct mtd_info *mtd,
+						struct nand_chip *chip,
+						const u8 *buf,
+						int oob_required, int page)
+{
+	return atmel_hsmc_nand_pmecc_write_pg(chip, buf, oob_required, page,
+					      true);
+}
+
+static int atmel_hsmc_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf,
+					 bool oob_required, int page,
+					 bool raw)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_hsmc_nand_controller *nc;
+	int ret;
+
+	nc = to_hsmc_nand_controller(chip->controller);
+
+	/*
+	 * Optimized read page accessors only work when the NAND R/B pin is
+	 * connected to a native SoC R/B pin. If that's not the case, fallback
+	 * to the non-optimized one.
+	 */
+	if (nand->activecs->rb.type != ATMEL_NAND_NATIVE_RB) {
+		chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+
+		return atmel_nand_pmecc_read_pg(chip, buf, oob_required, page,
+						raw);
+	}
+
+	nc->op.cmds[nc->op.ncmds++] = NAND_CMD_READ0;
+
+	if (mtd->writesize > 512)
+		nc->op.cmds[nc->op.ncmds++] = NAND_CMD_READSTART;
+
+	atmel_nfc_set_op_addr(chip, page, 0x0);
+	nc->op.cs = nand->activecs->id;
+	nc->op.data = ATMEL_NFC_READ_DATA;
+
+	ret = atmel_nand_pmecc_enable(chip, NAND_ECC_READ, raw);
+	if (ret)
+		return ret;
+
+	ret = atmel_nfc_exec_op(nc, false);
+	if (ret) {
+		atmel_nand_pmecc_disable(chip, raw);
+		dev_err(nc->base.dev,
+			"Failed to load NAND page data (err = %d)\n",
+			ret);
+		return ret;
+	}
+
+	atmel_nfc_copy_from_sram(chip, buf, true);
+
+	ret = atmel_nand_pmecc_correct_data(chip, buf, raw);
+
+	atmel_nand_pmecc_disable(chip, raw);
+
+	return ret;
+}
+
+static int atmel_hsmc_nand_pmecc_read_page(struct mtd_info *mtd,
+					   struct nand_chip *chip, u8 *buf,
+					   int oob_required, int page)
+{
+	return atmel_hsmc_nand_pmecc_read_pg(chip, buf, oob_required, page,
+					     false);
+}
+
+static int atmel_hsmc_nand_pmecc_read_page_raw(struct mtd_info *mtd,
+					       struct nand_chip *chip,
+					       u8 *buf, int oob_required,
+					       int page)
+{
+	return atmel_hsmc_nand_pmecc_read_pg(chip, buf, oob_required, page,
+					     true);
+}
+
+static int atmel_nand_pmecc_init(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_nand_controller *nc;
+	struct atmel_pmecc_user_req req;
+
+	nc = to_nand_controller(chip->controller);
+
+	if (!nc->pmecc) {
+		dev_err(nc->dev, "HW ECC not supported\n");
+		return -ENOTSUPP;
+	}
+
+	if (nc->caps->legacy_of_bindings) {
+		u32 val;
+
+		if (!of_property_read_u32(nc->dev->of_node, "atmel,pmecc-cap",
+					  &val))
+			chip->ecc.strength = val;
+
+		if (!of_property_read_u32(nc->dev->of_node,
+					  "atmel,pmecc-sector-size",
+					  &val))
+			chip->ecc.size = val;
+	}
+
+	if (chip->ecc.options & NAND_ECC_MAXIMIZE)
+		req.ecc.strength = ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH;
+	else if (chip->ecc.strength)
+		req.ecc.strength = chip->ecc.strength;
+	else if (chip->ecc_strength_ds)
+		req.ecc.strength = chip->ecc_strength_ds;
+	else
+		req.ecc.strength = ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH;
+
+	if (chip->ecc.size)
+		req.ecc.sectorsize = chip->ecc.size;
+	else if (chip->ecc_step_ds)
+		req.ecc.sectorsize = chip->ecc_step_ds;
+	else
+		req.ecc.sectorsize = ATMEL_PMECC_SECTOR_SIZE_AUTO;
+
+	req.pagesize = mtd->writesize;
+	req.oobsize = mtd->oobsize;
+
+	if (mtd->writesize <= 512) {
+		req.ecc.bytes = 4;
+		req.ecc.ooboffset = 0;
+	} else {
+		req.ecc.bytes = mtd->oobsize - 2;
+		req.ecc.ooboffset = ATMEL_PMECC_OOBOFFSET_AUTO;
+	}
+
+	nand->pmecc = atmel_pmecc_create_user(nc->pmecc, &req);
+	if (IS_ERR(nand->pmecc))
+		return PTR_ERR(nand->pmecc);
+
+	chip->ecc.algo = NAND_ECC_BCH;
+	chip->ecc.size = req.ecc.sectorsize;
+	chip->ecc.bytes = req.ecc.bytes / req.ecc.nsectors;
+	chip->ecc.strength = req.ecc.strength;
+
+	chip->options |= NAND_NO_SUBPAGE_WRITE;
+
+	mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
+
+	return 0;
+}
+
+static int atmel_nand_ecc_init(struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	struct atmel_nand_controller *nc;
+	int ret;
+
+	nc = to_nand_controller(chip->controller);
+
+	switch (chip->ecc.mode) {
+	case NAND_ECC_NONE:
+	case NAND_ECC_SOFT:
+		/*
+		 * Nothing to do, the core will initialize everything for us.
+		 */
+		break;
+
+	case NAND_ECC_HW:
+		ret = atmel_nand_pmecc_init(chip);
+		if (ret)
+			return ret;
+
+		chip->ecc.read_page = atmel_nand_pmecc_read_page;
+		chip->ecc.write_page = atmel_nand_pmecc_write_page;
+		chip->ecc.read_page_raw = atmel_nand_pmecc_read_page_raw;
+		chip->ecc.write_page_raw = atmel_nand_pmecc_write_page_raw;
+		break;
+
+	default:
+		/* Other modes are not supported. */
+		dev_err(nc->dev, "Unsupported ECC mode: %d\n",
+			chip->ecc.mode);
+		return -ENOTSUPP;
+	}
+
+	return 0;
+}
+
+static int atmel_hsmc_nand_ecc_init(struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	int ret;
+
+	ret = atmel_nand_ecc_init(nand);
+	if (ret)
+		return ret;
+
+	if (chip->ecc.mode != NAND_ECC_HW)
+		return 0;
+
+	/* Adjust the ECC operations for the HSMC IP. */
+	chip->ecc.read_page = atmel_hsmc_nand_pmecc_read_page;
+	chip->ecc.write_page = atmel_hsmc_nand_pmecc_write_page;
+	chip->ecc.read_page_raw = atmel_hsmc_nand_pmecc_read_page_raw;
+	chip->ecc.write_page_raw = atmel_hsmc_nand_pmecc_write_page_raw;
+	chip->ecc.options |= NAND_ECC_CUSTOM_PAGE_ACCESS;
+
+	return 0;
+}
+
+static void atmel_nand_init(struct atmel_nand_controller *nc,
+			    struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	mtd->dev.parent = nc->dev;
+	nand->base.controller = &nc->base;
+
+	chip->cmd_ctrl = atmel_nand_cmd_ctrl;
+	chip->read_byte = atmel_nand_read_byte;
+	chip->read_word = atmel_nand_read_word;
+	chip->write_byte = atmel_nand_write_byte;
+	chip->read_buf = atmel_nand_read_buf;
+	chip->write_buf = atmel_nand_write_buf;
+	chip->select_chip = atmel_nand_select_chip;
+
+	/* Some NANDs require a longer delay than the default one (20us). */
+	chip->chip_delay = 40;
+
+	/*
+	 * Use a bounce buffer when the buffer passed by the MTD user is not
+	 * suitable for DMA.
+	 */
+	if (nc->dmac)
+		chip->options |= NAND_USE_BOUNCE_BUFFER;
+
+	/* Default to HW ECC if pmecc is available. */
+	if (nc->pmecc)
+		chip->ecc.mode = NAND_ECC_HW;
+}
+
+static void atmel_smc_nand_init(struct atmel_nand_controller *nc,
+				struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	struct atmel_smc_nand_controller *smc_nc;
+	int i;
+
+	atmel_nand_init(nc, nand);
+
+	smc_nc = to_smc_nand_controller(chip->controller);
+	if (!smc_nc->matrix)
+		return;
+
+	/* Attach the CS to the NAND Flash logic. */
+	for (i = 0; i < nand->numcs; i++)
+		regmap_update_bits(smc_nc->matrix, smc_nc->ebi_csa_offs,
+				   BIT(nand->cs[i].id), BIT(nand->cs[i].id));
+}
+
+static void atmel_hsmc_nand_init(struct atmel_nand_controller *nc,
+				 struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+
+	atmel_nand_init(nc, nand);
+
+	/* Overload some methods for the HSMC controller. */
+	chip->cmd_ctrl = atmel_hsmc_nand_cmd_ctrl;
+	chip->select_chip = atmel_hsmc_nand_select_chip;
+}
+
+static int atmel_nand_detect(struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand_controller *nc;
+	int ret;
+
+	nc = to_nand_controller(chip->controller);
+
+	ret = nand_scan_ident(mtd, nand->numcs, NULL);
+	if (ret)
+		dev_err(nc->dev, "nand_scan_ident() failed: %d\n", ret);
+
+	return ret;
+}
+
+static int atmel_nand_unregister(struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	int ret;
+
+	ret = mtd_device_unregister(mtd);
+	if (ret)
+		return ret;
+
+	nand_cleanup(chip);
+	list_del(&nand->node);
+
+	return 0;
+}
+
+static int atmel_nand_register(struct atmel_nand *nand)
+{
+	struct nand_chip *chip = &nand->base;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct atmel_nand_controller *nc;
+	int ret;
+
+	nc = to_nand_controller(chip->controller);
+
+	if (nc->caps->legacy_of_bindings || !nc->dev->of_node) {
+		/*
+		 * We keep the MTD name unchanged to avoid breaking platforms
+		 * where the MTD cmdline parser is used and the bootloader
+		 * has not been updated to use the new naming scheme.
+		 */
+		mtd->name = "atmel_nand";
+	} else if (!mtd->name) {
+		/*
+		 * If the new bindings are used and the bootloader has not been
+		 * updated to pass a new mtdparts parameter on the cmdline, you
+		 * should define the following property in your nand node:
+		 *
+		 *	label = "atmel_nand";
+		 *
+		 * This way, mtd->name will be set by the core when
+		 * nand_set_flash_node() is called.
+		 */
+		mtd->name = devm_kasprintf(nc->dev, GFP_KERNEL,
+					   "%s:nand.%d", dev_name(nc->dev),
+					   nand->cs[0].id);
+		if (!mtd->name) {
+			dev_err(nc->dev, "Failed to allocate mtd->name\n");
+			return -ENOMEM;
+		}
+	}
+
+	ret = nand_scan_tail(mtd);
+	if (ret) {
+		dev_err(nc->dev, "nand_scan_tail() failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = mtd_device_register(mtd, NULL, 0);
+	if (ret) {
+		dev_err(nc->dev, "Failed to register mtd device: %d\n", ret);
+		nand_cleanup(chip);
+		return ret;
+	}
+
+	list_add_tail(&nand->node, &nc->chips);
+
+	return 0;
+}
+
+static struct atmel_nand *atmel_nand_create(struct atmel_nand_controller *nc,
+					    struct device_node *np,
+					    int reg_cells)
+{
+	struct atmel_nand *nand;
+	struct gpio_desc *gpio;
+	int numcs, ret, i;
+
+	numcs = of_property_count_elems_of_size(np, "reg",
+						reg_cells * sizeof(u32));
+	if (numcs < 1) {
+		dev_err(nc->dev, "Missing or invalid reg property\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	nand = devm_kzalloc(nc->dev,
+			    sizeof(*nand) + (numcs * sizeof(*nand->cs)),
+			    GFP_KERNEL);
+	if (!nand) {
+		dev_err(nc->dev, "Failed to allocate NAND object\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	nand->numcs = numcs;
+
+	gpio = devm_fwnode_get_index_gpiod_from_child(nc->dev, "det", 0,
+						      &np->fwnode, GPIOD_IN,
+						      "nand-det");
+	if (IS_ERR(gpio) && PTR_ERR(gpio) != -ENOENT) {
+		dev_err(nc->dev,
+			"Failed to get detect gpio (err = %ld)\n",
+			PTR_ERR(gpio));
+		return ERR_CAST(gpio);
+	}
+
+	if (!IS_ERR(gpio))
+		nand->cdgpio = gpio;
+
+	for (i = 0; i < numcs; i++) {
+		struct resource res;
+		u32 val;
+
+		ret = of_address_to_resource(np, 0, &res);
+		if (ret) {
+			dev_err(nc->dev, "Invalid reg property (err = %d)\n",
+				ret);
+			return ERR_PTR(ret);
+		}
+
+		ret = of_property_read_u32_index(np, "reg", i * reg_cells,
+						 &val);
+		if (ret) {
+			dev_err(nc->dev, "Invalid reg property (err = %d)\n",
+				ret);
+			return ERR_PTR(ret);
+		}
+
+		nand->cs[i].id = val;
+
+		nand->cs[i].io.dma = res.start;
+		nand->cs[i].io.virt = devm_ioremap_resource(nc->dev, &res);
+		if (IS_ERR(nand->cs[i].io.virt))
+			return ERR_CAST(nand->cs[i].io.virt);
+
+		if (!of_property_read_u32(np, "atmel,rb", &val)) {
+			if (val > ATMEL_NFC_MAX_RB_ID)
+				return ERR_PTR(-EINVAL);
+
+			nand->cs[i].rb.type = ATMEL_NAND_NATIVE_RB;
+			nand->cs[i].rb.id = val;
+		} else {
+			gpio = devm_fwnode_get_index_gpiod_from_child(nc->dev,
+							"rb", i, &np->fwnode,
+							GPIOD_IN, "nand-rb");
+			if (IS_ERR(gpio) && PTR_ERR(gpio) != -ENOENT) {
+				dev_err(nc->dev,
+					"Failed to get R/B gpio (err = %ld)\n",
+					PTR_ERR(gpio));
+				return ERR_CAST(gpio);
+			}
+
+			if (!IS_ERR(gpio)) {
+				nand->cs[i].rb.type = ATMEL_NAND_GPIO_RB;
+				nand->cs[i].rb.gpio = gpio;
+			}
+		}
+
+		gpio = devm_fwnode_get_index_gpiod_from_child(nc->dev, "cs",
+							      i, &np->fwnode,
+							      GPIOD_OUT_HIGH,
+							      "nand-cs");
+		if (IS_ERR(gpio) && PTR_ERR(gpio) != -ENOENT) {
+			dev_err(nc->dev,
+				"Failed to get CS gpio (err = %ld)\n",
+				PTR_ERR(gpio));
+			return ERR_CAST(gpio);
+		}
+
+		if (!IS_ERR(gpio))
+			nand->cs[i].csgpio = gpio;
+	}
+
+	nand_set_flash_node(&nand->base, np);
+
+	return nand;
+}
+
+static int
+atmel_nand_controller_add_nand(struct atmel_nand_controller *nc,
+			       struct atmel_nand *nand)
+{
+	int ret;
+
+	/* No card inserted, skip this NAND. */
+	if (nand->cdgpio && gpiod_get_value(nand->cdgpio)) {
+		dev_info(nc->dev, "No SmartMedia card inserted.\n");
+		return 0;
+	}
+
+	nc->caps->ops->nand_init(nc, nand);
+
+	ret = atmel_nand_detect(nand);
+	if (ret)
+		return ret;
+
+	ret = nc->caps->ops->ecc_init(nand);
+	if (ret)
+		return ret;
+
+	return atmel_nand_register(nand);
+}
+
+static int
+atmel_nand_controller_remove_nands(struct atmel_nand_controller *nc)
+{
+	struct atmel_nand *nand, *tmp;
+	int ret;
+
+	list_for_each_entry_safe(nand, tmp, &nc->chips, node) {
+		ret = atmel_nand_unregister(nand);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int
+atmel_nand_controller_legacy_add_nands(struct atmel_nand_controller *nc)
+{
+	struct device *dev = nc->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct atmel_nand *nand;
+	struct gpio_desc *gpio;
+	struct resource *res;
+
+	/*
+	 * Legacy bindings only allow connecting a single NAND with a unique CS
+	 * line to the controller.
+	 */
+	nand = devm_kzalloc(nc->dev, sizeof(*nand) + sizeof(*nand->cs),
+			    GFP_KERNEL);
+	if (!nand)
+		return -ENOMEM;
+
+	nand->numcs = 1;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	nand->cs[0].io.virt = devm_ioremap_resource(dev, res);
+	if (IS_ERR(nand->cs[0].io.virt))
+		return PTR_ERR(nand->cs[0].io.virt);
+
+	nand->cs[0].io.dma = res->start;
+
+	/*
+	 * The old driver was hardcoding the CS id to 3 for all sama5
+	 * controllers. Since this id is only meaningful for the sama5
+	 * controller we can safely assign this id to 3 no matter the
+	 * controller.
+	 * If one wants to connect a NAND to a different CS line, he will
+	 * have to use the new bindings.
+	 */
+	nand->cs[0].id = 3;
+
+	/* R/B GPIO. */
+	gpio = devm_gpiod_get_index_optional(dev, NULL, 0,  GPIOD_IN);
+	if (IS_ERR(gpio)) {
+		dev_err(dev, "Failed to get R/B gpio (err = %ld)\n",
+			PTR_ERR(gpio));
+		return PTR_ERR(gpio);
+	}
+
+	if (gpio) {
+		nand->cs[0].rb.type = ATMEL_NAND_GPIO_RB;
+		nand->cs[0].rb.gpio = gpio;
+	}
+
+	/* CS GPIO. */
+	gpio = devm_gpiod_get_index_optional(dev, NULL, 1, GPIOD_OUT_HIGH);
+	if (IS_ERR(gpio)) {
+		dev_err(dev, "Failed to get CS gpio (err = %ld)\n",
+			PTR_ERR(gpio));
+		return PTR_ERR(gpio);
+	}
+
+	nand->cs[0].csgpio = gpio;
+
+	/* Card detect GPIO. */
+	gpio = devm_gpiod_get_index_optional(nc->dev, NULL, 2, GPIOD_IN);
+	if (IS_ERR(gpio)) {
+		dev_err(dev,
+			"Failed to get detect gpio (err = %ld)\n",
+			PTR_ERR(gpio));
+		return PTR_ERR(gpio);
+	}
+
+	nand->cdgpio = gpio;
+
+	nand_set_flash_node(&nand->base, nc->dev->of_node);
+
+	return atmel_nand_controller_add_nand(nc, nand);
+}
+
+static int atmel_nand_controller_add_nands(struct atmel_nand_controller *nc)
+{
+	struct device_node *np, *nand_np;
+	struct device *dev = nc->dev;
+	int ret, reg_cells;
+	u32 val;
+
+	/* We do not retrieve the SMC syscon when parsing old DTs. */
+	if (nc->caps->legacy_of_bindings)
+		return atmel_nand_controller_legacy_add_nands(nc);
+
+	np = dev->of_node;
+
+	ret = of_property_read_u32(np, "#address-cells", &val);
+	if (ret) {
+		dev_err(dev, "missing #address-cells property\n");
+		return ret;
+	}
+
+	reg_cells = val;
+
+	ret = of_property_read_u32(np, "#size-cells", &val);
+	if (ret) {
+		dev_err(dev, "missing #address-cells property\n");
+		return ret;
+	}
+
+	reg_cells += val;
+
+	for_each_child_of_node(np, nand_np) {
+		struct atmel_nand *nand;
+
+		nand = atmel_nand_create(nc, nand_np, reg_cells);
+		if (IS_ERR(nand)) {
+			ret = PTR_ERR(nand);
+			goto err;
+		}
+
+		ret = atmel_nand_controller_add_nand(nc, nand);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	atmel_nand_controller_remove_nands(nc);
+
+	return ret;
+}
+
+static void atmel_nand_controller_cleanup(struct atmel_nand_controller *nc)
+{
+	if (nc->dmac)
+		dma_release_channel(nc->dmac);
+
+	clk_put(nc->mck);
+}
+
+static const struct of_device_id atmel_matrix_of_ids[] = {
+	{
+		.compatible = "atmel,at91sam9260-matrix",
+		.data = (void *)AT91SAM9260_MATRIX_EBICSA,
+	},
+	{
+		.compatible = "atmel,at91sam9261-matrix",
+		.data = (void *)AT91SAM9261_MATRIX_EBICSA,
+	},
+	{
+		.compatible = "atmel,at91sam9263-matrix",
+		.data = (void *)AT91SAM9263_MATRIX_EBI0CSA,
+	},
+	{
+		.compatible = "atmel,at91sam9rl-matrix",
+		.data = (void *)AT91SAM9RL_MATRIX_EBICSA,
+	},
+	{
+		.compatible = "atmel,at91sam9g45-matrix",
+		.data = (void *)AT91SAM9G45_MATRIX_EBICSA,
+	},
+	{
+		.compatible = "atmel,at91sam9n12-matrix",
+		.data = (void *)AT91SAM9N12_MATRIX_EBICSA,
+	},
+	{
+		.compatible = "atmel,at91sam9x5-matrix",
+		.data = (void *)AT91SAM9X5_MATRIX_EBICSA,
+	},
+};
+
+static int atmel_nand_controller_init(struct atmel_nand_controller *nc,
+				struct platform_device *pdev,
+				const struct atmel_nand_controller_caps *caps)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	nand_hw_control_init(&nc->base);
+	INIT_LIST_HEAD(&nc->chips);
+	nc->dev = dev;
+	nc->caps = caps;
+
+	platform_set_drvdata(pdev, nc);
+
+	nc->pmecc = devm_atmel_pmecc_get(dev);
+	if (IS_ERR(nc->pmecc)) {
+		ret = PTR_ERR(nc->pmecc);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Could not get PMECC object (err = %d)\n",
+				ret);
+		return ret;
+	}
+
+	if (nc->caps->has_dma) {
+		dma_cap_mask_t mask;
+
+		dma_cap_zero(mask);
+		dma_cap_set(DMA_MEMCPY, mask);
+
+		nc->dmac = dma_request_channel(mask, NULL, NULL);
+		if (!nc->dmac)
+			dev_err(nc->dev, "Failed to request DMA channel\n");
+	}
+
+	/* We do not retrieve the SMC syscon when parsing old DTs. */
+	if (nc->caps->legacy_of_bindings)
+		return 0;
+
+	np = of_parse_phandle(dev->parent->of_node, "atmel,smc", 0);
+	if (!np) {
+		dev_err(dev, "Missing or invalid atmel,smc property\n");
+		return -EINVAL;
+	}
+
+	nc->smc = syscon_node_to_regmap(np);
+	of_node_put(np);
+	if (IS_ERR(nc->smc)) {
+		ret = IS_ERR(nc->smc);
+		dev_err(dev, "Could not get SMC regmap (err = %d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int
+atmel_smc_nand_controller_init(struct atmel_smc_nand_controller *nc)
+{
+	struct device *dev = nc->base.dev;
+	const struct of_device_id *match;
+	struct device_node *np;
+	int ret;
+
+	/* We do not retrieve the matrix syscon when parsing old DTs. */
+	if (nc->base.caps->legacy_of_bindings)
+		return 0;
+
+	np = of_parse_phandle(dev->parent->of_node, "atmel,matrix", 0);
+	if (!np)
+		return 0;
+
+	match = of_match_node(atmel_matrix_of_ids, np);
+	if (!match) {
+		of_node_put(np);
+		return 0;
+	}
+
+	nc->matrix = syscon_node_to_regmap(np);
+	of_node_put(np);
+	if (IS_ERR(nc->matrix)) {
+		ret = IS_ERR(nc->matrix);
+		dev_err(dev, "Could not get Matrix regmap (err = %d)\n", ret);
+		return ret;
+	}
+
+	nc->ebi_csa_offs = (unsigned int)match->data;
+
+	/*
+	 * The at91sam9263 has 2 EBIs, if the NAND controller is under EBI1
+	 * add 4 to ->ebi_csa_offs.
+	 */
+	if (of_device_is_compatible(dev->parent->of_node,
+				    "atmel,at91sam9263-ebi1"))
+		nc->ebi_csa_offs += 4;
+
+	return 0;
+}
+
+static int
+atmel_hsmc_nand_controller_legacy_init(struct atmel_hsmc_nand_controller *nc)
+{
+	struct regmap_config regmap_conf = {
+		.reg_bits = 32,
+		.val_bits = 32,
+		.reg_stride = 4,
+	};
+
+	struct device *dev = nc->base.dev;
+	struct device_node *nand_np, *nfc_np;
+	void __iomem *iomem;
+	struct resource res;
+	int ret;
+
+	nand_np = dev->of_node;
+	nfc_np = of_find_compatible_node(dev->of_node, NULL,
+					 "atmel,sama5d3-nfc");
+
+	nc->clk = of_clk_get(nfc_np, 0);
+	if (IS_ERR(nc->clk)) {
+		ret = PTR_ERR(nc->clk);
+		dev_err(dev, "Failed to retrieve HSMC clock (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	ret = clk_prepare_enable(nc->clk);
+	if (ret) {
+		dev_err(dev, "Failed to enable the HSMC clock (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	nc->irq = of_irq_get(nand_np, 0);
+	if (nc->irq < 0) {
+		ret = nc->irq;
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get IRQ number (err = %d)\n",
+				ret);
+		goto out;
+	}
+
+	ret = of_address_to_resource(nfc_np, 0, &res);
+	if (ret) {
+		dev_err(dev, "Invalid or missing NFC IO resource (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	iomem = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(iomem)) {
+		ret = PTR_ERR(iomem);
+		goto out;
+	}
+
+	regmap_conf.name = "nfc-io";
+	regmap_conf.max_register = resource_size(&res) - 4;
+	nc->io = devm_regmap_init_mmio(dev, iomem, &regmap_conf);
+	if (IS_ERR(nc->io)) {
+		ret = PTR_ERR(nc->io);
+		dev_err(dev, "Could not create NFC IO regmap (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	ret = of_address_to_resource(nfc_np, 1, &res);
+	if (ret) {
+		dev_err(dev, "Invalid or missing HSMC resource (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	iomem = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(iomem)) {
+		ret = PTR_ERR(iomem);
+		goto out;
+	}
+
+	regmap_conf.name = "smc";
+	regmap_conf.max_register = resource_size(&res) - 4;
+	nc->base.smc = devm_regmap_init_mmio(dev, iomem, &regmap_conf);
+	if (IS_ERR(nc->base.smc)) {
+		ret = PTR_ERR(nc->base.smc);
+		dev_err(dev, "Could not create NFC IO regmap (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	ret = of_address_to_resource(nfc_np, 2, &res);
+	if (ret) {
+		dev_err(dev, "Invalid or missing SRAM resource (err = %d)\n",
+			ret);
+		goto out;
+	}
+
+	nc->sram.virt = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(nc->sram.virt)) {
+		ret = PTR_ERR(nc->sram.virt);
+		goto out;
+	}
+
+	nc->sram.dma = res.start;
+
+out:
+	of_node_put(nfc_np);
+
+	return ret;
+}
+
+static int
+atmel_hsmc_nand_controller_init(struct atmel_hsmc_nand_controller *nc)
+{
+	struct device *dev = nc->base.dev;
+	struct device_node *np;
+	int ret;
+
+	np = of_parse_phandle(dev->parent->of_node, "atmel,smc", 0);
+	if (!np) {
+		dev_err(dev, "Missing or invalid atmel,smc property\n");
+		return -EINVAL;
+	}
+
+	nc->irq = of_irq_get(np, 0);
+	of_node_put(np);
+	if (nc->irq < 0) {
+		if (nc->irq != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get IRQ number (err = %d)\n",
+				nc->irq);
+		return nc->irq;
+	}
+
+	np = of_parse_phandle(dev->of_node, "atmel,nfc-io", 0);
+	if (!np) {
+		dev_err(dev, "Missing or invalid atmel,nfc-io property\n");
+		return -EINVAL;
+	}
+
+	nc->io = syscon_node_to_regmap(np);
+	of_node_put(np);
+	if (IS_ERR(nc->io)) {
+		ret = PTR_ERR(nc->io);
+		dev_err(dev, "Could not get NFC IO regmap (err = %d)\n", ret);
+		return ret;
+	}
+
+	nc->sram.pool = of_gen_pool_get(nc->base.dev->of_node,
+					 "atmel,nfc-sram", 0);
+	if (!nc->sram.pool) {
+		dev_err(nc->base.dev, "Missing SRAM\n");
+		return -ENOMEM;
+	}
+
+	nc->sram.virt = gen_pool_dma_alloc(nc->sram.pool,
+					    ATMEL_NFC_SRAM_SIZE,
+					    &nc->sram.dma);
+	if (!nc->sram.virt) {
+		dev_err(nc->base.dev,
+			"Could not allocate memory from the NFC SRAM pool\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int
+atmel_hsmc_nand_controller_remove(struct atmel_nand_controller *nc)
+{
+	struct atmel_hsmc_nand_controller *hsmc_nc;
+	int ret;
+
+	ret = atmel_nand_controller_remove_nands(nc);
+	if (ret)
+		return ret;
+
+	hsmc_nc = container_of(nc, struct atmel_hsmc_nand_controller, base);
+	if (hsmc_nc->sram.pool)
+		gen_pool_free(hsmc_nc->sram.pool,
+			      (unsigned long)hsmc_nc->sram.virt,
+			      ATMEL_NFC_SRAM_SIZE);
+
+	if (hsmc_nc->clk) {
+		clk_disable_unprepare(hsmc_nc->clk);
+		clk_put(hsmc_nc->clk);
+	}
+
+	atmel_nand_controller_cleanup(nc);
+
+	return 0;
+}
+
+static int atmel_hsmc_nand_controller_probe(struct platform_device *pdev,
+				const struct atmel_nand_controller_caps *caps)
+{
+	struct device *dev = &pdev->dev;
+	struct atmel_hsmc_nand_controller *nc;
+	int ret;
+
+	nc = devm_kzalloc(dev, sizeof(*nc), GFP_KERNEL);
+	if (!nc)
+		return -ENOMEM;
+
+	ret = atmel_nand_controller_init(&nc->base, pdev, caps);
+	if (ret)
+		return ret;
+
+	if (caps->legacy_of_bindings)
+		ret = atmel_hsmc_nand_controller_legacy_init(nc);
+	else
+		ret = atmel_hsmc_nand_controller_init(nc);
+
+	if (ret)
+		return ret;
+
+	/* Make sure all irqs are masked before registering our IRQ handler. */
+	regmap_write(nc->base.smc, ATMEL_HSMC_NFC_IDR, 0xffffffff);
+	ret = devm_request_irq(dev, nc->irq, atmel_nfc_interrupt,
+			       IRQF_SHARED, "nfc", nc);
+	if (ret) {
+		dev_err(dev,
+			"Could not get register NFC interrupt handler (err = %d)\n",
+			ret);
+		goto err;
+	}
+
+	/* Initial NFC configuration. */
+	regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CFG,
+		     ATMEL_HSMC_NFC_CFG_DTO_MAX);
+
+	ret = atmel_nand_controller_add_nands(&nc->base);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	atmel_hsmc_nand_controller_remove(&nc->base);
+
+	return ret;
+}
+
+static const struct atmel_nand_controller_ops atmel_hsmc_nc_ops = {
+	.probe = atmel_hsmc_nand_controller_probe,
+	.remove = atmel_hsmc_nand_controller_remove,
+	.ecc_init = atmel_hsmc_nand_ecc_init,
+	.nand_init = atmel_hsmc_nand_init,
+};
+
+static const struct atmel_nand_controller_caps atmel_sama5_nc_caps = {
+	.has_dma = true,
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &atmel_hsmc_nc_ops,
+};
+
+/* Only used to parse old bindings. */
+static const struct atmel_nand_controller_caps atmel_sama5_nand_caps = {
+	.has_dma = true,
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &atmel_hsmc_nc_ops,
+	.legacy_of_bindings = true,
+};
+
+static int atmel_smc_nand_controller_probe(struct platform_device *pdev,
+				const struct atmel_nand_controller_caps *caps)
+{
+	struct device *dev = &pdev->dev;
+	struct atmel_smc_nand_controller *nc;
+	int ret;
+
+	nc = devm_kzalloc(dev, sizeof(*nc), GFP_KERNEL);
+	if (!nc)
+		return -ENOMEM;
+
+	ret = atmel_nand_controller_init(&nc->base, pdev, caps);
+	if (ret)
+		return ret;
+
+	ret = atmel_smc_nand_controller_init(nc);
+	if (ret)
+		return ret;
+
+	return atmel_nand_controller_add_nands(&nc->base);
+}
+
+static int
+atmel_smc_nand_controller_remove(struct atmel_nand_controller *nc)
+{
+	int ret;
+
+	ret = atmel_nand_controller_remove_nands(nc);
+	if (ret)
+		return ret;
+
+	atmel_nand_controller_cleanup(nc);
+
+	return 0;
+}
+
+static const struct atmel_nand_controller_ops atmel_smc_nc_ops = {
+	.probe = atmel_smc_nand_controller_probe,
+	.remove = atmel_smc_nand_controller_remove,
+	.ecc_init = atmel_nand_ecc_init,
+	.nand_init = atmel_smc_nand_init,
+};
+
+static const struct atmel_nand_controller_caps atmel_rm9200_nc_caps = {
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &atmel_smc_nc_ops,
+};
+
+static const struct atmel_nand_controller_caps atmel_sam9261_nc_caps = {
+	.ale_offs = BIT(22),
+	.cle_offs = BIT(21),
+	.ops = &atmel_smc_nc_ops,
+};
+
+static const struct atmel_nand_controller_caps atmel_sam9g45_nc_caps = {
+	.has_dma = true,
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &atmel_smc_nc_ops,
+};
+
+/* Only used to parse old bindings. */
+static const struct atmel_nand_controller_caps atmel_rm9200_nand_caps = {
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &atmel_smc_nc_ops,
+	.legacy_of_bindings = true,
+};
+
+static const struct atmel_nand_controller_caps atmel_sam9261_nand_caps = {
+	.ale_offs = BIT(22),
+	.cle_offs = BIT(21),
+	.ops = &atmel_smc_nc_ops,
+	.legacy_of_bindings = true,
+};
+
+static const struct atmel_nand_controller_caps atmel_sam9g45_nand_caps = {
+	.has_dma = true,
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &atmel_smc_nc_ops,
+	.legacy_of_bindings = true,
+};
+
+static const struct of_device_id atmel_nand_controller_of_ids[] = {
+	{
+		.compatible = "atmel,at91rm9200-nand-controller",
+		.data = &atmel_rm9200_nc_caps,
+	},
+	{
+		.compatible = "atmel,at91sam9260-nand-controller",
+		.data = &atmel_rm9200_nc_caps,
+	},
+	{
+		.compatible = "atmel,at91sam9261-nand-controller",
+		.data = &atmel_sam9261_nc_caps,
+	},
+	{
+		.compatible = "atmel,at91sam9g45-nand-controller",
+		.data = &atmel_sam9g45_nc_caps,
+	},
+	{
+		.compatible = "atmel,sama5d3-nand-controller",
+		.data = &atmel_sama5_nc_caps,
+	},
+	/* Support for old/deprecated bindings: */
+	{
+		.compatible = "atmel,at91rm9200-nand",
+		.data = &atmel_rm9200_nand_caps,
+	},
+	{
+		.compatible = "atmel,sama5d4-nand",
+		.data = &atmel_rm9200_nand_caps,
+	},
+	{
+		.compatible = "atmel,sama5d2-nand",
+		.data = &atmel_rm9200_nand_caps,
+	},
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, atmel_nand_controller_of_ids);
+
+static int atmel_nand_controller_probe(struct platform_device *pdev)
+{
+	const struct atmel_nand_controller_caps *caps;
+
+	if (pdev->id_entry)
+		caps = (void *)pdev->id_entry->driver_data;
+	else
+		caps = of_device_get_match_data(&pdev->dev);
+
+	if (!caps) {
+		dev_err(&pdev->dev, "Could not retrieve NFC caps\n");
+		return -EINVAL;
+	}
+
+	if (caps->legacy_of_bindings) {
+		u32 ale_offs = 21;
+
+		/*
+		 * If we are parsing legacy DT props and the DT contains a
+		 * valid NFC node, forward the request to the sama5 logic.
+		 */
+		if (of_find_compatible_node(pdev->dev.of_node, NULL,
+					    "atmel,sama5d3-nfc"))
+			caps = &atmel_sama5_nand_caps;
+
+		/*
+		 * Even if the compatible says we are dealing with an
+		 * at91rm9200 controller, the atmel,nand-has-dma specify that
+		 * this controller supports DMA, which means we are in fact
+		 * dealing with an at91sam9g45+ controller.
+		 */
+		if (!caps->has_dma &&
+		    of_property_read_bool(pdev->dev.of_node,
+					  "atmel,nand-has-dma"))
+			caps = &atmel_sam9g45_nand_caps;
+
+		/*
+		 * All SoCs except the at91sam9261 are assigning ALE to A21 and
+		 * CLE to A22. If atmel,nand-addr-offset != 21 this means we're
+		 * actually dealing with an at91sam9261 controller.
+		 */
+		of_property_read_u32(pdev->dev.of_node,
+				     "atmel,nand-addr-offset", &ale_offs);
+		if (ale_offs != 21)
+			caps = &atmel_sam9261_nand_caps;
+	}
+
+	return caps->ops->probe(pdev, caps);
+}
+
+static int atmel_nand_controller_remove(struct platform_device *pdev)
+{
+	struct atmel_nand_controller *nc = platform_get_drvdata(pdev);
+
+	return nc->caps->ops->remove(nc);
+}
+
+static struct platform_driver atmel_nand_controller_driver = {
+	.driver = {
+		.name = "atmel-nand-controller",
+		.of_match_table = of_match_ptr(atmel_nand_controller_of_ids),
+	},
+	.probe = atmel_nand_controller_probe,
+	.remove = atmel_nand_controller_remove,
+};
+module_platform_driver(atmel_nand_controller_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Boris Brezillon <boris.brezillon@free-electrons.com>");
+MODULE_DESCRIPTION("NAND Flash Controller driver for Atmel SoCs");
+MODULE_ALIAS("platform:atmel-nand-controller");
diff --git a/drivers/mtd/nand/atmel/pmecc.c b/drivers/mtd/nand/atmel/pmecc.c
new file mode 100644
index 000000000000..55a8ee5306ea
--- /dev/null
+++ b/drivers/mtd/nand/atmel/pmecc.c
@@ -0,0 +1,1020 @@
+/*
+ * Copyright 2017 ATMEL
+ * Copyright 2017 Free Electrons
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * Derived from the atmel_nand.c driver which contained the following
+ * copyrights:
+ *
+ *   Copyright 2003 Rick Bronson
+ *
+ *   Derived from drivers/mtd/nand/autcpu12.c
+ *	Copyright 2001 Thomas Gleixner (gleixner@autronix.de)
+ *
+ *   Derived from drivers/mtd/spia.c
+ *	Copyright 2000 Steven J. Hill (sjhill@cotw.com)
+ *
+ *   Add Hardware ECC support for AT91SAM9260 / AT91SAM9263
+ *	Richard Genoud (richard.genoud@gmail.com), Adeneo Copyright 2007
+ *
+ *   Derived from Das U-Boot source code
+ *	(u-boot-1.1.5/board/atmel/at91sam9263ek/nand.c)
+ *      Copyright 2006 ATMEL Rousset, Lacressonniere Nicolas
+ *
+ *   Add Programmable Multibit ECC support for various AT91 SoC
+ *	Copyright 2012 ATMEL, Hong Xu
+ *
+ *   Add Nand Flash Controller support for SAMA5 SoC
+ *	Copyright 2013 ATMEL, Josh Wu (josh.wu@atmel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * The PMECC is an hardware assisted BCH engine, which means part of the
+ * ECC algorithm is left to the software. The hardware/software repartition
+ * is explained in the "PMECC Controller Functional Description" chapter in
+ * Atmel datasheets, and some of the functions in this file are directly
+ * implementing the algorithms described in the "Software Implementation"
+ * sub-section.
+ *
+ * TODO: it seems that the software BCH implementation in lib/bch.c is already
+ * providing some of the logic we are implementing here. It would be smart
+ * to expose the needed lib/bch.c helpers/functions and re-use them here.
+ */
+
+#include <linux/genalloc.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/mtd/nand.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "pmecc.h"
+
+/* Galois field dimension */
+#define PMECC_GF_DIMENSION_13			13
+#define PMECC_GF_DIMENSION_14			14
+
+/* Primitive Polynomial used by PMECC */
+#define PMECC_GF_13_PRIMITIVE_POLY		0x201b
+#define PMECC_GF_14_PRIMITIVE_POLY		0x4443
+
+#define PMECC_LOOKUP_TABLE_SIZE_512		0x2000
+#define PMECC_LOOKUP_TABLE_SIZE_1024		0x4000
+
+/* Time out value for reading PMECC status register */
+#define PMECC_MAX_TIMEOUT_MS			100
+
+/* PMECC Register Definitions */
+#define ATMEL_PMECC_CFG				0x0
+#define PMECC_CFG_BCH_STRENGTH(x)		(x)
+#define PMECC_CFG_BCH_STRENGTH_MASK		GENMASK(2, 0)
+#define PMECC_CFG_SECTOR512			(0 << 4)
+#define PMECC_CFG_SECTOR1024			(1 << 4)
+#define PMECC_CFG_NSECTORS(x)			((fls(x) - 1) << 8)
+#define PMECC_CFG_READ_OP			(0 << 12)
+#define PMECC_CFG_WRITE_OP			(1 << 12)
+#define PMECC_CFG_SPARE_ENABLE			BIT(16)
+#define PMECC_CFG_AUTO_ENABLE			BIT(20)
+
+#define ATMEL_PMECC_SAREA			0x4
+#define ATMEL_PMECC_SADDR			0x8
+#define ATMEL_PMECC_EADDR			0xc
+
+#define ATMEL_PMECC_CLK				0x10
+#define PMECC_CLK_133MHZ			(2 << 0)
+
+#define ATMEL_PMECC_CTRL			0x14
+#define PMECC_CTRL_RST				BIT(0)
+#define PMECC_CTRL_DATA				BIT(1)
+#define PMECC_CTRL_USER				BIT(2)
+#define PMECC_CTRL_ENABLE			BIT(4)
+#define PMECC_CTRL_DISABLE			BIT(5)
+
+#define ATMEL_PMECC_SR				0x18
+#define PMECC_SR_BUSY				BIT(0)
+#define PMECC_SR_ENABLE				BIT(4)
+
+#define ATMEL_PMECC_IER				0x1c
+#define ATMEL_PMECC_IDR				0x20
+#define ATMEL_PMECC_IMR				0x24
+#define ATMEL_PMECC_ISR				0x28
+#define PMECC_ERROR_INT				BIT(0)
+
+#define ATMEL_PMECC_ECC(sector, n)		\
+	((((sector) + 1) * 0x40) + (n))
+
+#define ATMEL_PMECC_REM(sector, n)		\
+	((((sector) + 1) * 0x40) + ((n) * 4) + 0x200)
+
+/* PMERRLOC Register Definitions */
+#define ATMEL_PMERRLOC_ELCFG			0x0
+#define PMERRLOC_ELCFG_SECTOR_512		(0 << 0)
+#define PMERRLOC_ELCFG_SECTOR_1024		(1 << 0)
+#define PMERRLOC_ELCFG_NUM_ERRORS(n)		((n) << 16)
+
+#define ATMEL_PMERRLOC_ELPRIM			0x4
+#define ATMEL_PMERRLOC_ELEN			0x8
+#define ATMEL_PMERRLOC_ELDIS			0xc
+#define PMERRLOC_DISABLE			BIT(0)
+
+#define ATMEL_PMERRLOC_ELSR			0x10
+#define PMERRLOC_ELSR_BUSY			BIT(0)
+
+#define ATMEL_PMERRLOC_ELIER			0x14
+#define ATMEL_PMERRLOC_ELIDR			0x18
+#define ATMEL_PMERRLOC_ELIMR			0x1c
+#define ATMEL_PMERRLOC_ELISR			0x20
+#define PMERRLOC_ERR_NUM_MASK			GENMASK(12, 8)
+#define PMERRLOC_CALC_DONE			BIT(0)
+
+#define ATMEL_PMERRLOC_SIGMA(x)			(((x) * 0x4) + 0x28)
+
+#define ATMEL_PMERRLOC_EL(offs, x)		(((x) * 0x4) + (offs))
+
+struct atmel_pmecc_gf_tables {
+	u16 *alpha_to;
+	u16 *index_of;
+};
+
+struct atmel_pmecc_caps {
+	const int *strengths;
+	int nstrengths;
+	int el_offset;
+	bool correct_erased_chunks;
+};
+
+struct atmel_pmecc {
+	struct device *dev;
+	const struct atmel_pmecc_caps *caps;
+
+	struct {
+		void __iomem *base;
+		void __iomem *errloc;
+	} regs;
+
+	struct mutex lock;
+};
+
+struct atmel_pmecc_user_conf_cache {
+	u32 cfg;
+	u32 sarea;
+	u32 saddr;
+	u32 eaddr;
+};
+
+struct atmel_pmecc_user {
+	struct atmel_pmecc_user_conf_cache cache;
+	struct atmel_pmecc *pmecc;
+	const struct atmel_pmecc_gf_tables *gf_tables;
+	int eccbytes;
+	s16 *partial_syn;
+	s16 *si;
+	s16 *lmu;
+	s16 *smu;
+	s32 *mu;
+	s32 *dmu;
+	s32 *delta;
+	u32 isr;
+};
+
+static DEFINE_MUTEX(pmecc_gf_tables_lock);
+static const struct atmel_pmecc_gf_tables *pmecc_gf_tables_512;
+static const struct atmel_pmecc_gf_tables *pmecc_gf_tables_1024;
+
+static inline int deg(unsigned int poly)
+{
+	/* polynomial degree is the most-significant bit index */
+	return fls(poly) - 1;
+}
+
+static int atmel_pmecc_build_gf_tables(int mm, unsigned int poly,
+				       struct atmel_pmecc_gf_tables *gf_tables)
+{
+	unsigned int i, x = 1;
+	const unsigned int k = BIT(deg(poly));
+	unsigned int nn = BIT(mm) - 1;
+
+	/* primitive polynomial must be of degree m */
+	if (k != (1u << mm))
+		return -EINVAL;
+
+	for (i = 0; i < nn; i++) {
+		gf_tables->alpha_to[i] = x;
+		gf_tables->index_of[x] = i;
+		if (i && (x == 1))
+			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
+			return -EINVAL;
+		x <<= 1;
+		if (x & k)
+			x ^= poly;
+	}
+	gf_tables->alpha_to[nn] = 1;
+	gf_tables->index_of[0] = 0;
+
+	return 0;
+}
+
+static const struct atmel_pmecc_gf_tables *
+atmel_pmecc_create_gf_tables(const struct atmel_pmecc_user_req *req)
+{
+	struct atmel_pmecc_gf_tables *gf_tables;
+	unsigned int poly, degree, table_size;
+	int ret;
+
+	if (req->ecc.sectorsize == 512) {
+		degree = PMECC_GF_DIMENSION_13;
+		poly = PMECC_GF_13_PRIMITIVE_POLY;
+		table_size = PMECC_LOOKUP_TABLE_SIZE_512;
+	} else {
+		degree = PMECC_GF_DIMENSION_14;
+		poly = PMECC_GF_14_PRIMITIVE_POLY;
+		table_size = PMECC_LOOKUP_TABLE_SIZE_1024;
+	}
+
+	gf_tables = kzalloc(sizeof(*gf_tables) +
+			    (2 * table_size * sizeof(u16)),
+			    GFP_KERNEL);
+	if (!gf_tables)
+		return ERR_PTR(-ENOMEM);
+
+	gf_tables->alpha_to = (void *)(gf_tables + 1);
+	gf_tables->index_of = gf_tables->alpha_to + table_size;
+
+	ret = atmel_pmecc_build_gf_tables(degree, poly, gf_tables);
+	if (ret) {
+		kfree(gf_tables);
+		return ERR_PTR(ret);
+	}
+
+	return gf_tables;
+}
+
+static const struct atmel_pmecc_gf_tables *
+atmel_pmecc_get_gf_tables(const struct atmel_pmecc_user_req *req)
+{
+	const struct atmel_pmecc_gf_tables **gf_tables, *ret;
+
+	mutex_lock(&pmecc_gf_tables_lock);
+	if (req->ecc.sectorsize == 512)
+		gf_tables = &pmecc_gf_tables_512;
+	else
+		gf_tables = &pmecc_gf_tables_1024;
+
+	ret = *gf_tables;
+
+	if (!ret) {
+		ret = atmel_pmecc_create_gf_tables(req);
+		if (!IS_ERR(ret))
+			*gf_tables = ret;
+	}
+	mutex_unlock(&pmecc_gf_tables_lock);
+
+	return ret;
+}
+
+static int atmel_pmecc_prepare_user_req(struct atmel_pmecc *pmecc,
+					struct atmel_pmecc_user_req *req)
+{
+	int i, max_eccbytes, eccbytes = 0, eccstrength = 0;
+
+	if (req->pagesize <= 0 || req->oobsize <= 0 || req->ecc.bytes <= 0)
+		return -EINVAL;
+
+	if (req->ecc.ooboffset >= 0 &&
+	    req->ecc.ooboffset + req->ecc.bytes > req->oobsize)
+		return -EINVAL;
+
+	if (req->ecc.sectorsize == ATMEL_PMECC_SECTOR_SIZE_AUTO) {
+		if (req->ecc.strength != ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH)
+			return -EINVAL;
+
+		if (req->pagesize > 512)
+			req->ecc.sectorsize = 1024;
+		else
+			req->ecc.sectorsize = 512;
+	}
+
+	if (req->ecc.sectorsize != 512 && req->ecc.sectorsize != 1024)
+		return -EINVAL;
+
+	if (req->pagesize % req->ecc.sectorsize)
+		return -EINVAL;
+
+	req->ecc.nsectors = req->pagesize / req->ecc.sectorsize;
+
+	max_eccbytes = req->ecc.bytes;
+
+	for (i = 0; i < pmecc->caps->nstrengths; i++) {
+		int nbytes, strength = pmecc->caps->strengths[i];
+
+		if (req->ecc.strength != ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH &&
+		    strength < req->ecc.strength)
+			continue;
+
+		nbytes = DIV_ROUND_UP(strength * fls(8 * req->ecc.sectorsize),
+				      8);
+		nbytes *= req->ecc.nsectors;
+
+		if (nbytes > max_eccbytes)
+			break;
+
+		eccstrength = strength;
+		eccbytes = nbytes;
+
+		if (req->ecc.strength != ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH)
+			break;
+	}
+
+	if (!eccstrength)
+		return -EINVAL;
+
+	req->ecc.bytes = eccbytes;
+	req->ecc.strength = eccstrength;
+
+	if (req->ecc.ooboffset < 0)
+		req->ecc.ooboffset = req->oobsize - eccbytes;
+
+	return 0;
+}
+
+struct atmel_pmecc_user *
+atmel_pmecc_create_user(struct atmel_pmecc *pmecc,
+			struct atmel_pmecc_user_req *req)
+{
+	struct atmel_pmecc_user *user;
+	const struct atmel_pmecc_gf_tables *gf_tables;
+	int strength, size, ret;
+
+	ret = atmel_pmecc_prepare_user_req(pmecc, req);
+	if (ret)
+		return ERR_PTR(ret);
+
+	size = sizeof(*user);
+	size = ALIGN(size, sizeof(u16));
+	/* Reserve space for partial_syn, si and smu */
+	size += ((2 * req->ecc.strength) + 1) * sizeof(u16) *
+		(2 + req->ecc.strength + 2);
+	/* Reserve space for lmu. */
+	size += (req->ecc.strength + 1) * sizeof(u16);
+	/* Reserve space for mu, dmu and delta. */
+	size = ALIGN(size, sizeof(s32));
+	size += (req->ecc.strength + 1) * sizeof(s32);
+
+	user = kzalloc(size, GFP_KERNEL);
+	if (!user)
+		return ERR_PTR(-ENOMEM);
+
+	user->pmecc = pmecc;
+
+	user->partial_syn = (s16 *)PTR_ALIGN(user + 1, sizeof(u16));
+	user->si = user->partial_syn + ((2 * req->ecc.strength) + 1);
+	user->lmu = user->si + ((2 * req->ecc.strength) + 1);
+	user->smu = user->lmu + (req->ecc.strength + 1);
+	user->mu = (s32 *)PTR_ALIGN(user->smu +
+				    (((2 * req->ecc.strength) + 1) *
+				     (req->ecc.strength + 2)),
+				    sizeof(s32));
+	user->dmu = user->mu + req->ecc.strength + 1;
+	user->delta = user->dmu + req->ecc.strength + 1;
+
+	gf_tables = atmel_pmecc_get_gf_tables(req);
+	if (IS_ERR(gf_tables)) {
+		kfree(user);
+		return ERR_CAST(gf_tables);
+	}
+
+	user->gf_tables = gf_tables;
+
+	user->eccbytes = req->ecc.bytes / req->ecc.nsectors;
+
+	for (strength = 0; strength < pmecc->caps->nstrengths; strength++) {
+		if (pmecc->caps->strengths[strength] == req->ecc.strength)
+			break;
+	}
+
+	user->cache.cfg = PMECC_CFG_BCH_STRENGTH(strength) |
+			  PMECC_CFG_NSECTORS(req->ecc.nsectors);
+
+	if (req->ecc.sectorsize == 1024)
+		user->cache.cfg |= PMECC_CFG_SECTOR1024;
+
+	user->cache.sarea = req->oobsize - 1;
+	user->cache.saddr = req->ecc.ooboffset;
+	user->cache.eaddr = req->ecc.ooboffset + req->ecc.bytes - 1;
+
+	return user;
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_create_user);
+
+void atmel_pmecc_destroy_user(struct atmel_pmecc_user *user)
+{
+	kfree(user);
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_destroy_user);
+
+static int get_strength(struct atmel_pmecc_user *user)
+{
+	const int *strengths = user->pmecc->caps->strengths;
+
+	return strengths[user->cache.cfg & PMECC_CFG_BCH_STRENGTH_MASK];
+}
+
+static int get_sectorsize(struct atmel_pmecc_user *user)
+{
+	return user->cache.cfg & PMECC_LOOKUP_TABLE_SIZE_1024 ? 1024 : 512;
+}
+
+static void atmel_pmecc_gen_syndrome(struct atmel_pmecc_user *user, int sector)
+{
+	int strength = get_strength(user);
+	u32 value;
+	int i;
+
+	/* Fill odd syndromes */
+	for (i = 0; i < strength; i++) {
+		value = readl_relaxed(user->pmecc->regs.base +
+				      ATMEL_PMECC_REM(sector, i / 2));
+		if (i & 1)
+			value >>= 16;
+
+		user->partial_syn[(2 * i) + 1] = value;
+	}
+}
+
+static void atmel_pmecc_substitute(struct atmel_pmecc_user *user)
+{
+	int degree = get_sectorsize(user) == 512 ? 13 : 14;
+	int cw_len = BIT(degree) - 1;
+	int strength = get_strength(user);
+	s16 *alpha_to = user->gf_tables->alpha_to;
+	s16 *index_of = user->gf_tables->index_of;
+	s16 *partial_syn = user->partial_syn;
+	s16 *si;
+	int i, j;
+
+	/*
+	 * si[] is a table that holds the current syndrome value,
+	 * an element of that table belongs to the field
+	 */
+	si = user->si;
+
+	memset(&si[1], 0, sizeof(s16) * ((2 * strength) - 1));
+
+	/* Computation 2t syndromes based on S(x) */
+	/* Odd syndromes */
+	for (i = 1; i < 2 * strength; i += 2) {
+		for (j = 0; j < degree; j++) {
+			if (partial_syn[i] & BIT(j))
+				si[i] = alpha_to[i * j] ^ si[i];
+		}
+	}
+	/* Even syndrome = (Odd syndrome) ** 2 */
+	for (i = 2, j = 1; j <= strength; i = ++j << 1) {
+		if (si[j] == 0) {
+			si[i] = 0;
+		} else {
+			s16 tmp;
+
+			tmp = index_of[si[j]];
+			tmp = (tmp * 2) % cw_len;
+			si[i] = alpha_to[tmp];
+		}
+	}
+}
+
+static void atmel_pmecc_get_sigma(struct atmel_pmecc_user *user)
+{
+	s16 *lmu = user->lmu;
+	s16 *si = user->si;
+	s32 *mu = user->mu;
+	s32 *dmu = user->dmu;
+	s32 *delta = user->delta;
+	int degree = get_sectorsize(user) == 512 ? 13 : 14;
+	int cw_len = BIT(degree) - 1;
+	int strength = get_strength(user);
+	int num = 2 * strength + 1;
+	s16 *index_of = user->gf_tables->index_of;
+	s16 *alpha_to = user->gf_tables->alpha_to;
+	int i, j, k;
+	u32 dmu_0_count, tmp;
+	s16 *smu = user->smu;
+
+	/* index of largest delta */
+	int ro;
+	int largest;
+	int diff;
+
+	dmu_0_count = 0;
+
+	/* First Row */
+
+	/* Mu */
+	mu[0] = -1;
+
+	memset(smu, 0, sizeof(s16) * num);
+	smu[0] = 1;
+
+	/* discrepancy set to 1 */
+	dmu[0] = 1;
+	/* polynom order set to 0 */
+	lmu[0] = 0;
+	delta[0] = (mu[0] * 2 - lmu[0]) >> 1;
+
+	/* Second Row */
+
+	/* Mu */
+	mu[1] = 0;
+	/* Sigma(x) set to 1 */
+	memset(&smu[num], 0, sizeof(s16) * num);
+	smu[num] = 1;
+
+	/* discrepancy set to S1 */
+	dmu[1] = si[1];
+
+	/* polynom order set to 0 */
+	lmu[1] = 0;
+
+	delta[1] = (mu[1] * 2 - lmu[1]) >> 1;
+
+	/* Init the Sigma(x) last row */
+	memset(&smu[(strength + 1) * num], 0, sizeof(s16) * num);
+
+	for (i = 1; i <= strength; i++) {
+		mu[i + 1] = i << 1;
+		/* Begin Computing Sigma (Mu+1) and L(mu) */
+		/* check if discrepancy is set to 0 */
+		if (dmu[i] == 0) {
+			dmu_0_count++;
+
+			tmp = ((strength - (lmu[i] >> 1) - 1) / 2);
+			if ((strength - (lmu[i] >> 1) - 1) & 0x1)
+				tmp += 2;
+			else
+				tmp += 1;
+
+			if (dmu_0_count == tmp) {
+				for (j = 0; j <= (lmu[i] >> 1) + 1; j++)
+					smu[(strength + 1) * num + j] =
+							smu[i * num + j];
+
+				lmu[strength + 1] = lmu[i];
+				return;
+			}
+
+			/* copy polynom */
+			for (j = 0; j <= lmu[i] >> 1; j++)
+				smu[(i + 1) * num + j] = smu[i * num + j];
+
+			/* copy previous polynom order to the next */
+			lmu[i + 1] = lmu[i];
+		} else {
+			ro = 0;
+			largest = -1;
+			/* find largest delta with dmu != 0 */
+			for (j = 0; j < i; j++) {
+				if ((dmu[j]) && (delta[j] > largest)) {
+					largest = delta[j];
+					ro = j;
+				}
+			}
+
+			/* compute difference */
+			diff = (mu[i] - mu[ro]);
+
+			/* Compute degree of the new smu polynomial */
+			if ((lmu[i] >> 1) > ((lmu[ro] >> 1) + diff))
+				lmu[i + 1] = lmu[i];
+			else
+				lmu[i + 1] = ((lmu[ro] >> 1) + diff) * 2;
+
+			/* Init smu[i+1] with 0 */
+			for (k = 0; k < num; k++)
+				smu[(i + 1) * num + k] = 0;
+
+			/* Compute smu[i+1] */
+			for (k = 0; k <= lmu[ro] >> 1; k++) {
+				s16 a, b, c;
+
+				if (!(smu[ro * num + k] && dmu[i]))
+					continue;
+
+				a = index_of[dmu[i]];
+				b = index_of[dmu[ro]];
+				c = index_of[smu[ro * num + k]];
+				tmp = a + (cw_len - b) + c;
+				a = alpha_to[tmp % cw_len];
+				smu[(i + 1) * num + (k + diff)] = a;
+			}
+
+			for (k = 0; k <= lmu[i] >> 1; k++)
+				smu[(i + 1) * num + k] ^= smu[i * num + k];
+		}
+
+		/* End Computing Sigma (Mu+1) and L(mu) */
+		/* In either case compute delta */
+		delta[i + 1] = (mu[i + 1] * 2 - lmu[i + 1]) >> 1;
+
+		/* Do not compute discrepancy for the last iteration */
+		if (i >= strength)
+			continue;
+
+		for (k = 0; k <= (lmu[i + 1] >> 1); k++) {
+			tmp = 2 * (i - 1);
+			if (k == 0) {
+				dmu[i + 1] = si[tmp + 3];
+			} else if (smu[(i + 1) * num + k] && si[tmp + 3 - k]) {
+				s16 a, b, c;
+
+				a = index_of[smu[(i + 1) * num + k]];
+				b = si[2 * (i - 1) + 3 - k];
+				c = index_of[b];
+				tmp = a + c;
+				tmp %= cw_len;
+				dmu[i + 1] = alpha_to[tmp] ^ dmu[i + 1];
+			}
+		}
+	}
+}
+
+static int atmel_pmecc_err_location(struct atmel_pmecc_user *user)
+{
+	int sector_size = get_sectorsize(user);
+	int degree = sector_size == 512 ? 13 : 14;
+	struct atmel_pmecc *pmecc = user->pmecc;
+	int strength = get_strength(user);
+	int ret, roots_nbr, i, err_nbr = 0;
+	int num = (2 * strength) + 1;
+	s16 *smu = user->smu;
+	u32 val;
+
+	writel(PMERRLOC_DISABLE, pmecc->regs.errloc + ATMEL_PMERRLOC_ELDIS);
+
+	for (i = 0; i <= user->lmu[strength + 1] >> 1; i++) {
+		writel_relaxed(smu[(strength + 1) * num + i],
+			       pmecc->regs.errloc + ATMEL_PMERRLOC_SIGMA(i));
+		err_nbr++;
+	}
+
+	val = (err_nbr - 1) << 16;
+	if (sector_size == 1024)
+		val |= 1;
+
+	writel(val, pmecc->regs.errloc + ATMEL_PMERRLOC_ELCFG);
+	writel((sector_size * 8) + (degree * strength),
+	       pmecc->regs.errloc + ATMEL_PMERRLOC_ELEN);
+
+	ret = readl_relaxed_poll_timeout(pmecc->regs.errloc +
+					 ATMEL_PMERRLOC_ELISR,
+					 val, val & PMERRLOC_CALC_DONE, 0,
+					 PMECC_MAX_TIMEOUT_MS * 1000);
+	if (ret) {
+		dev_err(pmecc->dev,
+			"PMECC: Timeout to calculate error location.\n");
+		return ret;
+	}
+
+	roots_nbr = (val & PMERRLOC_ERR_NUM_MASK) >> 8;
+	/* Number of roots == degree of smu hence <= cap */
+	if (roots_nbr == user->lmu[strength + 1] >> 1)
+		return err_nbr - 1;
+
+	/*
+	 * Number of roots does not match the degree of smu
+	 * unable to correct error.
+	 */
+	return -EBADMSG;
+}
+
+int atmel_pmecc_correct_sector(struct atmel_pmecc_user *user, int sector,
+			       void *data, void *ecc)
+{
+	struct atmel_pmecc *pmecc = user->pmecc;
+	int sectorsize = get_sectorsize(user);
+	int eccbytes = user->eccbytes;
+	int i, nerrors;
+
+	if (!(user->isr & BIT(sector)))
+		return 0;
+
+	atmel_pmecc_gen_syndrome(user, sector);
+	atmel_pmecc_substitute(user);
+	atmel_pmecc_get_sigma(user);
+
+	nerrors = atmel_pmecc_err_location(user);
+	if (nerrors < 0)
+		return nerrors;
+
+	for (i = 0; i < nerrors; i++) {
+		const char *area;
+		int byte, bit;
+		u32 errpos;
+		u8 *ptr;
+
+		errpos = readl_relaxed(pmecc->regs.errloc +
+				ATMEL_PMERRLOC_EL(pmecc->caps->el_offset, i));
+		errpos--;
+
+		byte = errpos / 8;
+		bit = errpos % 8;
+
+		if (byte < sectorsize) {
+			ptr = data + byte;
+			area = "data";
+		} else if (byte < sectorsize + eccbytes) {
+			ptr = ecc + byte - sectorsize;
+			area = "ECC";
+		} else {
+			dev_dbg(pmecc->dev,
+				"Invalid errpos value (%d, max is %d)\n",
+				errpos, (sectorsize + eccbytes) * 8);
+			return -EINVAL;
+		}
+
+		dev_dbg(pmecc->dev,
+			"Bit flip in %s area, byte %d: 0x%02x -> 0x%02x\n",
+			area, byte, *ptr, (unsigned int)(*ptr ^ BIT(bit)));
+
+		*ptr ^= BIT(bit);
+	}
+
+	return nerrors;
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_correct_sector);
+
+bool atmel_pmecc_correct_erased_chunks(struct atmel_pmecc_user *user)
+{
+	return user->pmecc->caps->correct_erased_chunks;
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_correct_erased_chunks);
+
+void atmel_pmecc_get_generated_eccbytes(struct atmel_pmecc_user *user,
+					int sector, void *ecc)
+{
+	struct atmel_pmecc *pmecc = user->pmecc;
+	u8 *ptr = ecc;
+	int i;
+
+	for (i = 0; i < user->eccbytes; i++)
+		ptr[i] = readb_relaxed(pmecc->regs.base +
+				       ATMEL_PMECC_ECC(sector, i));
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_get_generated_eccbytes);
+
+int atmel_pmecc_enable(struct atmel_pmecc_user *user, int op)
+{
+	struct atmel_pmecc *pmecc = user->pmecc;
+	u32 cfg;
+
+	if (op != NAND_ECC_READ && op != NAND_ECC_WRITE) {
+		dev_err(pmecc->dev, "Bad ECC operation!");
+		return -EINVAL;
+	}
+
+	mutex_lock(&user->pmecc->lock);
+
+	cfg = user->cache.cfg;
+	if (op == NAND_ECC_WRITE)
+		cfg |= PMECC_CFG_WRITE_OP;
+	else
+		cfg |= PMECC_CFG_AUTO_ENABLE;
+
+	writel(cfg, pmecc->regs.base + ATMEL_PMECC_CFG);
+	writel(user->cache.sarea, pmecc->regs.base + ATMEL_PMECC_SAREA);
+	writel(user->cache.saddr, pmecc->regs.base + ATMEL_PMECC_SADDR);
+	writel(user->cache.eaddr, pmecc->regs.base + ATMEL_PMECC_EADDR);
+
+	writel(PMECC_CTRL_ENABLE, pmecc->regs.base + ATMEL_PMECC_CTRL);
+	writel(PMECC_CTRL_DATA, pmecc->regs.base + ATMEL_PMECC_CTRL);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_enable);
+
+void atmel_pmecc_disable(struct atmel_pmecc_user *user)
+{
+	struct atmel_pmecc *pmecc = user->pmecc;
+
+	writel(PMECC_CTRL_RST, pmecc->regs.base + ATMEL_PMECC_CTRL);
+	writel(PMECC_CTRL_DISABLE, pmecc->regs.base + ATMEL_PMECC_CTRL);
+	mutex_unlock(&user->pmecc->lock);
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_disable);
+
+int atmel_pmecc_wait_rdy(struct atmel_pmecc_user *user)
+{
+	struct atmel_pmecc *pmecc = user->pmecc;
+	u32 status;
+	int ret;
+
+	ret = readl_relaxed_poll_timeout(pmecc->regs.base +
+					 ATMEL_PMECC_SR,
+					 status, !(status & PMECC_SR_BUSY), 0,
+					 PMECC_MAX_TIMEOUT_MS * 1000);
+	if (ret) {
+		dev_err(pmecc->dev,
+			"Timeout while waiting for PMECC ready.\n");
+		return ret;
+	}
+
+	user->isr = readl_relaxed(pmecc->regs.base + ATMEL_PMECC_ISR);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(atmel_pmecc_wait_rdy);
+
+static struct atmel_pmecc *atmel_pmecc_create(struct platform_device *pdev,
+					const struct atmel_pmecc_caps *caps,
+					int pmecc_res_idx, int errloc_res_idx)
+{
+	struct device *dev = &pdev->dev;
+	struct atmel_pmecc *pmecc;
+	struct resource *res;
+
+	pmecc = devm_kzalloc(dev, sizeof(*pmecc), GFP_KERNEL);
+	if (!pmecc)
+		return ERR_PTR(-ENOMEM);
+
+	pmecc->caps = caps;
+	pmecc->dev = dev;
+	mutex_init(&pmecc->lock);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, pmecc_res_idx);
+	pmecc->regs.base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pmecc->regs.base))
+		return ERR_CAST(pmecc->regs.base);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, errloc_res_idx);
+	pmecc->regs.errloc = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pmecc->regs.errloc))
+		return ERR_CAST(pmecc->regs.errloc);
+
+	/* Disable all interrupts before registering the PMECC handler. */
+	writel(0xffffffff, pmecc->regs.base + ATMEL_PMECC_IDR);
+
+	/* Reset the ECC engine */
+	writel(PMECC_CTRL_RST, pmecc->regs.base + ATMEL_PMECC_CTRL);
+	writel(PMECC_CTRL_DISABLE, pmecc->regs.base + ATMEL_PMECC_CTRL);
+
+	return pmecc;
+}
+
+static void devm_atmel_pmecc_put(struct device *dev, void *res)
+{
+	struct atmel_pmecc **pmecc = res;
+
+	put_device((*pmecc)->dev);
+}
+
+static struct atmel_pmecc *atmel_pmecc_get_by_node(struct device *userdev,
+						   struct device_node *np)
+{
+	struct platform_device *pdev;
+	struct atmel_pmecc *pmecc, **ptr;
+
+	pdev = of_find_device_by_node(np);
+	if (!pdev || !platform_get_drvdata(pdev))
+		return ERR_PTR(-EPROBE_DEFER);
+
+	ptr = devres_alloc(devm_atmel_pmecc_put, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	get_device(&pdev->dev);
+	pmecc = platform_get_drvdata(pdev);
+
+	*ptr = pmecc;
+
+	devres_add(userdev, ptr);
+
+	return pmecc;
+}
+
+static const int atmel_pmecc_strengths[] = { 2, 4, 8, 12, 24, 32 };
+
+static struct atmel_pmecc_caps at91sam9g45_caps = {
+	.strengths = atmel_pmecc_strengths,
+	.nstrengths = 5,
+	.el_offset = 0x8c,
+};
+
+static struct atmel_pmecc_caps sama5d4_caps = {
+	.strengths = atmel_pmecc_strengths,
+	.nstrengths = 5,
+	.el_offset = 0x8c,
+	.correct_erased_chunks = true,
+};
+
+static struct atmel_pmecc_caps sama5d2_caps = {
+	.strengths = atmel_pmecc_strengths,
+	.nstrengths = 6,
+	.el_offset = 0xac,
+	.correct_erased_chunks = true,
+};
+
+static const struct of_device_id atmel_pmecc_legacy_match[] = {
+	{ .compatible = "atmel,sama5d4-nand", &sama5d4_caps },
+	{ .compatible = "atmel,sama5d2-nand", &sama5d2_caps },
+	{ /* sentinel */ }
+};
+
+struct atmel_pmecc *devm_atmel_pmecc_get(struct device *userdev)
+{
+	struct atmel_pmecc *pmecc;
+	struct device_node *np;
+
+	if (!userdev)
+		return ERR_PTR(-EINVAL);
+
+	if (!userdev->of_node)
+		return NULL;
+
+	np = of_parse_phandle(userdev->of_node, "ecc-engine", 0);
+	if (np) {
+		pmecc = atmel_pmecc_get_by_node(userdev, np);
+		of_node_put(np);
+	} else {
+		/*
+		 * Support old DT bindings: in this case the PMECC iomem
+		 * resources are directly defined in the user pdev at position
+		 * 1 and 2. Extract all relevant information from there.
+		 */
+		struct platform_device *pdev = to_platform_device(userdev);
+		const struct atmel_pmecc_caps *caps;
+
+		/* No PMECC engine available. */
+		if (!of_property_read_bool(userdev->of_node,
+					   "atmel,has-pmecc"))
+			return NULL;
+
+		caps = &at91sam9g45_caps;
+
+		/*
+		 * Try to find the NFC subnode and extract the associated caps
+		 * from there.
+		 */
+		np = of_find_compatible_node(userdev->of_node, NULL,
+					     "atmel,sama5d3-nfc");
+		if (np) {
+			const struct of_device_id *match;
+
+			match = of_match_node(atmel_pmecc_legacy_match, np);
+			if (match && match->data)
+				caps = match->data;
+
+			of_node_put(np);
+		}
+
+		pmecc = atmel_pmecc_create(pdev, caps, 1, 2);
+	}
+
+	return pmecc;
+}
+EXPORT_SYMBOL(devm_atmel_pmecc_get);
+
+static const struct of_device_id atmel_pmecc_match[] = {
+	{ .compatible = "atmel,at91sam9g45-pmecc", &at91sam9g45_caps },
+	{ .compatible = "atmel,sama5d4-pmecc", &sama5d4_caps },
+	{ .compatible = "atmel,sama5d2-pmecc", &sama5d2_caps },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, atmel_pmecc_match);
+
+static int atmel_pmecc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct atmel_pmecc_caps *caps;
+	struct atmel_pmecc *pmecc;
+
+	caps = of_device_get_match_data(&pdev->dev);
+	if (!caps) {
+		dev_err(dev, "Invalid caps\n");
+		return -EINVAL;
+	}
+
+	pmecc = atmel_pmecc_create(pdev, caps, 0, 1);
+	if (IS_ERR(pmecc))
+		return PTR_ERR(pmecc);
+
+	platform_set_drvdata(pdev, pmecc);
+
+	return 0;
+}
+
+static struct platform_driver atmel_pmecc_driver = {
+	.driver = {
+		.name = "atmel-pmecc",
+		.of_match_table = of_match_ptr(atmel_pmecc_match),
+	},
+	.probe = atmel_pmecc_probe,
+};
+module_platform_driver(atmel_pmecc_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Boris Brezillon <boris.brezillon@free-electrons.com>");
+MODULE_DESCRIPTION("PMECC engine driver");
+MODULE_ALIAS("platform:atmel_pmecc");
diff --git a/drivers/mtd/nand/atmel/pmecc.h b/drivers/mtd/nand/atmel/pmecc.h
new file mode 100644
index 000000000000..a8ddbfca2ea5
--- /dev/null
+++ b/drivers/mtd/nand/atmel/pmecc.h
@@ -0,0 +1,73 @@
+/*
+ * © Copyright 2016 ATMEL
+ * © Copyright 2016 Free Electrons
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ *
+ * Derived from the atmel_nand.c driver which contained the following
+ * copyrights:
+ *
+ *    Copyright © 2003 Rick Bronson
+ *
+ *    Derived from drivers/mtd/nand/autcpu12.c
+ *        Copyright © 2001 Thomas Gleixner (gleixner@autronix.de)
+ *
+ *    Derived from drivers/mtd/spia.c
+ *        Copyright © 2000 Steven J. Hill (sjhill@cotw.com)
+ *
+ *
+ *    Add Hardware ECC support for AT91SAM9260 / AT91SAM9263
+ *        Richard Genoud (richard.genoud@gmail.com), Adeneo Copyright © 2007
+ *
+ *        Derived from Das U-Boot source code
+ *              (u-boot-1.1.5/board/atmel/at91sam9263ek/nand.c)
+ *        © Copyright 2006 ATMEL Rousset, Lacressonniere Nicolas
+ *
+ *    Add Programmable Multibit ECC support for various AT91 SoC
+ *        © Copyright 2012 ATMEL, Hong Xu
+ *
+ *    Add Nand Flash Controller support for SAMA5 SoC
+ *        © Copyright 2013 ATMEL, Josh Wu (josh.wu@atmel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef ATMEL_PMECC_H
+#define ATMEL_PMECC_H
+
+#define ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH	0
+#define ATMEL_PMECC_SECTOR_SIZE_AUTO		0
+#define ATMEL_PMECC_OOBOFFSET_AUTO		-1
+
+struct atmel_pmecc_user_req {
+	int pagesize;
+	int oobsize;
+	struct {
+		int strength;
+		int bytes;
+		int sectorsize;
+		int nsectors;
+		int ooboffset;
+	} ecc;
+};
+
+struct atmel_pmecc *devm_atmel_pmecc_get(struct device *dev);
+
+struct atmel_pmecc_user *
+atmel_pmecc_create_user(struct atmel_pmecc *pmecc,
+			struct atmel_pmecc_user_req *req);
+void atmel_pmecc_destroy_user(struct atmel_pmecc_user *user);
+
+int atmel_pmecc_enable(struct atmel_pmecc_user *user, int op);
+void atmel_pmecc_disable(struct atmel_pmecc_user *user);
+int atmel_pmecc_wait_rdy(struct atmel_pmecc_user *user);
+int atmel_pmecc_correct_sector(struct atmel_pmecc_user *user, int sector,
+			       void *data, void *ecc);
+bool atmel_pmecc_correct_erased_chunks(struct atmel_pmecc_user *user);
+void atmel_pmecc_get_generated_eccbytes(struct atmel_pmecc_user *user,
+					int sector, void *ecc);
+
+#endif /* ATMEL_PMECC_H */
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
deleted file mode 100644
index 9ebd5ecefea6..000000000000
--- a/drivers/mtd/nand/atmel_nand.c
+++ /dev/null
@@ -1,2479 +0,0 @@
-/*
- *  Copyright © 2003 Rick Bronson
- *
- *  Derived from drivers/mtd/nand/autcpu12.c
- *	 Copyright © 2001 Thomas Gleixner (gleixner@autronix.de)
- *
- *  Derived from drivers/mtd/spia.c
- *	 Copyright © 2000 Steven J. Hill (sjhill@cotw.com)
- *
- *
- *  Add Hardware ECC support for AT91SAM9260 / AT91SAM9263
- *     Richard Genoud (richard.genoud@gmail.com), Adeneo Copyright © 2007
- *
- *     Derived from Das U-Boot source code
- *     		(u-boot-1.1.5/board/atmel/at91sam9263ek/nand.c)
- *     © Copyright 2006 ATMEL Rousset, Lacressonniere Nicolas
- *
- *  Add Programmable Multibit ECC support for various AT91 SoC
- *     © Copyright 2012 ATMEL, Hong Xu
- *
- *  Add Nand Flash Controller support for SAMA5 SoC
- *     © Copyright 2013 ATMEL, Josh Wu (josh.wu@atmel.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/clk.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_gpio.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/nand.h>
-#include <linux/mtd/partitions.h>
-
-#include <linux/delay.h>
-#include <linux/dmaengine.h>
-#include <linux/gpio.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/platform_data/atmel.h>
-
-static int use_dma = 1;
-module_param(use_dma, int, 0);
-
-static int on_flash_bbt = 0;
-module_param(on_flash_bbt, int, 0);
-
-/* Register access macros */
-#define ecc_readl(add, reg)				\
-	__raw_readl(add + ATMEL_ECC_##reg)
-#define ecc_writel(add, reg, value)			\
-	__raw_writel((value), add + ATMEL_ECC_##reg)
-
-#include "atmel_nand_ecc.h"	/* Hardware ECC registers */
-#include "atmel_nand_nfc.h"	/* Nand Flash Controller definition */
-
-struct atmel_nand_caps {
-	bool pmecc_correct_erase_page;
-	uint8_t pmecc_max_correction;
-};
-
-/*
- * oob layout for large page size
- * bad block info is on bytes 0 and 1
- * the bytes have to be consecutives to avoid
- * several NAND_CMD_RNDOUT during read
- *
- * oob layout for small page size
- * bad block info is on bytes 4 and 5
- * the bytes have to be consecutives to avoid
- * several NAND_CMD_RNDOUT during read
- */
-static int atmel_ooblayout_ecc_sp(struct mtd_info *mtd, int section,
-				  struct mtd_oob_region *oobregion)
-{
-	if (section)
-		return -ERANGE;
-
-	oobregion->length = 4;
-	oobregion->offset = 0;
-
-	return 0;
-}
-
-static int atmel_ooblayout_free_sp(struct mtd_info *mtd, int section,
-				   struct mtd_oob_region *oobregion)
-{
-	if (section)
-		return -ERANGE;
-
-	oobregion->offset = 6;
-	oobregion->length = mtd->oobsize - oobregion->offset;
-
-	return 0;
-}
-
-static const struct mtd_ooblayout_ops atmel_ooblayout_sp_ops = {
-	.ecc = atmel_ooblayout_ecc_sp,
-	.free = atmel_ooblayout_free_sp,
-};
-
-struct atmel_nfc {
-	void __iomem		*base_cmd_regs;
-	void __iomem		*hsmc_regs;
-	void			*sram_bank0;
-	dma_addr_t		sram_bank0_phys;
-	bool			use_nfc_sram;
-	bool			write_by_sram;
-
-	struct clk		*clk;
-
-	bool			is_initialized;
-	struct completion	comp_ready;
-	struct completion	comp_cmd_done;
-	struct completion	comp_xfer_done;
-
-	/* Point to the sram bank which include readed data via NFC */
-	void			*data_in_sram;
-	bool			will_write_sram;
-};
-static struct atmel_nfc	nand_nfc;
-
-struct atmel_nand_host {
-	struct nand_chip	nand_chip;
-	void __iomem		*io_base;
-	dma_addr_t		io_phys;
-	struct atmel_nand_data	board;
-	struct device		*dev;
-	void __iomem		*ecc;
-
-	struct completion	comp;
-	struct dma_chan		*dma_chan;
-
-	struct atmel_nfc	*nfc;
-
-	const struct atmel_nand_caps	*caps;
-	bool			has_pmecc;
-	u8			pmecc_corr_cap;
-	u16			pmecc_sector_size;
-	bool			has_no_lookup_table;
-	u32			pmecc_lookup_table_offset;
-	u32			pmecc_lookup_table_offset_512;
-	u32			pmecc_lookup_table_offset_1024;
-
-	int			pmecc_degree;	/* Degree of remainders */
-	int			pmecc_cw_len;	/* Length of codeword */
-
-	void __iomem		*pmerrloc_base;
-	void __iomem		*pmerrloc_el_base;
-	void __iomem		*pmecc_rom_base;
-
-	/* lookup table for alpha_to and index_of */
-	void __iomem		*pmecc_alpha_to;
-	void __iomem		*pmecc_index_of;
-
-	/* data for pmecc computation */
-	int16_t			*pmecc_partial_syn;
-	int16_t			*pmecc_si;
-	int16_t			*pmecc_smu;	/* Sigma table */
-	int16_t			*pmecc_lmu;	/* polynomal order */
-	int			*pmecc_mu;
-	int			*pmecc_dmu;
-	int			*pmecc_delta;
-};
-
-/*
- * Enable NAND.
- */
-static void atmel_nand_enable(struct atmel_nand_host *host)
-{
-	if (gpio_is_valid(host->board.enable_pin))
-		gpio_set_value(host->board.enable_pin, 0);
-}
-
-/*
- * Disable NAND.
- */
-static void atmel_nand_disable(struct atmel_nand_host *host)
-{
-	if (gpio_is_valid(host->board.enable_pin))
-		gpio_set_value(host->board.enable_pin, 1);
-}
-
-/*
- * Hardware specific access to control-lines
- */
-static void atmel_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	if (ctrl & NAND_CTRL_CHANGE) {
-		if (ctrl & NAND_NCE)
-			atmel_nand_enable(host);
-		else
-			atmel_nand_disable(host);
-	}
-	if (cmd == NAND_CMD_NONE)
-		return;
-
-	if (ctrl & NAND_CLE)
-		writeb(cmd, host->io_base + (1 << host->board.cle));
-	else
-		writeb(cmd, host->io_base + (1 << host->board.ale));
-}
-
-/*
- * Read the Device Ready pin.
- */
-static int atmel_nand_device_ready(struct mtd_info *mtd)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	return gpio_get_value(host->board.rdy_pin) ^
-                !!host->board.rdy_pin_active_low;
-}
-
-/* Set up for hardware ready pin and enable pin. */
-static int atmel_nand_set_enable_ready_pins(struct mtd_info *mtd)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	int res = 0;
-
-	if (gpio_is_valid(host->board.rdy_pin)) {
-		res = devm_gpio_request(host->dev,
-				host->board.rdy_pin, "nand_rdy");
-		if (res < 0) {
-			dev_err(host->dev,
-				"can't request rdy gpio %d\n",
-				host->board.rdy_pin);
-			return res;
-		}
-
-		res = gpio_direction_input(host->board.rdy_pin);
-		if (res < 0) {
-			dev_err(host->dev,
-				"can't request input direction rdy gpio %d\n",
-				host->board.rdy_pin);
-			return res;
-		}
-
-		chip->dev_ready = atmel_nand_device_ready;
-	}
-
-	if (gpio_is_valid(host->board.enable_pin)) {
-		res = devm_gpio_request(host->dev,
-				host->board.enable_pin, "nand_enable");
-		if (res < 0) {
-			dev_err(host->dev,
-				"can't request enable gpio %d\n",
-				host->board.enable_pin);
-			return res;
-		}
-
-		res = gpio_direction_output(host->board.enable_pin, 1);
-		if (res < 0) {
-			dev_err(host->dev,
-				"can't request output direction enable gpio %d\n",
-				host->board.enable_pin);
-			return res;
-		}
-	}
-
-	return res;
-}
-
-/*
- * Minimal-overhead PIO for data access.
- */
-static void atmel_read_buf8(struct mtd_info *mtd, u8 *buf, int len)
-{
-	struct nand_chip	*nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	if (host->nfc && host->nfc->use_nfc_sram && host->nfc->data_in_sram) {
-		memcpy(buf, host->nfc->data_in_sram, len);
-		host->nfc->data_in_sram += len;
-	} else {
-		__raw_readsb(nand_chip->IO_ADDR_R, buf, len);
-	}
-}
-
-static void atmel_read_buf16(struct mtd_info *mtd, u8 *buf, int len)
-{
-	struct nand_chip	*nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	if (host->nfc && host->nfc->use_nfc_sram && host->nfc->data_in_sram) {
-		memcpy(buf, host->nfc->data_in_sram, len);
-		host->nfc->data_in_sram += len;
-	} else {
-		__raw_readsw(nand_chip->IO_ADDR_R, buf, len / 2);
-	}
-}
-
-static void atmel_write_buf8(struct mtd_info *mtd, const u8 *buf, int len)
-{
-	struct nand_chip	*nand_chip = mtd_to_nand(mtd);
-
-	__raw_writesb(nand_chip->IO_ADDR_W, buf, len);
-}
-
-static void atmel_write_buf16(struct mtd_info *mtd, const u8 *buf, int len)
-{
-	struct nand_chip	*nand_chip = mtd_to_nand(mtd);
-
-	__raw_writesw(nand_chip->IO_ADDR_W, buf, len / 2);
-}
-
-static void dma_complete_func(void *completion)
-{
-	complete(completion);
-}
-
-static int nfc_set_sram_bank(struct atmel_nand_host *host, unsigned int bank)
-{
-	/* NFC only has two banks. Must be 0 or 1 */
-	if (bank > 1)
-		return -EINVAL;
-
-	if (bank) {
-		struct mtd_info *mtd = nand_to_mtd(&host->nand_chip);
-
-		/* Only for a 2k-page or lower flash, NFC can handle 2 banks */
-		if (mtd->writesize > 2048)
-			return -EINVAL;
-		nfc_writel(host->nfc->hsmc_regs, BANK, ATMEL_HSMC_NFC_BANK1);
-	} else {
-		nfc_writel(host->nfc->hsmc_regs, BANK, ATMEL_HSMC_NFC_BANK0);
-	}
-
-	return 0;
-}
-
-static uint nfc_get_sram_off(struct atmel_nand_host *host)
-{
-	if (nfc_readl(host->nfc->hsmc_regs, BANK) & ATMEL_HSMC_NFC_BANK1)
-		return NFC_SRAM_BANK1_OFFSET;
-	else
-		return 0;
-}
-
-static dma_addr_t nfc_sram_phys(struct atmel_nand_host *host)
-{
-	if (nfc_readl(host->nfc->hsmc_regs, BANK) & ATMEL_HSMC_NFC_BANK1)
-		return host->nfc->sram_bank0_phys + NFC_SRAM_BANK1_OFFSET;
-	else
-		return host->nfc->sram_bank0_phys;
-}
-
-static int atmel_nand_dma_op(struct mtd_info *mtd, void *buf, int len,
-			       int is_read)
-{
-	struct dma_device *dma_dev;
-	enum dma_ctrl_flags flags;
-	dma_addr_t dma_src_addr, dma_dst_addr, phys_addr;
-	struct dma_async_tx_descriptor *tx = NULL;
-	dma_cookie_t cookie;
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	void *p = buf;
-	int err = -EIO;
-	enum dma_data_direction dir = is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-	struct atmel_nfc *nfc = host->nfc;
-
-	if (buf >= high_memory)
-		goto err_buf;
-
-	dma_dev = host->dma_chan->device;
-
-	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
-
-	phys_addr = dma_map_single(dma_dev->dev, p, len, dir);
-	if (dma_mapping_error(dma_dev->dev, phys_addr)) {
-		dev_err(host->dev, "Failed to dma_map_single\n");
-		goto err_buf;
-	}
-
-	if (is_read) {
-		if (nfc && nfc->data_in_sram)
-			dma_src_addr = nfc_sram_phys(host) + (nfc->data_in_sram
-				- (nfc->sram_bank0 + nfc_get_sram_off(host)));
-		else
-			dma_src_addr = host->io_phys;
-
-		dma_dst_addr = phys_addr;
-	} else {
-		dma_src_addr = phys_addr;
-
-		if (nfc && nfc->write_by_sram)
-			dma_dst_addr = nfc_sram_phys(host);
-		else
-			dma_dst_addr = host->io_phys;
-	}
-
-	tx = dma_dev->device_prep_dma_memcpy(host->dma_chan, dma_dst_addr,
-					     dma_src_addr, len, flags);
-	if (!tx) {
-		dev_err(host->dev, "Failed to prepare DMA memcpy\n");
-		goto err_dma;
-	}
-
-	init_completion(&host->comp);
-	tx->callback = dma_complete_func;
-	tx->callback_param = &host->comp;
-
-	cookie = tx->tx_submit(tx);
-	if (dma_submit_error(cookie)) {
-		dev_err(host->dev, "Failed to do DMA tx_submit\n");
-		goto err_dma;
-	}
-
-	dma_async_issue_pending(host->dma_chan);
-	wait_for_completion(&host->comp);
-
-	if (is_read && nfc && nfc->data_in_sram)
-		/* After read data from SRAM, need to increase the position */
-		nfc->data_in_sram += len;
-
-	err = 0;
-
-err_dma:
-	dma_unmap_single(dma_dev->dev, phys_addr, len, dir);
-err_buf:
-	if (err != 0)
-		dev_dbg(host->dev, "Fall back to CPU I/O\n");
-	return err;
-}
-
-static void atmel_read_buf(struct mtd_info *mtd, u8 *buf, int len)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
-	if (use_dma && len > mtd->oobsize)
-		/* only use DMA for bigger than oob size: better performances */
-		if (atmel_nand_dma_op(mtd, buf, len, 1) == 0)
-			return;
-
-	if (chip->options & NAND_BUSWIDTH_16)
-		atmel_read_buf16(mtd, buf, len);
-	else
-		atmel_read_buf8(mtd, buf, len);
-}
-
-static void atmel_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
-	if (use_dma && len > mtd->oobsize)
-		/* only use DMA for bigger than oob size: better performances */
-		if (atmel_nand_dma_op(mtd, (void *)buf, len, 0) == 0)
-			return;
-
-	if (chip->options & NAND_BUSWIDTH_16)
-		atmel_write_buf16(mtd, buf, len);
-	else
-		atmel_write_buf8(mtd, buf, len);
-}
-
-/*
- * Return number of ecc bytes per sector according to sector size and
- * correction capability
- *
- * Following table shows what at91 PMECC supported:
- * Correction Capability	Sector_512_bytes	Sector_1024_bytes
- * =====================	================	=================
- *                2-bits                 4-bytes                  4-bytes
- *                4-bits                 7-bytes                  7-bytes
- *                8-bits                13-bytes                 14-bytes
- *               12-bits                20-bytes                 21-bytes
- *               24-bits                39-bytes                 42-bytes
- *               32-bits                52-bytes                 56-bytes
- */
-static int pmecc_get_ecc_bytes(int cap, int sector_size)
-{
-	int m = 12 + sector_size / 512;
-	return (m * cap + 7) / 8;
-}
-
-static void __iomem *pmecc_get_alpha_to(struct atmel_nand_host *host)
-{
-	int table_size;
-
-	table_size = host->pmecc_sector_size == 512 ?
-		PMECC_LOOKUP_TABLE_SIZE_512 : PMECC_LOOKUP_TABLE_SIZE_1024;
-
-	return host->pmecc_rom_base + host->pmecc_lookup_table_offset +
-			table_size * sizeof(int16_t);
-}
-
-static int pmecc_data_alloc(struct atmel_nand_host *host)
-{
-	const int cap = host->pmecc_corr_cap;
-	int size;
-
-	size = (2 * cap + 1) * sizeof(int16_t);
-	host->pmecc_partial_syn = devm_kzalloc(host->dev, size, GFP_KERNEL);
-	host->pmecc_si = devm_kzalloc(host->dev, size, GFP_KERNEL);
-	host->pmecc_lmu = devm_kzalloc(host->dev,
-			(cap + 1) * sizeof(int16_t), GFP_KERNEL);
-	host->pmecc_smu = devm_kzalloc(host->dev,
-			(cap + 2) * size, GFP_KERNEL);
-
-	size = (cap + 1) * sizeof(int);
-	host->pmecc_mu = devm_kzalloc(host->dev, size, GFP_KERNEL);
-	host->pmecc_dmu = devm_kzalloc(host->dev, size, GFP_KERNEL);
-	host->pmecc_delta = devm_kzalloc(host->dev, size, GFP_KERNEL);
-
-	if (!host->pmecc_partial_syn ||
-		!host->pmecc_si ||
-		!host->pmecc_lmu ||
-		!host->pmecc_smu ||
-		!host->pmecc_mu ||
-		!host->pmecc_dmu ||
-		!host->pmecc_delta)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static void pmecc_gen_syndrome(struct mtd_info *mtd, int sector)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	int i;
-	uint32_t value;
-
-	/* Fill odd syndromes */
-	for (i = 0; i < host->pmecc_corr_cap; i++) {
-		value = pmecc_readl_rem_relaxed(host->ecc, sector, i / 2);
-		if (i & 1)
-			value >>= 16;
-		value &= 0xffff;
-		host->pmecc_partial_syn[(2 * i) + 1] = (int16_t)value;
-	}
-}
-
-static void pmecc_substitute(struct mtd_info *mtd)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	int16_t __iomem *alpha_to = host->pmecc_alpha_to;
-	int16_t __iomem *index_of = host->pmecc_index_of;
-	int16_t *partial_syn = host->pmecc_partial_syn;
-	const int cap = host->pmecc_corr_cap;
-	int16_t *si;
-	int i, j;
-
-	/* si[] is a table that holds the current syndrome value,
-	 * an element of that table belongs to the field
-	 */
-	si = host->pmecc_si;
-
-	memset(&si[1], 0, sizeof(int16_t) * (2 * cap - 1));
-
-	/* Computation 2t syndromes based on S(x) */
-	/* Odd syndromes */
-	for (i = 1; i < 2 * cap; i += 2) {
-		for (j = 0; j < host->pmecc_degree; j++) {
-			if (partial_syn[i] & ((unsigned short)0x1 << j))
-				si[i] = readw_relaxed(alpha_to + i * j) ^ si[i];
-		}
-	}
-	/* Even syndrome = (Odd syndrome) ** 2 */
-	for (i = 2, j = 1; j <= cap; i = ++j << 1) {
-		if (si[j] == 0) {
-			si[i] = 0;
-		} else {
-			int16_t tmp;
-
-			tmp = readw_relaxed(index_of + si[j]);
-			tmp = (tmp * 2) % host->pmecc_cw_len;
-			si[i] = readw_relaxed(alpha_to + tmp);
-		}
-	}
-
-	return;
-}
-
-static void pmecc_get_sigma(struct mtd_info *mtd)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	int16_t *lmu = host->pmecc_lmu;
-	int16_t *si = host->pmecc_si;
-	int *mu = host->pmecc_mu;
-	int *dmu = host->pmecc_dmu;	/* Discrepancy */
-	int *delta = host->pmecc_delta; /* Delta order */
-	int cw_len = host->pmecc_cw_len;
-	const int16_t cap = host->pmecc_corr_cap;
-	const int num = 2 * cap + 1;
-	int16_t __iomem	*index_of = host->pmecc_index_of;
-	int16_t __iomem	*alpha_to = host->pmecc_alpha_to;
-	int i, j, k;
-	uint32_t dmu_0_count, tmp;
-	int16_t *smu = host->pmecc_smu;
-
-	/* index of largest delta */
-	int ro;
-	int largest;
-	int diff;
-
-	dmu_0_count = 0;
-
-	/* First Row */
-
-	/* Mu */
-	mu[0] = -1;
-
-	memset(smu, 0, sizeof(int16_t) * num);
-	smu[0] = 1;
-
-	/* discrepancy set to 1 */
-	dmu[0] = 1;
-	/* polynom order set to 0 */
-	lmu[0] = 0;
-	delta[0] = (mu[0] * 2 - lmu[0]) >> 1;
-
-	/* Second Row */
-
-	/* Mu */
-	mu[1] = 0;
-	/* Sigma(x) set to 1 */
-	memset(&smu[num], 0, sizeof(int16_t) * num);
-	smu[num] = 1;
-
-	/* discrepancy set to S1 */
-	dmu[1] = si[1];
-
-	/* polynom order set to 0 */
-	lmu[1] = 0;
-
-	delta[1] = (mu[1] * 2 - lmu[1]) >> 1;
-
-	/* Init the Sigma(x) last row */
-	memset(&smu[(cap + 1) * num], 0, sizeof(int16_t) * num);
-
-	for (i = 1; i <= cap; i++) {
-		mu[i + 1] = i << 1;
-		/* Begin Computing Sigma (Mu+1) and L(mu) */
-		/* check if discrepancy is set to 0 */
-		if (dmu[i] == 0) {
-			dmu_0_count++;
-
-			tmp = ((cap - (lmu[i] >> 1) - 1) / 2);
-			if ((cap - (lmu[i] >> 1) - 1) & 0x1)
-				tmp += 2;
-			else
-				tmp += 1;
-
-			if (dmu_0_count == tmp) {
-				for (j = 0; j <= (lmu[i] >> 1) + 1; j++)
-					smu[(cap + 1) * num + j] =
-							smu[i * num + j];
-
-				lmu[cap + 1] = lmu[i];
-				return;
-			}
-
-			/* copy polynom */
-			for (j = 0; j <= lmu[i] >> 1; j++)
-				smu[(i + 1) * num + j] = smu[i * num + j];
-
-			/* copy previous polynom order to the next */
-			lmu[i + 1] = lmu[i];
-		} else {
-			ro = 0;
-			largest = -1;
-			/* find largest delta with dmu != 0 */
-			for (j = 0; j < i; j++) {
-				if ((dmu[j]) && (delta[j] > largest)) {
-					largest = delta[j];
-					ro = j;
-				}
-			}
-
-			/* compute difference */
-			diff = (mu[i] - mu[ro]);
-
-			/* Compute degree of the new smu polynomial */
-			if ((lmu[i] >> 1) > ((lmu[ro] >> 1) + diff))
-				lmu[i + 1] = lmu[i];
-			else
-				lmu[i + 1] = ((lmu[ro] >> 1) + diff) * 2;
-
-			/* Init smu[i+1] with 0 */
-			for (k = 0; k < num; k++)
-				smu[(i + 1) * num + k] = 0;
-
-			/* Compute smu[i+1] */
-			for (k = 0; k <= lmu[ro] >> 1; k++) {
-				int16_t a, b, c;
-
-				if (!(smu[ro * num + k] && dmu[i]))
-					continue;
-				a = readw_relaxed(index_of + dmu[i]);
-				b = readw_relaxed(index_of + dmu[ro]);
-				c = readw_relaxed(index_of + smu[ro * num + k]);
-				tmp = a + (cw_len - b) + c;
-				a = readw_relaxed(alpha_to + tmp % cw_len);
-				smu[(i + 1) * num + (k + diff)] = a;
-			}
-
-			for (k = 0; k <= lmu[i] >> 1; k++)
-				smu[(i + 1) * num + k] ^= smu[i * num + k];
-		}
-
-		/* End Computing Sigma (Mu+1) and L(mu) */
-		/* In either case compute delta */
-		delta[i + 1] = (mu[i + 1] * 2 - lmu[i + 1]) >> 1;
-
-		/* Do not compute discrepancy for the last iteration */
-		if (i >= cap)
-			continue;
-
-		for (k = 0; k <= (lmu[i + 1] >> 1); k++) {
-			tmp = 2 * (i - 1);
-			if (k == 0) {
-				dmu[i + 1] = si[tmp + 3];
-			} else if (smu[(i + 1) * num + k] && si[tmp + 3 - k]) {
-				int16_t a, b, c;
-				a = readw_relaxed(index_of +
-						smu[(i + 1) * num + k]);
-				b = si[2 * (i - 1) + 3 - k];
-				c = readw_relaxed(index_of + b);
-				tmp = a + c;
-				tmp %= cw_len;
-				dmu[i + 1] = readw_relaxed(alpha_to + tmp) ^
-					dmu[i + 1];
-			}
-		}
-	}
-
-	return;
-}
-
-static int pmecc_err_location(struct mtd_info *mtd)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	unsigned long end_time;
-	const int cap = host->pmecc_corr_cap;
-	const int num = 2 * cap + 1;
-	int sector_size = host->pmecc_sector_size;
-	int err_nbr = 0;	/* number of error */
-	int roots_nbr;		/* number of roots */
-	int i;
-	uint32_t val;
-	int16_t *smu = host->pmecc_smu;
-
-	pmerrloc_writel(host->pmerrloc_base, ELDIS, PMERRLOC_DISABLE);
-
-	for (i = 0; i <= host->pmecc_lmu[cap + 1] >> 1; i++) {
-		pmerrloc_writel_sigma_relaxed(host->pmerrloc_base, i,
-				      smu[(cap + 1) * num + i]);
-		err_nbr++;
-	}
-
-	val = (err_nbr - 1) << 16;
-	if (sector_size == 1024)
-		val |= 1;
-
-	pmerrloc_writel(host->pmerrloc_base, ELCFG, val);
-	pmerrloc_writel(host->pmerrloc_base, ELEN,
-			sector_size * 8 + host->pmecc_degree * cap);
-
-	end_time = jiffies + msecs_to_jiffies(PMECC_MAX_TIMEOUT_MS);
-	while (!(pmerrloc_readl_relaxed(host->pmerrloc_base, ELISR)
-		 & PMERRLOC_CALC_DONE)) {
-		if (unlikely(time_after(jiffies, end_time))) {
-			dev_err(host->dev, "PMECC: Timeout to calculate error location.\n");
-			return -1;
-		}
-		cpu_relax();
-	}
-
-	roots_nbr = (pmerrloc_readl_relaxed(host->pmerrloc_base, ELISR)
-		& PMERRLOC_ERR_NUM_MASK) >> 8;
-	/* Number of roots == degree of smu hence <= cap */
-	if (roots_nbr == host->pmecc_lmu[cap + 1] >> 1)
-		return err_nbr - 1;
-
-	/* Number of roots does not match the degree of smu
-	 * unable to correct error */
-	return -1;
-}
-
-static void pmecc_correct_data(struct mtd_info *mtd, uint8_t *buf, uint8_t *ecc,
-		int sector_num, int extra_bytes, int err_nbr)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	int i = 0;
-	int byte_pos, bit_pos, sector_size, pos;
-	uint32_t tmp;
-	uint8_t err_byte;
-
-	sector_size = host->pmecc_sector_size;
-
-	while (err_nbr) {
-		tmp = pmerrloc_readl_el_relaxed(host->pmerrloc_el_base, i) - 1;
-		byte_pos = tmp / 8;
-		bit_pos  = tmp % 8;
-
-		if (byte_pos >= (sector_size + extra_bytes))
-			BUG();	/* should never happen */
-
-		if (byte_pos < sector_size) {
-			err_byte = *(buf + byte_pos);
-			*(buf + byte_pos) ^= (1 << bit_pos);
-
-			pos = sector_num * host->pmecc_sector_size + byte_pos;
-			dev_dbg(host->dev, "Bit flip in data area, byte_pos: %d, bit_pos: %d, 0x%02x -> 0x%02x\n",
-				pos, bit_pos, err_byte, *(buf + byte_pos));
-		} else {
-			struct mtd_oob_region oobregion;
-
-			/* Bit flip in OOB area */
-			tmp = sector_num * nand_chip->ecc.bytes
-					+ (byte_pos - sector_size);
-			err_byte = ecc[tmp];
-			ecc[tmp] ^= (1 << bit_pos);
-
-			mtd_ooblayout_ecc(mtd, 0, &oobregion);
-			pos = tmp + oobregion.offset;
-			dev_dbg(host->dev, "Bit flip in OOB, oob_byte_pos: %d, bit_pos: %d, 0x%02x -> 0x%02x\n",
-				pos, bit_pos, err_byte, ecc[tmp]);
-		}
-
-		i++;
-		err_nbr--;
-	}
-
-	return;
-}
-
-static int pmecc_correction(struct mtd_info *mtd, u32 pmecc_stat, uint8_t *buf,
-	u8 *ecc)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	int i, err_nbr;
-	uint8_t *buf_pos;
-	int max_bitflips = 0;
-
-	for (i = 0; i < nand_chip->ecc.steps; i++) {
-		err_nbr = 0;
-		if (pmecc_stat & 0x1) {
-			buf_pos = buf + i * host->pmecc_sector_size;
-
-			pmecc_gen_syndrome(mtd, i);
-			pmecc_substitute(mtd);
-			pmecc_get_sigma(mtd);
-
-			err_nbr = pmecc_err_location(mtd);
-			if (err_nbr >= 0) {
-				pmecc_correct_data(mtd, buf_pos, ecc, i,
-						   nand_chip->ecc.bytes,
-						   err_nbr);
-			} else if (!host->caps->pmecc_correct_erase_page) {
-				u8 *ecc_pos = ecc + (i * nand_chip->ecc.bytes);
-
-				/* Try to detect erased pages */
-				err_nbr = nand_check_erased_ecc_chunk(buf_pos,
-							host->pmecc_sector_size,
-							ecc_pos,
-							nand_chip->ecc.bytes,
-							NULL, 0,
-							nand_chip->ecc.strength);
-			}
-
-			if (err_nbr < 0) {
-				dev_err(host->dev, "PMECC: Too many errors\n");
-				mtd->ecc_stats.failed++;
-				return -EIO;
-			}
-
-			mtd->ecc_stats.corrected += err_nbr;
-			max_bitflips = max_t(int, max_bitflips, err_nbr);
-		}
-		pmecc_stat >>= 1;
-	}
-
-	return max_bitflips;
-}
-
-static void pmecc_enable(struct atmel_nand_host *host, int ecc_op)
-{
-	u32 val;
-
-	if (ecc_op != NAND_ECC_READ && ecc_op != NAND_ECC_WRITE) {
-		dev_err(host->dev, "atmel_nand: wrong pmecc operation type!");
-		return;
-	}
-
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_RST);
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
-	val = pmecc_readl_relaxed(host->ecc, CFG);
-
-	if (ecc_op == NAND_ECC_READ)
-		pmecc_writel(host->ecc, CFG, (val & ~PMECC_CFG_WRITE_OP)
-			| PMECC_CFG_AUTO_ENABLE);
-	else
-		pmecc_writel(host->ecc, CFG, (val | PMECC_CFG_WRITE_OP)
-			& ~PMECC_CFG_AUTO_ENABLE);
-
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_ENABLE);
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DATA);
-}
-
-static int atmel_nand_pmecc_read_page(struct mtd_info *mtd,
-	struct nand_chip *chip, uint8_t *buf, int oob_required, int page)
-{
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	int eccsize = chip->ecc.size * chip->ecc.steps;
-	uint8_t *oob = chip->oob_poi;
-	uint32_t stat;
-	unsigned long end_time;
-	int bitflips = 0;
-
-	if (!host->nfc || !host->nfc->use_nfc_sram)
-		pmecc_enable(host, NAND_ECC_READ);
-
-	chip->read_buf(mtd, buf, eccsize);
-	chip->read_buf(mtd, oob, mtd->oobsize);
-
-	end_time = jiffies + msecs_to_jiffies(PMECC_MAX_TIMEOUT_MS);
-	while ((pmecc_readl_relaxed(host->ecc, SR) & PMECC_SR_BUSY)) {
-		if (unlikely(time_after(jiffies, end_time))) {
-			dev_err(host->dev, "PMECC: Timeout to get error status.\n");
-			return -EIO;
-		}
-		cpu_relax();
-	}
-
-	stat = pmecc_readl_relaxed(host->ecc, ISR);
-	if (stat != 0) {
-		struct mtd_oob_region oobregion;
-
-		mtd_ooblayout_ecc(mtd, 0, &oobregion);
-		bitflips = pmecc_correction(mtd, stat, buf,
-					    &oob[oobregion.offset]);
-		if (bitflips < 0)
-			/* uncorrectable errors */
-			return 0;
-	}
-
-	return bitflips;
-}
-
-static int atmel_nand_pmecc_write_page(struct mtd_info *mtd,
-		struct nand_chip *chip, const uint8_t *buf, int oob_required,
-		int page)
-{
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	struct mtd_oob_region oobregion = { };
-	int i, j, section = 0;
-	unsigned long end_time;
-
-	if (!host->nfc || !host->nfc->write_by_sram) {
-		pmecc_enable(host, NAND_ECC_WRITE);
-		chip->write_buf(mtd, (u8 *)buf, mtd->writesize);
-	}
-
-	end_time = jiffies + msecs_to_jiffies(PMECC_MAX_TIMEOUT_MS);
-	while ((pmecc_readl_relaxed(host->ecc, SR) & PMECC_SR_BUSY)) {
-		if (unlikely(time_after(jiffies, end_time))) {
-			dev_err(host->dev, "PMECC: Timeout to get ECC value.\n");
-			return -EIO;
-		}
-		cpu_relax();
-	}
-
-	for (i = 0; i < chip->ecc.steps; i++) {
-		for (j = 0; j < chip->ecc.bytes; j++) {
-			if (!oobregion.length)
-				mtd_ooblayout_ecc(mtd, section, &oobregion);
-
-			chip->oob_poi[oobregion.offset] =
-				pmecc_readb_ecc_relaxed(host->ecc, i, j);
-			oobregion.length--;
-			oobregion.offset++;
-			section++;
-		}
-	}
-	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
-
-	return 0;
-}
-
-static void atmel_pmecc_core_init(struct mtd_info *mtd)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	int eccbytes = mtd_ooblayout_count_eccbytes(mtd);
-	uint32_t val = 0;
-	struct mtd_oob_region oobregion;
-
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_RST);
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
-
-	switch (host->pmecc_corr_cap) {
-	case 2:
-		val = PMECC_CFG_BCH_ERR2;
-		break;
-	case 4:
-		val = PMECC_CFG_BCH_ERR4;
-		break;
-	case 8:
-		val = PMECC_CFG_BCH_ERR8;
-		break;
-	case 12:
-		val = PMECC_CFG_BCH_ERR12;
-		break;
-	case 24:
-		val = PMECC_CFG_BCH_ERR24;
-		break;
-	case 32:
-		val = PMECC_CFG_BCH_ERR32;
-		break;
-	}
-
-	if (host->pmecc_sector_size == 512)
-		val |= PMECC_CFG_SECTOR512;
-	else if (host->pmecc_sector_size == 1024)
-		val |= PMECC_CFG_SECTOR1024;
-
-	switch (nand_chip->ecc.steps) {
-	case 1:
-		val |= PMECC_CFG_PAGE_1SECTOR;
-		break;
-	case 2:
-		val |= PMECC_CFG_PAGE_2SECTORS;
-		break;
-	case 4:
-		val |= PMECC_CFG_PAGE_4SECTORS;
-		break;
-	case 8:
-		val |= PMECC_CFG_PAGE_8SECTORS;
-		break;
-	}
-
-	val |= (PMECC_CFG_READ_OP | PMECC_CFG_SPARE_DISABLE
-		| PMECC_CFG_AUTO_DISABLE);
-	pmecc_writel(host->ecc, CFG, val);
-
-	pmecc_writel(host->ecc, SAREA, mtd->oobsize - 1);
-	mtd_ooblayout_ecc(mtd, 0, &oobregion);
-	pmecc_writel(host->ecc, SADDR, oobregion.offset);
-	pmecc_writel(host->ecc, EADDR,
-		     oobregion.offset + eccbytes - 1);
-	/* See datasheet about PMECC Clock Control Register */
-	pmecc_writel(host->ecc, CLK, 2);
-	pmecc_writel(host->ecc, IDR, 0xff);
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_ENABLE);
-}
-
-/*
- * Get minimum ecc requirements from NAND.
- * If pmecc-cap, pmecc-sector-size in DTS are not specified, this function
- * will set them according to minimum ecc requirement. Otherwise, use the
- * value in DTS file.
- * return 0 if success. otherwise return error code.
- */
-static int pmecc_choose_ecc(struct atmel_nand_host *host,
-		int *cap, int *sector_size)
-{
-	/* Get minimum ECC requirements */
-	if (host->nand_chip.ecc_strength_ds) {
-		*cap = host->nand_chip.ecc_strength_ds;
-		*sector_size = host->nand_chip.ecc_step_ds;
-		dev_info(host->dev, "minimum ECC: %d bits in %d bytes\n",
-				*cap, *sector_size);
-	} else {
-		*cap = 2;
-		*sector_size = 512;
-		dev_info(host->dev, "can't detect min. ECC, assume 2 bits in 512 bytes\n");
-	}
-
-	/* If device tree doesn't specify, use NAND's minimum ECC parameters */
-	if (host->pmecc_corr_cap == 0) {
-		if (*cap > host->caps->pmecc_max_correction)
-			return -EINVAL;
-
-		/* use the most fitable ecc bits (the near bigger one ) */
-		if (*cap <= 2)
-			host->pmecc_corr_cap = 2;
-		else if (*cap <= 4)
-			host->pmecc_corr_cap = 4;
-		else if (*cap <= 8)
-			host->pmecc_corr_cap = 8;
-		else if (*cap <= 12)
-			host->pmecc_corr_cap = 12;
-		else if (*cap <= 24)
-			host->pmecc_corr_cap = 24;
-		else if (*cap <= 32)
-			host->pmecc_corr_cap = 32;
-		else
-			return -EINVAL;
-	}
-	if (host->pmecc_sector_size == 0) {
-		/* use the most fitable sector size (the near smaller one ) */
-		if (*sector_size >= 1024)
-			host->pmecc_sector_size = 1024;
-		else if (*sector_size >= 512)
-			host->pmecc_sector_size = 512;
-		else
-			return -EINVAL;
-	}
-	return 0;
-}
-
-static inline int deg(unsigned int poly)
-{
-	/* polynomial degree is the most-significant bit index */
-	return fls(poly) - 1;
-}
-
-static int build_gf_tables(int mm, unsigned int poly,
-		int16_t *index_of, int16_t *alpha_to)
-{
-	unsigned int i, x = 1;
-	const unsigned int k = 1 << deg(poly);
-	unsigned int nn = (1 << mm) - 1;
-
-	/* primitive polynomial must be of degree m */
-	if (k != (1u << mm))
-		return -EINVAL;
-
-	for (i = 0; i < nn; i++) {
-		alpha_to[i] = x;
-		index_of[x] = i;
-		if (i && (x == 1))
-			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
-			return -EINVAL;
-		x <<= 1;
-		if (x & k)
-			x ^= poly;
-	}
-	alpha_to[nn] = 1;
-	index_of[0] = 0;
-
-	return 0;
-}
-
-static uint16_t *create_lookup_table(struct device *dev, int sector_size)
-{
-	int degree = (sector_size == 512) ?
-			PMECC_GF_DIMENSION_13 :
-			PMECC_GF_DIMENSION_14;
-	unsigned int poly = (sector_size == 512) ?
-			PMECC_GF_13_PRIMITIVE_POLY :
-			PMECC_GF_14_PRIMITIVE_POLY;
-	int table_size = (sector_size == 512) ?
-			PMECC_LOOKUP_TABLE_SIZE_512 :
-			PMECC_LOOKUP_TABLE_SIZE_1024;
-
-	int16_t *addr = devm_kzalloc(dev, 2 * table_size * sizeof(uint16_t),
-			GFP_KERNEL);
-	if (addr && build_gf_tables(degree, poly, addr, addr + table_size))
-		return NULL;
-
-	return addr;
-}
-
-static int atmel_pmecc_nand_init_params(struct platform_device *pdev,
-					 struct atmel_nand_host *host)
-{
-	struct nand_chip *nand_chip = &host->nand_chip;
-	struct mtd_info *mtd = nand_to_mtd(nand_chip);
-	struct resource *regs, *regs_pmerr, *regs_rom;
-	uint16_t *galois_table;
-	int cap, sector_size, err_no;
-
-	err_no = pmecc_choose_ecc(host, &cap, &sector_size);
-	if (err_no) {
-		dev_err(host->dev, "The NAND flash's ECC requirement are not support!");
-		return err_no;
-	}
-
-	if (cap > host->pmecc_corr_cap ||
-			sector_size != host->pmecc_sector_size)
-		dev_info(host->dev, "WARNING: Be Caution! Using different PMECC parameters from Nand ONFI ECC reqirement.\n");
-
-	cap = host->pmecc_corr_cap;
-	sector_size = host->pmecc_sector_size;
-	host->pmecc_lookup_table_offset = (sector_size == 512) ?
-			host->pmecc_lookup_table_offset_512 :
-			host->pmecc_lookup_table_offset_1024;
-
-	dev_info(host->dev, "Initialize PMECC params, cap: %d, sector: %d\n",
-		 cap, sector_size);
-
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (!regs) {
-		dev_warn(host->dev,
-			"Can't get I/O resource regs for PMECC controller, rolling back on software ECC\n");
-		nand_chip->ecc.mode = NAND_ECC_SOFT;
-		nand_chip->ecc.algo = NAND_ECC_HAMMING;
-		return 0;
-	}
-
-	host->ecc = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(host->ecc)) {
-		err_no = PTR_ERR(host->ecc);
-		goto err;
-	}
-
-	regs_pmerr = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-	host->pmerrloc_base = devm_ioremap_resource(&pdev->dev, regs_pmerr);
-	if (IS_ERR(host->pmerrloc_base)) {
-		err_no = PTR_ERR(host->pmerrloc_base);
-		goto err;
-	}
-	host->pmerrloc_el_base = host->pmerrloc_base + ATMEL_PMERRLOC_SIGMAx +
-		(host->caps->pmecc_max_correction + 1) * 4;
-
-	if (!host->has_no_lookup_table) {
-		regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3);
-		host->pmecc_rom_base = devm_ioremap_resource(&pdev->dev,
-								regs_rom);
-		if (IS_ERR(host->pmecc_rom_base)) {
-			dev_err(host->dev, "Can not get I/O resource for ROM, will build a lookup table in runtime!\n");
-			host->has_no_lookup_table = true;
-		}
-	}
-
-	if (host->has_no_lookup_table) {
-		/* Build the look-up table in runtime */
-		galois_table = create_lookup_table(host->dev, sector_size);
-		if (!galois_table) {
-			dev_err(host->dev, "Failed to build a lookup table in runtime!\n");
-			err_no = -EINVAL;
-			goto err;
-		}
-
-		host->pmecc_rom_base = (void __iomem *)galois_table;
-		host->pmecc_lookup_table_offset = 0;
-	}
-
-	nand_chip->ecc.size = sector_size;
-
-	/* set ECC page size and oob layout */
-	switch (mtd->writesize) {
-	case 512:
-	case 1024:
-	case 2048:
-	case 4096:
-	case 8192:
-		if (sector_size > mtd->writesize) {
-			dev_err(host->dev, "pmecc sector size is bigger than the page size!\n");
-			err_no = -EINVAL;
-			goto err;
-		}
-
-		host->pmecc_degree = (sector_size == 512) ?
-			PMECC_GF_DIMENSION_13 : PMECC_GF_DIMENSION_14;
-		host->pmecc_cw_len = (1 << host->pmecc_degree) - 1;
-		host->pmecc_alpha_to = pmecc_get_alpha_to(host);
-		host->pmecc_index_of = host->pmecc_rom_base +
-			host->pmecc_lookup_table_offset;
-
-		nand_chip->ecc.strength = cap;
-		nand_chip->ecc.bytes = pmecc_get_ecc_bytes(cap, sector_size);
-		nand_chip->ecc.steps = mtd->writesize / sector_size;
-		nand_chip->ecc.total = nand_chip->ecc.bytes *
-			nand_chip->ecc.steps;
-		if (nand_chip->ecc.total >
-				mtd->oobsize - PMECC_OOB_RESERVED_BYTES) {
-			dev_err(host->dev, "No room for ECC bytes\n");
-			err_no = -EINVAL;
-			goto err;
-		}
-
-		mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
-		break;
-	default:
-		dev_warn(host->dev,
-			"Unsupported page size for PMECC, use Software ECC\n");
-		/* page size not handled by HW ECC */
-		/* switching back to soft ECC */
-		nand_chip->ecc.mode = NAND_ECC_SOFT;
-		nand_chip->ecc.algo = NAND_ECC_HAMMING;
-		return 0;
-	}
-
-	/* Allocate data for PMECC computation */
-	err_no = pmecc_data_alloc(host);
-	if (err_no) {
-		dev_err(host->dev,
-				"Cannot allocate memory for PMECC computation!\n");
-		goto err;
-	}
-
-	nand_chip->options |= NAND_NO_SUBPAGE_WRITE;
-	nand_chip->ecc.read_page = atmel_nand_pmecc_read_page;
-	nand_chip->ecc.write_page = atmel_nand_pmecc_write_page;
-
-	atmel_pmecc_core_init(mtd);
-
-	return 0;
-
-err:
-	return err_no;
-}
-
-/*
- * Calculate HW ECC
- *
- * function called after a write
- *
- * mtd:        MTD block structure
- * dat:        raw data (unused)
- * ecc_code:   buffer for ECC
- */
-static int atmel_nand_calculate(struct mtd_info *mtd,
-		const u_char *dat, unsigned char *ecc_code)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	unsigned int ecc_value;
-
-	/* get the first 2 ECC bytes */
-	ecc_value = ecc_readl(host->ecc, PR);
-
-	ecc_code[0] = ecc_value & 0xFF;
-	ecc_code[1] = (ecc_value >> 8) & 0xFF;
-
-	/* get the last 2 ECC bytes */
-	ecc_value = ecc_readl(host->ecc, NPR) & ATMEL_ECC_NPARITY;
-
-	ecc_code[2] = ecc_value & 0xFF;
-	ecc_code[3] = (ecc_value >> 8) & 0xFF;
-
-	return 0;
-}
-
-/*
- * HW ECC read page function
- *
- * mtd:        mtd info structure
- * chip:       nand chip info structure
- * buf:        buffer to store read data
- * oob_required:    caller expects OOB data read to chip->oob_poi
- */
-static int atmel_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int oob_required, int page)
-{
-	int eccsize = chip->ecc.size;
-	int eccbytes = chip->ecc.bytes;
-	uint8_t *p = buf;
-	uint8_t *oob = chip->oob_poi;
-	uint8_t *ecc_pos;
-	int stat;
-	unsigned int max_bitflips = 0;
-	struct mtd_oob_region oobregion = {};
-
-	/*
-	 * Errata: ALE is incorrectly wired up to the ECC controller
-	 * on the AP7000, so it will include the address cycles in the
-	 * ECC calculation.
-	 *
-	 * Workaround: Reset the parity registers before reading the
-	 * actual data.
-	 */
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	if (host->board.need_reset_workaround)
-		ecc_writel(host->ecc, CR, ATMEL_ECC_RST);
-
-	/* read the page */
-	chip->read_buf(mtd, p, eccsize);
-
-	/* move to ECC position if needed */
-	mtd_ooblayout_ecc(mtd, 0, &oobregion);
-	if (oobregion.offset != 0) {
-		/*
-		 * This only works on large pages because the ECC controller
-		 * waits for NAND_CMD_RNDOUTSTART after the NAND_CMD_RNDOUT.
-		 * Anyway, for small pages, the first ECC byte is at offset
-		 * 0 in the OOB area.
-		 */
-		chip->cmdfunc(mtd, NAND_CMD_RNDOUT,
-			      mtd->writesize + oobregion.offset, -1);
-	}
-
-	/* the ECC controller needs to read the ECC just after the data */
-	ecc_pos = oob + oobregion.offset;
-	chip->read_buf(mtd, ecc_pos, eccbytes);
-
-	/* check if there's an error */
-	stat = chip->ecc.correct(mtd, p, oob, NULL);
-
-	if (stat < 0) {
-		mtd->ecc_stats.failed++;
-	} else {
-		mtd->ecc_stats.corrected += stat;
-		max_bitflips = max_t(unsigned int, max_bitflips, stat);
-	}
-
-	/* get back to oob start (end of page) */
-	chip->cmdfunc(mtd, NAND_CMD_RNDOUT, mtd->writesize, -1);
-
-	/* read the oob */
-	chip->read_buf(mtd, oob, mtd->oobsize);
-
-	return max_bitflips;
-}
-
-/*
- * HW ECC Correction
- *
- * function called after a read
- *
- * mtd:        MTD block structure
- * dat:        raw data read from the chip
- * read_ecc:   ECC from the chip (unused)
- * isnull:     unused
- *
- * Detect and correct a 1 bit error for a page
- */
-static int atmel_nand_correct(struct mtd_info *mtd, u_char *dat,
-		u_char *read_ecc, u_char *isnull)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-	unsigned int ecc_status;
-	unsigned int ecc_word, ecc_bit;
-
-	/* get the status from the Status Register */
-	ecc_status = ecc_readl(host->ecc, SR);
-
-	/* if there's no error */
-	if (likely(!(ecc_status & ATMEL_ECC_RECERR)))
-		return 0;
-
-	/* get error bit offset (4 bits) */
-	ecc_bit = ecc_readl(host->ecc, PR) & ATMEL_ECC_BITADDR;
-	/* get word address (12 bits) */
-	ecc_word = ecc_readl(host->ecc, PR) & ATMEL_ECC_WORDADDR;
-	ecc_word >>= 4;
-
-	/* if there are multiple errors */
-	if (ecc_status & ATMEL_ECC_MULERR) {
-		/* check if it is a freshly erased block
-		 * (filled with 0xff) */
-		if ((ecc_bit == ATMEL_ECC_BITADDR)
-				&& (ecc_word == (ATMEL_ECC_WORDADDR >> 4))) {
-			/* the block has just been erased, return OK */
-			return 0;
-		}
-		/* it doesn't seems to be a freshly
-		 * erased block.
-		 * We can't correct so many errors */
-		dev_dbg(host->dev, "atmel_nand : multiple errors detected."
-				" Unable to correct.\n");
-		return -EBADMSG;
-	}
-
-	/* if there's a single bit error : we can correct it */
-	if (ecc_status & ATMEL_ECC_ECCERR) {
-		/* there's nothing much to do here.
-		 * the bit error is on the ECC itself.
-		 */
-		dev_dbg(host->dev, "atmel_nand : one bit error on ECC code."
-				" Nothing to correct\n");
-		return 0;
-	}
-
-	dev_dbg(host->dev, "atmel_nand : one bit error on data."
-			" (word offset in the page :"
-			" 0x%x bit offset : 0x%x)\n",
-			ecc_word, ecc_bit);
-	/* correct the error */
-	if (nand_chip->options & NAND_BUSWIDTH_16) {
-		/* 16 bits words */
-		((unsigned short *) dat)[ecc_word] ^= (1 << ecc_bit);
-	} else {
-		/* 8 bits words */
-		dat[ecc_word] ^= (1 << ecc_bit);
-	}
-	dev_dbg(host->dev, "atmel_nand : error corrected\n");
-	return 1;
-}
-
-/*
- * Enable HW ECC : unused on most chips
- */
-static void atmel_nand_hwctl(struct mtd_info *mtd, int mode)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	if (host->board.need_reset_workaround)
-		ecc_writel(host->ecc, CR, ATMEL_ECC_RST);
-}
-
-static int atmel_of_init_ecc(struct atmel_nand_host *host,
-			     struct device_node *np)
-{
-	u32 offset[2];
-	u32 val;
-
-	host->has_pmecc = of_property_read_bool(np, "atmel,has-pmecc");
-
-	/* Not using PMECC */
-	if (!(host->nand_chip.ecc.mode == NAND_ECC_HW) || !host->has_pmecc)
-		return 0;
-
-	/* use PMECC, get correction capability, sector size and lookup
-	 * table offset.
-	 * If correction bits and sector size are not specified, then find
-	 * them from NAND ONFI parameters.
-	 */
-	if (of_property_read_u32(np, "atmel,pmecc-cap", &val) == 0) {
-		if (val > host->caps->pmecc_max_correction) {
-			dev_err(host->dev,
-				"Required ECC strength too high: %u max %u\n",
-				val, host->caps->pmecc_max_correction);
-			return -EINVAL;
-		}
-		if ((val != 2)  && (val != 4)  && (val != 8) &&
-		    (val != 12) && (val != 24) && (val != 32)) {
-			dev_err(host->dev,
-				"Required ECC strength not supported: %u\n",
-				val);
-			return -EINVAL;
-		}
-		host->pmecc_corr_cap = (u8)val;
-	}
-
-	if (of_property_read_u32(np, "atmel,pmecc-sector-size", &val) == 0) {
-		if ((val != 512) && (val != 1024)) {
-			dev_err(host->dev,
-				"Required ECC sector size not supported: %u\n",
-				val);
-			return -EINVAL;
-		}
-		host->pmecc_sector_size = (u16)val;
-	}
-
-	if (of_property_read_u32_array(np, "atmel,pmecc-lookup-table-offset",
-			offset, 2) != 0) {
-		dev_err(host->dev, "Cannot get PMECC lookup table offset, will build a lookup table in runtime.\n");
-		host->has_no_lookup_table = true;
-		/* Will build a lookup table and initialize the offset later */
-		return 0;
-	}
-
-	if (!offset[0] && !offset[1]) {
-		dev_err(host->dev, "Invalid PMECC lookup table offset\n");
-		return -EINVAL;
-	}
-
-	host->pmecc_lookup_table_offset_512 = offset[0];
-	host->pmecc_lookup_table_offset_1024 = offset[1];
-
-	return 0;
-}
-
-static int atmel_of_init_port(struct atmel_nand_host *host,
-			      struct device_node *np)
-{
-	u32 val;
-	struct atmel_nand_data *board = &host->board;
-	enum of_gpio_flags flags = 0;
-
-	host->caps = (struct atmel_nand_caps *)
-		of_device_get_match_data(host->dev);
-
-	if (of_property_read_u32(np, "atmel,nand-addr-offset", &val) == 0) {
-		if (val >= 32) {
-			dev_err(host->dev, "invalid addr-offset %u\n", val);
-			return -EINVAL;
-		}
-		board->ale = val;
-	}
-
-	if (of_property_read_u32(np, "atmel,nand-cmd-offset", &val) == 0) {
-		if (val >= 32) {
-			dev_err(host->dev, "invalid cmd-offset %u\n", val);
-			return -EINVAL;
-		}
-		board->cle = val;
-	}
-
-	board->has_dma = of_property_read_bool(np, "atmel,nand-has-dma");
-
-	board->rdy_pin = of_get_gpio_flags(np, 0, &flags);
-	board->rdy_pin_active_low = (flags == OF_GPIO_ACTIVE_LOW);
-
-	board->enable_pin = of_get_gpio(np, 1);
-	board->det_pin = of_get_gpio(np, 2);
-
-	/* load the nfc driver if there is */
-	of_platform_populate(np, NULL, NULL, host->dev);
-
-	/*
-	 * Initialize ECC mode to NAND_ECC_SOFT so that we have a correct value
-	 * even if the nand-ecc-mode property is not defined.
-	 */
-	host->nand_chip.ecc.mode = NAND_ECC_SOFT;
-	host->nand_chip.ecc.algo = NAND_ECC_HAMMING;
-
-	return 0;
-}
-
-static int atmel_hw_nand_init_params(struct platform_device *pdev,
-					 struct atmel_nand_host *host)
-{
-	struct nand_chip *nand_chip = &host->nand_chip;
-	struct mtd_info *mtd = nand_to_mtd(nand_chip);
-	struct resource		*regs;
-
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (!regs) {
-		dev_err(host->dev,
-			"Can't get I/O resource regs, use software ECC\n");
-		nand_chip->ecc.mode = NAND_ECC_SOFT;
-		nand_chip->ecc.algo = NAND_ECC_HAMMING;
-		return 0;
-	}
-
-	host->ecc = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(host->ecc))
-		return PTR_ERR(host->ecc);
-
-	/* ECC is calculated for the whole page (1 step) */
-	nand_chip->ecc.size = mtd->writesize;
-
-	/* set ECC page size and oob layout */
-	switch (mtd->writesize) {
-	case 512:
-		mtd_set_ooblayout(mtd, &atmel_ooblayout_sp_ops);
-		ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_528);
-		break;
-	case 1024:
-		mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
-		ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_1056);
-		break;
-	case 2048:
-		mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
-		ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_2112);
-		break;
-	case 4096:
-		mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
-		ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_4224);
-		break;
-	default:
-		/* page size not handled by HW ECC */
-		/* switching back to soft ECC */
-		nand_chip->ecc.mode = NAND_ECC_SOFT;
-		nand_chip->ecc.algo = NAND_ECC_HAMMING;
-		return 0;
-	}
-
-	/* set up for HW ECC */
-	nand_chip->ecc.calculate = atmel_nand_calculate;
-	nand_chip->ecc.correct = atmel_nand_correct;
-	nand_chip->ecc.hwctl = atmel_nand_hwctl;
-	nand_chip->ecc.read_page = atmel_nand_read_page;
-	nand_chip->ecc.bytes = 4;
-	nand_chip->ecc.strength = 1;
-
-	return 0;
-}
-
-static inline u32 nfc_read_status(struct atmel_nand_host *host)
-{
-	u32 err_flags = NFC_SR_DTOE | NFC_SR_UNDEF | NFC_SR_AWB | NFC_SR_ASE;
-	u32 nfc_status = nfc_readl(host->nfc->hsmc_regs, SR);
-
-	if (unlikely(nfc_status & err_flags)) {
-		if (nfc_status & NFC_SR_DTOE)
-			dev_err(host->dev, "NFC: Waiting Nand R/B Timeout Error\n");
-		else if (nfc_status & NFC_SR_UNDEF)
-			dev_err(host->dev, "NFC: Access Undefined Area Error\n");
-		else if (nfc_status & NFC_SR_AWB)
-			dev_err(host->dev, "NFC: Access memory While NFC is busy\n");
-		else if (nfc_status & NFC_SR_ASE)
-			dev_err(host->dev, "NFC: Access memory Size Error\n");
-	}
-
-	return nfc_status;
-}
-
-/* SMC interrupt service routine */
-static irqreturn_t hsmc_interrupt(int irq, void *dev_id)
-{
-	struct atmel_nand_host *host = dev_id;
-	u32 status, mask, pending;
-	irqreturn_t ret = IRQ_NONE;
-
-	status = nfc_read_status(host);
-	mask = nfc_readl(host->nfc->hsmc_regs, IMR);
-	pending = status & mask;
-
-	if (pending & NFC_SR_XFR_DONE) {
-		complete(&host->nfc->comp_xfer_done);
-		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_XFR_DONE);
-		ret = IRQ_HANDLED;
-	}
-	if (pending & NFC_SR_RB_EDGE) {
-		complete(&host->nfc->comp_ready);
-		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_RB_EDGE);
-		ret = IRQ_HANDLED;
-	}
-	if (pending & NFC_SR_CMD_DONE) {
-		complete(&host->nfc->comp_cmd_done);
-		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_CMD_DONE);
-		ret = IRQ_HANDLED;
-	}
-
-	return ret;
-}
-
-/* NFC(Nand Flash Controller) related functions */
-static void nfc_prepare_interrupt(struct atmel_nand_host *host, u32 flag)
-{
-	if (flag & NFC_SR_XFR_DONE)
-		init_completion(&host->nfc->comp_xfer_done);
-
-	if (flag & NFC_SR_RB_EDGE)
-		init_completion(&host->nfc->comp_ready);
-
-	if (flag & NFC_SR_CMD_DONE)
-		init_completion(&host->nfc->comp_cmd_done);
-
-	/* Enable interrupt that need to wait for */
-	nfc_writel(host->nfc->hsmc_regs, IER, flag);
-}
-
-static int nfc_wait_interrupt(struct atmel_nand_host *host, u32 flag)
-{
-	int i, index = 0;
-	struct completion *comp[3];	/* Support 3 interrupt completion */
-
-	if (flag & NFC_SR_XFR_DONE)
-		comp[index++] = &host->nfc->comp_xfer_done;
-
-	if (flag & NFC_SR_RB_EDGE)
-		comp[index++] = &host->nfc->comp_ready;
-
-	if (flag & NFC_SR_CMD_DONE)
-		comp[index++] = &host->nfc->comp_cmd_done;
-
-	if (index == 0) {
-		dev_err(host->dev, "Unknown interrupt flag: 0x%08x\n", flag);
-		return -EINVAL;
-	}
-
-	for (i = 0; i < index; i++) {
-		if (wait_for_completion_timeout(comp[i],
-				msecs_to_jiffies(NFC_TIME_OUT_MS)))
-			continue;	/* wait for next completion */
-		else
-			goto err_timeout;
-	}
-
-	return 0;
-
-err_timeout:
-	dev_err(host->dev, "Time out to wait for interrupt: 0x%08x\n", flag);
-	/* Disable the interrupt as it is not handled by interrupt handler */
-	nfc_writel(host->nfc->hsmc_regs, IDR, flag);
-	return -ETIMEDOUT;
-}
-
-static int nfc_send_command(struct atmel_nand_host *host,
-	unsigned int cmd, unsigned int addr, unsigned char cycle0)
-{
-	unsigned long timeout;
-	u32 flag = NFC_SR_CMD_DONE;
-	flag |= cmd & NFCADDR_CMD_DATAEN ? NFC_SR_XFR_DONE : 0;
-
-	dev_dbg(host->dev,
-		"nfc_cmd: 0x%08x, addr1234: 0x%08x, cycle0: 0x%02x\n",
-		cmd, addr, cycle0);
-
-	timeout = jiffies + msecs_to_jiffies(NFC_TIME_OUT_MS);
-	while (nfc_readl(host->nfc->hsmc_regs, SR) & NFC_SR_BUSY) {
-		if (time_after(jiffies, timeout)) {
-			dev_err(host->dev,
-				"Time out to wait for NFC ready!\n");
-			return -ETIMEDOUT;
-		}
-	}
-
-	nfc_prepare_interrupt(host, flag);
-	nfc_writel(host->nfc->hsmc_regs, CYCLE0, cycle0);
-	nfc_cmd_addr1234_writel(cmd, addr, host->nfc->base_cmd_regs);
-	return nfc_wait_interrupt(host, flag);
-}
-
-static int nfc_device_ready(struct mtd_info *mtd)
-{
-	u32 status, mask;
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	status = nfc_read_status(host);
-	mask = nfc_readl(host->nfc->hsmc_regs, IMR);
-
-	/* The mask should be 0. If not we may lost interrupts */
-	if (unlikely(mask & status))
-		dev_err(host->dev, "Lost the interrupt flags: 0x%08x\n",
-				mask & status);
-
-	return status & NFC_SR_RB_EDGE;
-}
-
-static void nfc_select_chip(struct mtd_info *mtd, int chip)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(nand_chip);
-
-	if (chip == -1)
-		nfc_writel(host->nfc->hsmc_regs, CTRL, NFC_CTRL_DISABLE);
-	else
-		nfc_writel(host->nfc->hsmc_regs, CTRL, NFC_CTRL_ENABLE);
-}
-
-static int nfc_make_addr(struct mtd_info *mtd, int command, int column,
-		int page_addr, unsigned int *addr1234, unsigned int *cycle0)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
-	int acycle = 0;
-	unsigned char addr_bytes[8];
-	int index = 0, bit_shift;
-
-	BUG_ON(addr1234 == NULL || cycle0 == NULL);
-
-	*cycle0 = 0;
-	*addr1234 = 0;
-
-	if (column != -1) {
-		if (chip->options & NAND_BUSWIDTH_16 &&
-				!nand_opcode_8bits(command))
-			column >>= 1;
-		addr_bytes[acycle++] = column & 0xff;
-		if (mtd->writesize > 512)
-			addr_bytes[acycle++] = (column >> 8) & 0xff;
-	}
-
-	if (page_addr != -1) {
-		addr_bytes[acycle++] = page_addr & 0xff;
-		addr_bytes[acycle++] = (page_addr >> 8) & 0xff;
-		if (chip->chipsize > (128 << 20))
-			addr_bytes[acycle++] = (page_addr >> 16) & 0xff;
-	}
-
-	if (acycle > 4)
-		*cycle0 = addr_bytes[index++];
-
-	for (bit_shift = 0; index < acycle; bit_shift += 8)
-		*addr1234 += addr_bytes[index++] << bit_shift;
-
-	/* return acycle in cmd register */
-	return acycle << NFCADDR_CMD_ACYCLE_BIT_POS;
-}
-
-static void nfc_nand_command(struct mtd_info *mtd, unsigned int command,
-				int column, int page_addr)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	unsigned long timeout;
-	unsigned int nfc_addr_cmd = 0;
-
-	unsigned int cmd1 = command << NFCADDR_CMD_CMD1_BIT_POS;
-
-	/* Set default settings: no cmd2, no addr cycle. read from nand */
-	unsigned int cmd2 = 0;
-	unsigned int vcmd2 = 0;
-	int acycle = NFCADDR_CMD_ACYCLE_NONE;
-	int csid = NFCADDR_CMD_CSID_3;
-	int dataen = NFCADDR_CMD_DATADIS;
-	int nfcwr = NFCADDR_CMD_NFCRD;
-	unsigned int addr1234 = 0;
-	unsigned int cycle0 = 0;
-	bool do_addr = true;
-	host->nfc->data_in_sram = NULL;
-
-	dev_dbg(host->dev, "%s: cmd = 0x%02x, col = 0x%08x, page = 0x%08x\n",
-	     __func__, command, column, page_addr);
-
-	switch (command) {
-	case NAND_CMD_RESET:
-		nfc_addr_cmd = cmd1 | acycle | csid | dataen | nfcwr;
-		nfc_send_command(host, nfc_addr_cmd, addr1234, cycle0);
-		udelay(chip->chip_delay);
-
-		nfc_nand_command(mtd, NAND_CMD_STATUS, -1, -1);
-		timeout = jiffies + msecs_to_jiffies(NFC_TIME_OUT_MS);
-		while (!(chip->read_byte(mtd) & NAND_STATUS_READY)) {
-			if (time_after(jiffies, timeout)) {
-				dev_err(host->dev,
-					"Time out to wait status ready!\n");
-				break;
-			}
-		}
-		return;
-	case NAND_CMD_STATUS:
-		do_addr = false;
-		break;
-	case NAND_CMD_PARAM:
-	case NAND_CMD_READID:
-		do_addr = false;
-		acycle = NFCADDR_CMD_ACYCLE_1;
-		if (column != -1)
-			addr1234 = column;
-		break;
-	case NAND_CMD_RNDOUT:
-		cmd2 = NAND_CMD_RNDOUTSTART << NFCADDR_CMD_CMD2_BIT_POS;
-		vcmd2 = NFCADDR_CMD_VCMD2;
-		break;
-	case NAND_CMD_READ0:
-	case NAND_CMD_READOOB:
-		if (command == NAND_CMD_READOOB) {
-			column += mtd->writesize;
-			command = NAND_CMD_READ0; /* only READ0 is valid */
-			cmd1 = command << NFCADDR_CMD_CMD1_BIT_POS;
-		}
-		if (host->nfc->use_nfc_sram) {
-			/* Enable Data transfer to sram */
-			dataen = NFCADDR_CMD_DATAEN;
-
-			/* Need enable PMECC now, since NFC will transfer
-			 * data in bus after sending nfc read command.
-			 */
-			if (chip->ecc.mode == NAND_ECC_HW && host->has_pmecc)
-				pmecc_enable(host, NAND_ECC_READ);
-		}
-
-		cmd2 = NAND_CMD_READSTART << NFCADDR_CMD_CMD2_BIT_POS;
-		vcmd2 = NFCADDR_CMD_VCMD2;
-		break;
-	/* For prgramming command, the cmd need set to write enable */
-	case NAND_CMD_PAGEPROG:
-	case NAND_CMD_SEQIN:
-	case NAND_CMD_RNDIN:
-		nfcwr = NFCADDR_CMD_NFCWR;
-		if (host->nfc->will_write_sram && command == NAND_CMD_SEQIN)
-			dataen = NFCADDR_CMD_DATAEN;
-		break;
-	default:
-		break;
-	}
-
-	if (do_addr)
-		acycle = nfc_make_addr(mtd, command, column, page_addr,
-				&addr1234, &cycle0);
-
-	nfc_addr_cmd = cmd1 | cmd2 | vcmd2 | acycle | csid | dataen | nfcwr;
-	nfc_send_command(host, nfc_addr_cmd, addr1234, cycle0);
-
-	/*
-	 * Program and erase have their own busy handlers status, sequential
-	 * in, and deplete1 need no delay.
-	 */
-	switch (command) {
-	case NAND_CMD_CACHEDPROG:
-	case NAND_CMD_PAGEPROG:
-	case NAND_CMD_ERASE1:
-	case NAND_CMD_ERASE2:
-	case NAND_CMD_RNDIN:
-	case NAND_CMD_STATUS:
-	case NAND_CMD_RNDOUT:
-	case NAND_CMD_SEQIN:
-	case NAND_CMD_READID:
-		return;
-
-	case NAND_CMD_READ0:
-		if (dataen == NFCADDR_CMD_DATAEN) {
-			host->nfc->data_in_sram = host->nfc->sram_bank0 +
-				nfc_get_sram_off(host);
-			return;
-		}
-		/* fall through */
-	default:
-		nfc_prepare_interrupt(host, NFC_SR_RB_EDGE);
-		nfc_wait_interrupt(host, NFC_SR_RB_EDGE);
-	}
-}
-
-static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-			uint32_t offset, int data_len, const uint8_t *buf,
-			int oob_required, int page, int cached, int raw)
-{
-	int cfg, len;
-	int status = 0;
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	void *sram = host->nfc->sram_bank0 + nfc_get_sram_off(host);
-
-	/* Subpage write is not supported */
-	if (offset || (data_len < mtd->writesize))
-		return -EINVAL;
-
-	len = mtd->writesize;
-	/* Copy page data to sram that will write to nand via NFC */
-	if (use_dma) {
-		if (atmel_nand_dma_op(mtd, (void *)buf, len, 0) != 0)
-			/* Fall back to use cpu copy */
-			memcpy(sram, buf, len);
-	} else {
-		memcpy(sram, buf, len);
-	}
-
-	cfg = nfc_readl(host->nfc->hsmc_regs, CFG);
-	if (unlikely(raw) && oob_required) {
-		memcpy(sram + len, chip->oob_poi, mtd->oobsize);
-		len += mtd->oobsize;
-		nfc_writel(host->nfc->hsmc_regs, CFG, cfg | NFC_CFG_WSPARE);
-	} else {
-		nfc_writel(host->nfc->hsmc_regs, CFG, cfg & ~NFC_CFG_WSPARE);
-	}
-
-	if (chip->ecc.mode == NAND_ECC_HW && host->has_pmecc)
-		/*
-		 * When use NFC sram, need set up PMECC before send
-		 * NAND_CMD_SEQIN command. Since when the nand command
-		 * is sent, nfc will do transfer from sram and nand.
-		 */
-		pmecc_enable(host, NAND_ECC_WRITE);
-
-	host->nfc->will_write_sram = true;
-	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
-	host->nfc->will_write_sram = false;
-
-	if (likely(!raw))
-		/* Need to write ecc into oob */
-		status = chip->ecc.write_page(mtd, chip, buf, oob_required,
-					      page);
-
-	if (status < 0)
-		return status;
-
-	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
-	status = chip->waitfunc(mtd, chip);
-
-	if ((status & NAND_STATUS_FAIL) && (chip->errstat))
-		status = chip->errstat(mtd, chip, FL_WRITING, status, page);
-
-	if (status & NAND_STATUS_FAIL)
-		return -EIO;
-
-	return 0;
-}
-
-static int nfc_sram_init(struct mtd_info *mtd)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct atmel_nand_host *host = nand_get_controller_data(chip);
-	int res = 0;
-
-	/* Initialize the NFC CFG register */
-	unsigned int cfg_nfc = 0;
-
-	/* set page size and oob layout */
-	switch (mtd->writesize) {
-	case 512:
-		cfg_nfc = NFC_CFG_PAGESIZE_512;
-		break;
-	case 1024:
-		cfg_nfc = NFC_CFG_PAGESIZE_1024;
-		break;
-	case 2048:
-		cfg_nfc = NFC_CFG_PAGESIZE_2048;
-		break;
-	case 4096:
-		cfg_nfc = NFC_CFG_PAGESIZE_4096;
-		break;
-	case 8192:
-		cfg_nfc = NFC_CFG_PAGESIZE_8192;
-		break;
-	default:
-		dev_err(host->dev, "Unsupported page size for NFC.\n");
-		res = -ENXIO;
-		return res;
-	}
-
-	/* oob bytes size = (NFCSPARESIZE + 1) * 4
-	 * Max support spare size is 512 bytes. */
-	cfg_nfc |= (((mtd->oobsize / 4) - 1) << NFC_CFG_NFC_SPARESIZE_BIT_POS
-		& NFC_CFG_NFC_SPARESIZE);
-	/* default set a max timeout */
-	cfg_nfc |= NFC_CFG_RSPARE |
-			NFC_CFG_NFC_DTOCYC | NFC_CFG_NFC_DTOMUL;
-
-	nfc_writel(host->nfc->hsmc_regs, CFG, cfg_nfc);
-
-	host->nfc->will_write_sram = false;
-	nfc_set_sram_bank(host, 0);
-
-	/* Use Write page with NFC SRAM only for PMECC or ECC NONE. */
-	if (host->nfc->write_by_sram) {
-		if ((chip->ecc.mode == NAND_ECC_HW && host->has_pmecc) ||
-				chip->ecc.mode == NAND_ECC_NONE)
-			chip->write_page = nfc_sram_write_page;
-		else
-			host->nfc->write_by_sram = false;
-	}
-
-	dev_info(host->dev, "Using NFC Sram read %s\n",
-			host->nfc->write_by_sram ? "and write" : "");
-	return 0;
-}
-
-static struct platform_driver atmel_nand_nfc_driver;
-/*
- * Probe for the NAND device.
- */
-static int atmel_nand_probe(struct platform_device *pdev)
-{
-	struct atmel_nand_host *host;
-	struct mtd_info *mtd;
-	struct nand_chip *nand_chip;
-	struct resource *mem;
-	int res, irq;
-
-	/* Allocate memory for the device structure (and zero it) */
-	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
-	if (!host)
-		return -ENOMEM;
-
-	res = platform_driver_register(&atmel_nand_nfc_driver);
-	if (res)
-		dev_err(&pdev->dev, "atmel_nand: can't register NFC driver\n");
-
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	host->io_base = devm_ioremap_resource(&pdev->dev, mem);
-	if (IS_ERR(host->io_base)) {
-		res = PTR_ERR(host->io_base);
-		goto err_nand_ioremap;
-	}
-	host->io_phys = (dma_addr_t)mem->start;
-
-	nand_chip = &host->nand_chip;
-	mtd = nand_to_mtd(nand_chip);
-	host->dev = &pdev->dev;
-	if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) {
-		nand_set_flash_node(nand_chip, pdev->dev.of_node);
-		/* Only when CONFIG_OF is enabled of_node can be parsed */
-		res = atmel_of_init_port(host, pdev->dev.of_node);
-		if (res)
-			goto err_nand_ioremap;
-	} else {
-		memcpy(&host->board, dev_get_platdata(&pdev->dev),
-		       sizeof(struct atmel_nand_data));
-		nand_chip->ecc.mode = host->board.ecc_mode;
-
-		/*
-		 * When using software ECC every supported avr32 board means
-		 * Hamming algorithm. If that ever changes we'll need to add
-		 * ecc_algo field to the struct atmel_nand_data.
-		 */
-		if (nand_chip->ecc.mode == NAND_ECC_SOFT)
-			nand_chip->ecc.algo = NAND_ECC_HAMMING;
-
-		/* 16-bit bus width */
-		if (host->board.bus_width_16)
-			nand_chip->options |= NAND_BUSWIDTH_16;
-	}
-
-	 /* link the private data structures */
-	nand_set_controller_data(nand_chip, host);
-	mtd->dev.parent = &pdev->dev;
-
-	/* Set address of NAND IO lines */
-	nand_chip->IO_ADDR_R = host->io_base;
-	nand_chip->IO_ADDR_W = host->io_base;
-
-	if (nand_nfc.is_initialized) {
-		/* NFC driver is probed and initialized */
-		host->nfc = &nand_nfc;
-
-		nand_chip->select_chip = nfc_select_chip;
-		nand_chip->dev_ready = nfc_device_ready;
-		nand_chip->cmdfunc = nfc_nand_command;
-
-		/* Initialize the interrupt for NFC */
-		irq = platform_get_irq(pdev, 0);
-		if (irq < 0) {
-			dev_err(host->dev, "Cannot get HSMC irq!\n");
-			res = irq;
-			goto err_nand_ioremap;
-		}
-
-		res = devm_request_irq(&pdev->dev, irq, hsmc_interrupt,
-				0, "hsmc", host);
-		if (res) {
-			dev_err(&pdev->dev, "Unable to request HSMC irq %d\n",
-				irq);
-			goto err_nand_ioremap;
-		}
-	} else {
-		res = atmel_nand_set_enable_ready_pins(mtd);
-		if (res)
-			goto err_nand_ioremap;
-
-		nand_chip->cmd_ctrl = atmel_nand_cmd_ctrl;
-	}
-
-	nand_chip->chip_delay = 40;		/* 40us command delay time */
-
-
-	nand_chip->read_buf = atmel_read_buf;
-	nand_chip->write_buf = atmel_write_buf;
-
-	platform_set_drvdata(pdev, host);
-	atmel_nand_enable(host);
-
-	if (gpio_is_valid(host->board.det_pin)) {
-		res = devm_gpio_request(&pdev->dev,
-				host->board.det_pin, "nand_det");
-		if (res < 0) {
-			dev_err(&pdev->dev,
-				"can't request det gpio %d\n",
-				host->board.det_pin);
-			goto err_no_card;
-		}
-
-		res = gpio_direction_input(host->board.det_pin);
-		if (res < 0) {
-			dev_err(&pdev->dev,
-				"can't request input direction det gpio %d\n",
-				host->board.det_pin);
-			goto err_no_card;
-		}
-
-		if (gpio_get_value(host->board.det_pin)) {
-			dev_info(&pdev->dev, "No SmartMedia card inserted.\n");
-			res = -ENXIO;
-			goto err_no_card;
-		}
-	}
-
-	if (!host->board.has_dma)
-		use_dma = 0;
-
-	if (use_dma) {
-		dma_cap_mask_t mask;
-
-		dma_cap_zero(mask);
-		dma_cap_set(DMA_MEMCPY, mask);
-		host->dma_chan = dma_request_channel(mask, NULL, NULL);
-		if (!host->dma_chan) {
-			dev_err(host->dev, "Failed to request DMA channel\n");
-			use_dma = 0;
-		}
-	}
-	if (use_dma)
-		dev_info(host->dev, "Using %s for DMA transfers.\n",
-					dma_chan_name(host->dma_chan));
-	else
-		dev_info(host->dev, "No DMA support for NAND access.\n");
-
-	/* first scan to find the device and get the page size */
-	res = nand_scan_ident(mtd, 1, NULL);
-	if (res)
-		goto err_scan_ident;
-
-	if (host->board.on_flash_bbt || on_flash_bbt)
-		nand_chip->bbt_options |= NAND_BBT_USE_FLASH;
-
-	if (nand_chip->bbt_options & NAND_BBT_USE_FLASH)
-		dev_info(&pdev->dev, "Use On Flash BBT\n");
-
-	if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) {
-		res = atmel_of_init_ecc(host, pdev->dev.of_node);
-		if (res)
-			goto err_hw_ecc;
-	}
-
-	if (nand_chip->ecc.mode == NAND_ECC_HW) {
-		if (host->has_pmecc)
-			res = atmel_pmecc_nand_init_params(pdev, host);
-		else
-			res = atmel_hw_nand_init_params(pdev, host);
-
-		if (res != 0)
-			goto err_hw_ecc;
-	}
-
-	/* initialize the nfc configuration register */
-	if (host->nfc && host->nfc->use_nfc_sram) {
-		res = nfc_sram_init(mtd);
-		if (res) {
-			host->nfc->use_nfc_sram = false;
-			dev_err(host->dev, "Disable use nfc sram for data transfer.\n");
-		}
-	}
-
-	/* second phase scan */
-	res = nand_scan_tail(mtd);
-	if (res)
-		goto err_scan_tail;
-
-	mtd->name = "atmel_nand";
-	res = mtd_device_register(mtd, host->board.parts,
-				  host->board.num_parts);
-	if (!res)
-		return res;
-
-err_scan_tail:
-	if (host->has_pmecc && host->nand_chip.ecc.mode == NAND_ECC_HW)
-		pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
-err_hw_ecc:
-err_scan_ident:
-err_no_card:
-	atmel_nand_disable(host);
-	if (host->dma_chan)
-		dma_release_channel(host->dma_chan);
-err_nand_ioremap:
-	return res;
-}
-
-/*
- * Remove a NAND device.
- */
-static int atmel_nand_remove(struct platform_device *pdev)
-{
-	struct atmel_nand_host *host = platform_get_drvdata(pdev);
-	struct mtd_info *mtd = nand_to_mtd(&host->nand_chip);
-
-	nand_release(mtd);
-
-	atmel_nand_disable(host);
-
-	if (host->has_pmecc && host->nand_chip.ecc.mode == NAND_ECC_HW) {
-		pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
-		pmerrloc_writel(host->pmerrloc_base, ELDIS,
-				PMERRLOC_DISABLE);
-	}
-
-	if (host->dma_chan)
-		dma_release_channel(host->dma_chan);
-
-	platform_driver_unregister(&atmel_nand_nfc_driver);
-
-	return 0;
-}
-
-/*
- * AT91RM9200 does not have PMECC or PMECC Errloc peripherals for
- * BCH ECC. Combined with the "atmel,has-pmecc", it is used to describe
- * devices from the SAM9 family that have those.
- */
-static const struct atmel_nand_caps at91rm9200_caps = {
-	.pmecc_correct_erase_page = false,
-	.pmecc_max_correction = 24,
-};
-
-static const struct atmel_nand_caps sama5d4_caps = {
-	.pmecc_correct_erase_page = true,
-	.pmecc_max_correction = 24,
-};
-
-/*
- * The PMECC Errloc controller starting in SAMA5D2 is not compatible,
- * as the increased correction strength requires more registers.
- */
-static const struct atmel_nand_caps sama5d2_caps = {
-	.pmecc_correct_erase_page = true,
-	.pmecc_max_correction = 32,
-};
-
-static const struct of_device_id atmel_nand_dt_ids[] = {
-	{ .compatible = "atmel,at91rm9200-nand", .data = &at91rm9200_caps },
-	{ .compatible = "atmel,sama5d4-nand", .data = &sama5d4_caps },
-	{ .compatible = "atmel,sama5d2-nand", .data = &sama5d2_caps },
-	{ /* sentinel */ }
-};
-
-MODULE_DEVICE_TABLE(of, atmel_nand_dt_ids);
-
-static int atmel_nand_nfc_probe(struct platform_device *pdev)
-{
-	struct atmel_nfc *nfc = &nand_nfc;
-	struct resource *nfc_cmd_regs, *nfc_hsmc_regs, *nfc_sram;
-	int ret;
-
-	nfc_cmd_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	nfc->base_cmd_regs = devm_ioremap_resource(&pdev->dev, nfc_cmd_regs);
-	if (IS_ERR(nfc->base_cmd_regs))
-		return PTR_ERR(nfc->base_cmd_regs);
-
-	nfc_hsmc_regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	nfc->hsmc_regs = devm_ioremap_resource(&pdev->dev, nfc_hsmc_regs);
-	if (IS_ERR(nfc->hsmc_regs))
-		return PTR_ERR(nfc->hsmc_regs);
-
-	nfc_sram = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-	if (nfc_sram) {
-		nfc->sram_bank0 = (void * __force)
-				devm_ioremap_resource(&pdev->dev, nfc_sram);
-		if (IS_ERR(nfc->sram_bank0)) {
-			dev_warn(&pdev->dev, "Fail to ioremap the NFC sram with error: %ld. So disable NFC sram.\n",
-					PTR_ERR(nfc->sram_bank0));
-		} else {
-			nfc->use_nfc_sram = true;
-			nfc->sram_bank0_phys = (dma_addr_t)nfc_sram->start;
-
-			if (pdev->dev.of_node)
-				nfc->write_by_sram = of_property_read_bool(
-						pdev->dev.of_node,
-						"atmel,write-by-sram");
-		}
-	}
-
-	nfc_writel(nfc->hsmc_regs, IDR, 0xffffffff);
-	nfc_readl(nfc->hsmc_regs, SR);	/* clear the NFC_SR */
-
-	nfc->clk = devm_clk_get(&pdev->dev, NULL);
-	if (!IS_ERR(nfc->clk)) {
-		ret = clk_prepare_enable(nfc->clk);
-		if (ret)
-			return ret;
-	} else {
-		dev_warn(&pdev->dev, "NFC clock missing, update your Device Tree");
-	}
-
-	nfc->is_initialized = true;
-	dev_info(&pdev->dev, "NFC is probed.\n");
-
-	return 0;
-}
-
-static int atmel_nand_nfc_remove(struct platform_device *pdev)
-{
-	struct atmel_nfc *nfc = &nand_nfc;
-
-	if (!IS_ERR(nfc->clk))
-		clk_disable_unprepare(nfc->clk);
-
-	return 0;
-}
-
-static const struct of_device_id atmel_nand_nfc_match[] = {
-	{ .compatible = "atmel,sama5d3-nfc" },
-	{ /* sentinel */ }
-};
-MODULE_DEVICE_TABLE(of, atmel_nand_nfc_match);
-
-static struct platform_driver atmel_nand_nfc_driver = {
-	.driver = {
-		.name = "atmel_nand_nfc",
-		.of_match_table = of_match_ptr(atmel_nand_nfc_match),
-	},
-	.probe = atmel_nand_nfc_probe,
-	.remove = atmel_nand_nfc_remove,
-};
-
-static struct platform_driver atmel_nand_driver = {
-	.probe		= atmel_nand_probe,
-	.remove		= atmel_nand_remove,
-	.driver		= {
-		.name	= "atmel_nand",
-		.of_match_table	= of_match_ptr(atmel_nand_dt_ids),
-	},
-};
-
-module_platform_driver(atmel_nand_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rick Bronson");
-MODULE_DESCRIPTION("NAND/SmartMedia driver for AT91 / AVR32");
-MODULE_ALIAS("platform:atmel_nand");
diff --git a/drivers/mtd/nand/atmel_nand_ecc.h b/drivers/mtd/nand/atmel_nand_ecc.h
deleted file mode 100644
index 834d694487bd..000000000000
--- a/drivers/mtd/nand/atmel_nand_ecc.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Error Corrected Code Controller (ECC) - System peripherals regsters.
- * Based on AT91SAM9260 datasheet revision B.
- *
- * Copyright (C) 2007 Andrew Victor
- * Copyright (C) 2007 - 2012 Atmel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef ATMEL_NAND_ECC_H
-#define ATMEL_NAND_ECC_H
-
-#define ATMEL_ECC_CR		0x00			/* Control register */
-#define		ATMEL_ECC_RST		(1 << 0)		/* Reset parity */
-
-#define ATMEL_ECC_MR		0x04			/* Mode register */
-#define		ATMEL_ECC_PAGESIZE	(3 << 0)		/* Page Size */
-#define			ATMEL_ECC_PAGESIZE_528		(0)
-#define			ATMEL_ECC_PAGESIZE_1056		(1)
-#define			ATMEL_ECC_PAGESIZE_2112		(2)
-#define			ATMEL_ECC_PAGESIZE_4224		(3)
-
-#define ATMEL_ECC_SR		0x08			/* Status register */
-#define		ATMEL_ECC_RECERR		(1 << 0)		/* Recoverable Error */
-#define		ATMEL_ECC_ECCERR		(1 << 1)		/* ECC Single Bit Error */
-#define		ATMEL_ECC_MULERR		(1 << 2)		/* Multiple Errors */
-
-#define ATMEL_ECC_PR		0x0c			/* Parity register */
-#define		ATMEL_ECC_BITADDR	(0xf << 0)		/* Bit Error Address */
-#define		ATMEL_ECC_WORDADDR	(0xfff << 4)		/* Word Error Address */
-
-#define ATMEL_ECC_NPR		0x10			/* NParity register */
-#define		ATMEL_ECC_NPARITY	(0xffff << 0)		/* NParity */
-
-/* PMECC Register Definitions */
-#define ATMEL_PMECC_CFG			0x000	/* Configuration Register */
-#define		PMECC_CFG_BCH_ERR2		(0 << 0)
-#define		PMECC_CFG_BCH_ERR4		(1 << 0)
-#define		PMECC_CFG_BCH_ERR8		(2 << 0)
-#define		PMECC_CFG_BCH_ERR12		(3 << 0)
-#define		PMECC_CFG_BCH_ERR24		(4 << 0)
-#define		PMECC_CFG_BCH_ERR32		(5 << 0)
-
-#define		PMECC_CFG_SECTOR512		(0 << 4)
-#define		PMECC_CFG_SECTOR1024		(1 << 4)
-
-#define		PMECC_CFG_PAGE_1SECTOR		(0 << 8)
-#define		PMECC_CFG_PAGE_2SECTORS		(1 << 8)
-#define		PMECC_CFG_PAGE_4SECTORS		(2 << 8)
-#define		PMECC_CFG_PAGE_8SECTORS		(3 << 8)
-
-#define		PMECC_CFG_READ_OP		(0 << 12)
-#define		PMECC_CFG_WRITE_OP		(1 << 12)
-
-#define		PMECC_CFG_SPARE_ENABLE		(1 << 16)
-#define		PMECC_CFG_SPARE_DISABLE		(0 << 16)
-
-#define		PMECC_CFG_AUTO_ENABLE		(1 << 20)
-#define		PMECC_CFG_AUTO_DISABLE		(0 << 20)
-
-#define ATMEL_PMECC_SAREA		0x004	/* Spare area size */
-#define ATMEL_PMECC_SADDR		0x008	/* PMECC starting address */
-#define ATMEL_PMECC_EADDR		0x00c	/* PMECC ending address */
-#define ATMEL_PMECC_CLK			0x010	/* PMECC clock control */
-#define		PMECC_CLK_133MHZ		(2 << 0)
-
-#define ATMEL_PMECC_CTRL		0x014	/* PMECC control register */
-#define		PMECC_CTRL_RST			(1 << 0)
-#define		PMECC_CTRL_DATA			(1 << 1)
-#define		PMECC_CTRL_USER			(1 << 2)
-#define		PMECC_CTRL_ENABLE		(1 << 4)
-#define		PMECC_CTRL_DISABLE		(1 << 5)
-
-#define ATMEL_PMECC_SR			0x018	/* PMECC status register */
-#define		PMECC_SR_BUSY			(1 << 0)
-#define		PMECC_SR_ENABLE			(1 << 4)
-
-#define ATMEL_PMECC_IER			0x01c	/* PMECC interrupt enable */
-#define		PMECC_IER_ENABLE		(1 << 0)
-#define ATMEL_PMECC_IDR			0x020	/* PMECC interrupt disable */
-#define		PMECC_IER_DISABLE		(1 << 0)
-#define ATMEL_PMECC_IMR			0x024	/* PMECC interrupt mask */
-#define		PMECC_IER_MASK			(1 << 0)
-#define ATMEL_PMECC_ISR			0x028	/* PMECC interrupt status */
-#define ATMEL_PMECC_ECCx		0x040	/* PMECC ECC x */
-#define ATMEL_PMECC_REMx		0x240	/* PMECC REM x */
-
-/* PMERRLOC Register Definitions */
-#define ATMEL_PMERRLOC_ELCFG		0x000	/* Error location config */
-#define		PMERRLOC_ELCFG_SECTOR_512	(0 << 0)
-#define		PMERRLOC_ELCFG_SECTOR_1024	(1 << 0)
-#define		PMERRLOC_ELCFG_NUM_ERRORS(n)	((n) << 16)
-
-#define ATMEL_PMERRLOC_ELPRIM		0x004	/* Error location primitive */
-#define ATMEL_PMERRLOC_ELEN		0x008	/* Error location enable */
-#define ATMEL_PMERRLOC_ELDIS		0x00c	/* Error location disable */
-#define		PMERRLOC_DISABLE		(1 << 0)
-
-#define ATMEL_PMERRLOC_ELSR		0x010	/* Error location status */
-#define		PMERRLOC_ELSR_BUSY		(1 << 0)
-#define ATMEL_PMERRLOC_ELIER		0x014	/* Error location int enable */
-#define ATMEL_PMERRLOC_ELIDR		0x018	/* Error location int disable */
-#define ATMEL_PMERRLOC_ELIMR		0x01c	/* Error location int mask */
-#define ATMEL_PMERRLOC_ELISR		0x020	/* Error location int status */
-#define		PMERRLOC_ERR_NUM_MASK		(0x1f << 8)
-#define		PMERRLOC_CALC_DONE		(1 << 0)
-#define ATMEL_PMERRLOC_SIGMAx		0x028	/* Error location SIGMA x */
-
-/*
- * The ATMEL_PMERRLOC_ELx register location depends from the number of
- * bits corrected by the PMECC controller. Do not use it.
- */
-
-/* Register access macros for PMECC */
-#define pmecc_readl_relaxed(addr, reg) \
-	readl_relaxed((addr) + ATMEL_PMECC_##reg)
-
-#define pmecc_writel(addr, reg, value) \
-	writel((value), (addr) + ATMEL_PMECC_##reg)
-
-#define pmecc_readb_ecc_relaxed(addr, sector, n) \
-	readb_relaxed((addr) + ATMEL_PMECC_ECCx + ((sector) * 0x40) + (n))
-
-#define pmecc_readl_rem_relaxed(addr, sector, n) \
-	readl_relaxed((addr) + ATMEL_PMECC_REMx + ((sector) * 0x40) + ((n) * 4))
-
-#define pmerrloc_readl_relaxed(addr, reg) \
-	readl_relaxed((addr) + ATMEL_PMERRLOC_##reg)
-
-#define pmerrloc_writel(addr, reg, value) \
-	writel((value), (addr) + ATMEL_PMERRLOC_##reg)
-
-#define pmerrloc_writel_sigma_relaxed(addr, n, value) \
-	writel_relaxed((value), (addr) + ATMEL_PMERRLOC_SIGMAx + ((n) * 4))
-
-#define pmerrloc_readl_sigma_relaxed(addr, n) \
-	readl_relaxed((addr) + ATMEL_PMERRLOC_SIGMAx + ((n) * 4))
-
-#define pmerrloc_readl_el_relaxed(addr, n) \
-	readl_relaxed((addr) + ((n) * 4))
-
-/* Galois field dimension */
-#define PMECC_GF_DIMENSION_13			13
-#define PMECC_GF_DIMENSION_14			14
-
-/* Primitive Polynomial used by PMECC */
-#define PMECC_GF_13_PRIMITIVE_POLY		0x201b
-#define PMECC_GF_14_PRIMITIVE_POLY		0x4443
-
-#define PMECC_LOOKUP_TABLE_SIZE_512		0x2000
-#define PMECC_LOOKUP_TABLE_SIZE_1024		0x4000
-
-/* Time out value for reading PMECC status register */
-#define PMECC_MAX_TIMEOUT_MS			100
-
-/* Reserved bytes in oob area */
-#define PMECC_OOB_RESERVED_BYTES		2
-
-#endif
diff --git a/drivers/mtd/nand/atmel_nand_nfc.h b/drivers/mtd/nand/atmel_nand_nfc.h
deleted file mode 100644
index 4d5d26221a7e..000000000000
--- a/drivers/mtd/nand/atmel_nand_nfc.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Atmel Nand Flash Controller (NFC) - System peripherals regsters.
- * Based on SAMA5D3 datasheet.
- *
- * © Copyright 2013 Atmel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef ATMEL_NAND_NFC_H
-#define ATMEL_NAND_NFC_H
-
-/*
- * HSMC NFC registers
- */
-#define ATMEL_HSMC_NFC_CFG	0x00		/* NFC Configuration Register */
-#define		NFC_CFG_PAGESIZE	(7 << 0)
-#define			NFC_CFG_PAGESIZE_512	(0 << 0)
-#define			NFC_CFG_PAGESIZE_1024	(1 << 0)
-#define			NFC_CFG_PAGESIZE_2048	(2 << 0)
-#define			NFC_CFG_PAGESIZE_4096	(3 << 0)
-#define			NFC_CFG_PAGESIZE_8192	(4 << 0)
-#define		NFC_CFG_WSPARE		(1 << 8)
-#define		NFC_CFG_RSPARE		(1 << 9)
-#define		NFC_CFG_NFC_DTOCYC	(0xf << 16)
-#define		NFC_CFG_NFC_DTOMUL	(0x7 << 20)
-#define		NFC_CFG_NFC_SPARESIZE	(0x7f << 24)
-#define		NFC_CFG_NFC_SPARESIZE_BIT_POS	24
-
-#define ATMEL_HSMC_NFC_CTRL	0x04		/* NFC Control Register */
-#define		NFC_CTRL_ENABLE		(1 << 0)
-#define		NFC_CTRL_DISABLE	(1 << 1)
-
-#define ATMEL_HSMC_NFC_SR	0x08		/* NFC Status Register */
-#define		NFC_SR_BUSY		(1 << 8)
-#define		NFC_SR_XFR_DONE		(1 << 16)
-#define		NFC_SR_CMD_DONE		(1 << 17)
-#define		NFC_SR_DTOE		(1 << 20)
-#define		NFC_SR_UNDEF		(1 << 21)
-#define		NFC_SR_AWB		(1 << 22)
-#define		NFC_SR_ASE		(1 << 23)
-#define		NFC_SR_RB_EDGE		(1 << 24)
-
-#define ATMEL_HSMC_NFC_IER	0x0c
-#define ATMEL_HSMC_NFC_IDR	0x10
-#define ATMEL_HSMC_NFC_IMR	0x14
-#define ATMEL_HSMC_NFC_CYCLE0	0x18		/* NFC Address Cycle Zero */
-#define		ATMEL_HSMC_NFC_ADDR_CYCLE0	(0xff)
-
-#define ATMEL_HSMC_NFC_BANK	0x1c		/* NFC Bank Register */
-#define		ATMEL_HSMC_NFC_BANK0		(0 << 0)
-#define		ATMEL_HSMC_NFC_BANK1		(1 << 0)
-
-#define nfc_writel(addr, reg, value) \
-	writel((value), (addr) + ATMEL_HSMC_NFC_##reg)
-
-#define nfc_readl(addr, reg) \
-	readl_relaxed((addr) + ATMEL_HSMC_NFC_##reg)
-
-/*
- * NFC Address Command definitions
- */
-#define NFCADDR_CMD_CMD1	(0xff << 2)	/* Command for Cycle 1 */
-#define NFCADDR_CMD_CMD1_BIT_POS	2
-#define NFCADDR_CMD_CMD2	(0xff << 10)	/* Command for Cycle 2 */
-#define NFCADDR_CMD_CMD2_BIT_POS	10
-#define NFCADDR_CMD_VCMD2	(0x1 << 18)	/* Valid Cycle 2 Command */
-#define NFCADDR_CMD_ACYCLE	(0x7 << 19)	/* Number of Address required */
-#define		NFCADDR_CMD_ACYCLE_NONE		(0x0 << 19)
-#define		NFCADDR_CMD_ACYCLE_1		(0x1 << 19)
-#define		NFCADDR_CMD_ACYCLE_2		(0x2 << 19)
-#define		NFCADDR_CMD_ACYCLE_3		(0x3 << 19)
-#define		NFCADDR_CMD_ACYCLE_4		(0x4 << 19)
-#define		NFCADDR_CMD_ACYCLE_5		(0x5 << 19)
-#define NFCADDR_CMD_ACYCLE_BIT_POS	19
-#define NFCADDR_CMD_CSID	(0x7 << 22)	/* Chip Select Identifier */
-#define		NFCADDR_CMD_CSID_0		(0x0 << 22)
-#define		NFCADDR_CMD_CSID_1		(0x1 << 22)
-#define		NFCADDR_CMD_CSID_2		(0x2 << 22)
-#define		NFCADDR_CMD_CSID_3		(0x3 << 22)
-#define		NFCADDR_CMD_CSID_4		(0x4 << 22)
-#define		NFCADDR_CMD_CSID_5		(0x5 << 22)
-#define		NFCADDR_CMD_CSID_6		(0x6 << 22)
-#define		NFCADDR_CMD_CSID_7		(0x7 << 22)
-#define NFCADDR_CMD_DATAEN	(0x1 << 25)	/* Data Transfer Enable */
-#define NFCADDR_CMD_DATADIS	(0x0 << 25)	/* Data Transfer Disable */
-#define NFCADDR_CMD_NFCRD	(0x0 << 26)	/* NFC Read Enable */
-#define NFCADDR_CMD_NFCWR	(0x1 << 26)	/* NFC Write Enable */
-#define NFCADDR_CMD_NFCBUSY	(0x1 << 27)	/* NFC Busy */
-
-#define nfc_cmd_addr1234_writel(cmd, addr1234, nfc_base) \
-	writel((addr1234), (cmd) + nfc_base)
-
-#define nfc_cmd_readl(bitstatus, nfc_base) \
-	readl_relaxed((bitstatus) + nfc_base)
-
-#define NFC_TIME_OUT_MS		100
-#define	NFC_SRAM_BANK1_OFFSET	0x1200
-
-#endif
-- 
cgit v1.2.3


From 82d0bf3417bd4a8f98b54da72d80c19d3d227800 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 16 Mar 2017 09:02:41 +0100
Subject: mtd: nand: atmel: Document the new DT bindings

The old NAND bindings were not exactly describing the hardware topology
and were preventing definitions of several NAND chips under the same
NAND controller.

New bindings address these limitations and should be preferred over the
old ones for new SoCs/boards.
Old bindings are still supported for backward compatibility but are
marked deprecated in the doc.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reviewed-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Acked-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/mtd/atmel-nand.txt         | 107 ++++++++++++++++++++-
 1 file changed, 106 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mtd/atmel-nand.txt b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
index 3e7ee99d3949..f6bee57e453a 100644
--- a/Documentation/devicetree/bindings/mtd/atmel-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
@@ -1,4 +1,109 @@
-Atmel NAND flash
+Atmel NAND flash controller bindings
+
+The NAND flash controller node should be defined under the EBI bus (see
+Documentation/devicetree/bindings/memory-controllers/atmel,ebi.txt).
+One or several NAND devices can be defined under this NAND controller.
+The NAND controller might be connected to an ECC engine.
+
+* NAND controller bindings:
+
+Required properties:
+- compatible: should be one of the following
+	"atmel,at91rm9200-nand-controller"
+	"atmel,at91sam9260-nand-controller"
+	"atmel,at91sam9261-nand-controller"
+	"atmel,at91sam9g45-nand-controller"
+	"atmel,sama5d3-nand-controller"
+- ranges: empty ranges property to forward EBI ranges definitions.
+- #address-cells: should be set to 2.
+- #size-cells: should be set to 1.
+- atmel,nfc-io: phandle to the NFC IO block. Only required for sama5d3
+		controllers.
+- atmel,nfc-sram: phandle to the NFC SRAM block. Only required for sama5d3
+		  controllers.
+
+Optional properties:
+- ecc-engine: phandle to the PMECC block. Only meaningful if the SoC embeds
+	      a PMECC engine.
+
+* NAND device/chip bindings:
+
+Required properties:
+- reg: describes the CS lines assigned to the NAND device. If the NAND device
+       exposes multiple CS lines (multi-dies chips), your reg property will
+       contain X tuples of 3 entries.
+       1st entry: the CS line this NAND chip is connected to
+       2nd entry: the base offset of the memory region assigned to this
+		  device (always 0)
+       3rd entry: the memory region size (always 0x800000)
+
+Optional properties:
+- rb-gpios: the GPIO(s) used to check the Ready/Busy status of the NAND.
+- cs-gpios: the GPIO(s) used to control the CS line.
+- det-gpios: the GPIO used to detect if a Smartmedia Card is present.
+- atmel,rb: an integer identifying the native Ready/Busy pin. Only meaningful
+	    on sama5 SoCs.
+
+All generic properties described in
+Documentation/devicetree/bindings/mtd/{common,nand}.txt also apply to the NAND
+device node, and NAND partitions should be defined under the NAND node as
+described in Documentation/devicetree/bindings/mtd/partition.txt.
+
+* ECC engine (PMECC) bindings:
+
+Required properties:
+- compatible: should be one of the following
+	"atmel,at91sam9g45-pmecc"
+	"atmel,sama5d4-pmecc"
+	"atmel,sama5d2-pmecc"
+- reg: should contain 2 register ranges. The first one is pointing to the PMECC
+       block, and the second one to the PMECC_ERRLOC block.
+
+Example:
+
+	pmecc: ecc-engine@ffffc070 {
+		compatible = "atmel,at91sam9g45-pmecc";
+                reg = <0xffffc070 0x490>,
+                      <0xffffc500 0x100>;
+	};
+
+	ebi: ebi@10000000 {
+		compatible = "atmel,sama5d3-ebi";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		atmel,smc = <&hsmc>;
+		reg = <0x10000000 0x10000000
+		       0x40000000 0x30000000>;
+		ranges = <0x0 0x0 0x10000000 0x10000000
+			  0x1 0x0 0x40000000 0x10000000
+			  0x2 0x0 0x50000000 0x10000000
+			  0x3 0x0 0x60000000 0x10000000>;
+		clocks = <&mck>;
+
+                nand_controller: nand-controller {
+			compatible = "atmel,sama5d3-nand-controller";
+			atmel,nfc-sram = <&nfc_sram>;
+			atmel,nfc-io = <&nfc_io>;
+			ecc-engine = <&pmecc>;
+			#address-cells = <2>;
+			#size-cells = <1>;
+			ranges;
+
+			nand@3 {
+				reg = <0x3 0x0 0x800000>;
+				atmel,rb = <0>;
+
+				/*
+				 * Put generic NAND/MTD properties and
+				 * subnodes here.
+				 */
+			};
+		};
+	};
+
+-----------------------------------------------------------------------
+
+Deprecated bindings (should not be used in new device trees):
 
 Required properties:
 - compatible: The possible values are:
-- 
cgit v1.2.3


From f107d7a43923a83d837b3ea3c7b7de58cd014bbd Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 16 Mar 2017 09:02:42 +0100
Subject: mtd: nand: Remove unused chip->write_page() hook

The last/only user of the chip->write_page() hook (the Atmel NAND
controller driver) has been reworked and is no longer specifying a custom
->write_page() implementation.
Drop this hook before someone else start abusing it.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reviewed-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 drivers/mtd/nand/nand_base.c | 12 +++++-------
 include/linux/mtd/nand.h     |  4 ----
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index c4c3386e4c0a..36258e69a389 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2633,7 +2633,7 @@ static int nand_write_page_syndrome(struct mtd_info *mtd,
 }
 
 /**
- * nand_write_page - [REPLACEABLE] write one page
+ * nand_write_page - write one page
  * @mtd: MTD device structure
  * @chip: NAND chip descriptor
  * @offset: address offset within the page
@@ -2836,9 +2836,10 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 			/* We still need to erase leftover OOB data */
 			memset(chip->oob_poi, 0xff, mtd->oobsize);
 		}
-		ret = chip->write_page(mtd, chip, column, bytes, wbuf,
-					oob_required, page, cached,
-					(ops->mode == MTD_OPS_RAW));
+
+		ret = nand_write_page(mtd, chip, column, bytes, wbuf,
+				      oob_required, page, cached,
+				      (ops->mode == MTD_OPS_RAW));
 		if (ret)
 			break;
 
@@ -4548,9 +4549,6 @@ int nand_scan_tail(struct mtd_info *mtd)
 		}
 	}
 
-	if (!chip->write_page)
-		chip->write_page = nand_write_page;
-
 	/*
 	 * Check ECC mode, default to software if 3byte/512byte hardware ECC is
 	 * selected and we have 256 byte pagesize fallback to software ECC
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index c7de017c7f4c..40657939797c 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -828,7 +828,6 @@ struct nand_manufacturer_ops {
  * @errstat:		[OPTIONAL] hardware specific function to perform
  *			additional error status checks (determine if errors are
  *			correctable).
- * @write_page:		[REPLACEABLE] High-level page write function
  * @manufacturer:	[INTERN] Contains manufacturer information
  */
 
@@ -854,9 +853,6 @@ struct nand_chip {
 	int (*scan_bbt)(struct mtd_info *mtd);
 	int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state,
 			int status, int page);
-	int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip,
-			uint32_t offset, int data_len, const uint8_t *buf,
-			int oob_required, int page, int cached, int raw);
 	int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip,
 			int feature_addr, uint8_t *subfeature_para);
 	int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip,
-- 
cgit v1.2.3


From 07604686e808cd93d352172806a7828860f048f5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 15:45:47 +0900
Subject: mtd: nand: relax ecc.read_page() return value for uncorrectable ECC

The comment for ecc.read_page() requires that it should return
"0 if bitflips uncorrectable".

Actually, drivers could return positive values when uncorrectable
bitflips occur.  For example, nand_read_page_swecc() is the case.
If ecc.correct() returns -EBADMSG for the first ECC sector, and
a positive value for the second one, nand_read_page_swecc() returns
a positive max_bitflips and increments ecc_stats.failed for the same
page.

The requirement can be relaxed by tweaking nand_do_read_ops().
Move the max_bitflips calculation below the retry.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Suggested-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 3 +--
 include/linux/mtd/nand.h     | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 36258e69a389..de6c8045c85b 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1993,8 +1993,6 @@ read_retry:
 				break;
 			}
 
-			max_bitflips = max_t(unsigned int, max_bitflips, ret);
-
 			/* Transfer not aligned data */
 			if (use_bufpoi) {
 				if (!NAND_HAS_SUBPAGE_READ(chip) && !oob &&
@@ -2045,6 +2043,7 @@ read_retry:
 			}
 
 			buf += bytes;
+			max_bitflips = max_t(unsigned int, max_bitflips, ret);
 		} else {
 			memcpy(buf, chip->buffers->databuf + col, bytes);
 			buf += bytes;
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 40657939797c..9e0c93c44bef 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -516,7 +516,7 @@ static inline void nand_hw_control_init(struct nand_hw_control *nfc)
  *			out-of-band data).
  * @read_page:	function to read a page according to the ECC generator
  *		requirements; returns maximum number of bitflips corrected in
- *		any single ECC step, 0 if bitflips uncorrectable, -EIO hw error
+ *		any single ECC step, -EIO hw error
  * @read_subpage:	function to read parts of the page covered by ECC;
  *			returns same as read_page()
  * @write_subpage:	function to write parts of the page covered by ECC.
-- 
cgit v1.2.3


From 8aabdf376f2baafbaaceeee1f3f7f7dca70f8e0b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 15:45:48 +0900
Subject: mtd: nand: denali: allow to override mtd->name from label DT property

Commit 28309572aac4 ("mtd: name the mtd device with an optional
label property") allow us to identify a chip in a user-friendly way.

If nand_set_flash_node() picks up the "label" from DT, let's respect
it.  Otherwise, let it fallback to the current name "denali-nand".

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Suggested-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 4ca75d3926ef..4e6d03d7a031 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1462,8 +1462,10 @@ int denali_init(struct denali_nand_info *denali)
 
 	/* now that our ISR is registered, we can enable interrupts */
 	denali_set_intr_modes(denali, true);
-	mtd->name = "denali-nand";
 	nand_set_flash_node(chip, denali->dev->of_node);
+	/* Fallback to the default name if DT did not give "label" property */
+	if (!mtd->name)
+		mtd->name = "denali-nand";
 
 	/* register the driver with the NAND core subsystem */
 	chip->select_chip = denali_select_chip;
-- 
cgit v1.2.3


From 8927ad394b0653329184863e3d44958f67705e84 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 15:45:49 +0900
Subject: mtd: nand: denali: remove meaningless pipeline read-ahead operation

The pipeline read-ahead function of the Denali IP enables continuous
reading from the device; while data is being read out by a CPU, the
controller maintains additional commands for streaming data from the
device.  This will reduce the latency of the second page or later.

This feature is obviously no help for per-page accessors of Linux
NAND driver interface.

In the current implementation, the pipeline command is issued to
load a single page, then data are read out immediately.  The use of
the pipeline operation is not adding any advantage, but just adding
complexity to the code.  Remove.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 42 +++---------------------------------------
 1 file changed, 3 insertions(+), 39 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 4e6d03d7a031..65cf7cccedbe 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -84,7 +84,6 @@ static inline struct denali_nand_info *mtd_to_denali(struct mtd_info *mtd)
 #define SPARE_ACCESS		0x41
 #define MAIN_ACCESS		0x42
 #define MAIN_SPARE_ACCESS	0x43
-#define PIPELINE_ACCESS		0x2000
 
 #define DENALI_READ	0
 #define DENALI_WRITE	0x100
@@ -683,15 +682,7 @@ static int denali_send_pipeline_cmd(struct denali_nand_info *denali,
 				    int access_type, int op)
 {
 	int status = PASS;
-	uint32_t page_count = 1;
-	uint32_t addr, cmd, irq_status, irq_mask;
-
-	if (op == DENALI_READ)
-		irq_mask = INTR__LOAD_COMP;
-	else if (op == DENALI_WRITE)
-		irq_mask = 0;
-	else
-		BUG();
+	uint32_t addr, cmd;
 
 	setup_ecc_for_xfer(denali, ecc_en, transfer_spare);
 
@@ -714,35 +705,8 @@ static int denali_send_pipeline_cmd(struct denali_nand_info *denali,
 		cmd = MODE_10 | addr;
 		index_addr(denali, cmd, access_type);
 
-		/*
-		 * page 33 of the NAND controller spec indicates we should not
-		 * use the pipeline commands in Spare area only mode.
-		 * So we don't.
-		 */
-		if (access_type == SPARE_ACCESS) {
-			cmd = MODE_01 | addr;
-			iowrite32(cmd, denali->flash_mem);
-		} else {
-			index_addr(denali, cmd,
-					PIPELINE_ACCESS | op | page_count);
-
-			/*
-			 * wait for command to be accepted
-			 * can always use status0 bit as the
-			 * mask is identical for each bank.
-			 */
-			irq_status = wait_for_irq(denali, irq_mask);
-
-			if (irq_status == 0) {
-				dev_err(denali->dev,
-					"cmd, page, addr on timeout (0x%x, 0x%x, 0x%x)\n",
-					cmd, denali->page, addr);
-				status = FAIL;
-			} else {
-				cmd = MODE_01 | addr;
-				iowrite32(cmd, denali->flash_mem);
-			}
-		}
+		cmd = MODE_01 | addr;
+		iowrite32(cmd, denali->flash_mem);
 	}
 	return status;
 }
-- 
cgit v1.2.3


From 20d48595f8857c9b7e0d31d9734ebe18d63faea1 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 15:45:50 +0900
Subject: mtd: nand: denali: fix bitflips calculation in handle_ecc()

This function is wrong in multiple ways:

[1] Counting corrected bytes instead of corrected bits.

The following code is counting the number of corrected _bytes_.

    /* correct the ECC error */
    buf[offset] ^= err_cor_value;
    mtd->ecc_stats.corrected++;
    bitflips++;

What the core framework expects is the number of corrected _bits_.
They can be different if multiple bitflips occur within one byte.

[2] total number of errors instead of max of per-sector errors

The core framework expects that corrected errors are counted per
sector, then the max value should be taken.  The current code simply
iterates over the whole page, i.e. counts the total number of
correction in the page.  This means "too many bitflips" is triggered
earlier than it should be, i.e. the NAND device is worn out sooner.

Besides those bugs, this function is unreadable due to the deep
nesting.  Notice the whole code in this function is wrapped in
if (irq_status & INTR__ECC_ERR), so this conditional can be moved
out of the function.  Also, use shorter names for local variables.

Re-work the function to fix all the issues.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 141 +++++++++++++++++++++++-----------------------
 1 file changed, 71 insertions(+), 70 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 65cf7cccedbe..c5c150a95fb6 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -836,80 +836,80 @@ static bool is_erased(uint8_t *buf, int len)
 #define ECC_SECTOR(x)	(((x) & ECC_ERROR_ADDRESS__SECTOR_NR) >> 12)
 #define ECC_BYTE(x)	(((x) & ECC_ERROR_ADDRESS__OFFSET))
 #define ECC_CORRECTION_VALUE(x) ((x) & ERR_CORRECTION_INFO__BYTEMASK)
-#define ECC_ERROR_CORRECTABLE(x) (!((x) & ERR_CORRECTION_INFO__ERROR_TYPE))
+#define ECC_ERROR_UNCORRECTABLE(x) ((x) & ERR_CORRECTION_INFO__ERROR_TYPE)
 #define ECC_ERR_DEVICE(x)	(((x) & ERR_CORRECTION_INFO__DEVICE_NR) >> 8)
 #define ECC_LAST_ERR(x)		((x) & ERR_CORRECTION_INFO__LAST_ERR_INFO)
 
-static bool handle_ecc(struct denali_nand_info *denali, uint8_t *buf,
-		       uint32_t irq_status, unsigned int *max_bitflips)
+static int handle_ecc(struct mtd_info *mtd, struct denali_nand_info *denali,
+		      uint8_t *buf, bool *check_erased_page)
 {
-	bool check_erased_page = false;
 	unsigned int bitflips = 0;
+	unsigned int max_bitflips = 0;
+	uint32_t err_addr, err_cor_info;
+	unsigned int err_byte, err_sector, err_device;
+	uint8_t err_cor_value;
+	unsigned int prev_sector = 0;
 
-	if (irq_status & INTR__ECC_ERR) {
-		/* read the ECC errors. we'll ignore them for now */
-		uint32_t err_address, err_correction_info, err_byte,
-			 err_sector, err_device, err_correction_value;
-		denali_set_intr_modes(denali, false);
-
-		do {
-			err_address = ioread32(denali->flash_reg +
-						ECC_ERROR_ADDRESS);
-			err_sector = ECC_SECTOR(err_address);
-			err_byte = ECC_BYTE(err_address);
-
-			err_correction_info = ioread32(denali->flash_reg +
-						ERR_CORRECTION_INFO);
-			err_correction_value =
-				ECC_CORRECTION_VALUE(err_correction_info);
-			err_device = ECC_ERR_DEVICE(err_correction_info);
-
-			if (ECC_ERROR_CORRECTABLE(err_correction_info)) {
-				/*
-				 * If err_byte is larger than ECC_SECTOR_SIZE,
-				 * means error happened in OOB, so we ignore
-				 * it. It's no need for us to correct it
-				 * err_device is represented the NAND error
-				 * bits are happened in if there are more
-				 * than one NAND connected.
-				 */
-				if (err_byte < ECC_SECTOR_SIZE) {
-					struct mtd_info *mtd =
-						nand_to_mtd(&denali->nand);
-					int offset;
-
-					offset = (err_sector *
-							ECC_SECTOR_SIZE +
-							err_byte) *
-							denali->devnum +
-							err_device;
-					/* correct the ECC error */
-					buf[offset] ^= err_correction_value;
-					mtd->ecc_stats.corrected++;
-					bitflips++;
-				}
-			} else {
-				/*
-				 * if the error is not correctable, need to
-				 * look at the page to see if it is an erased
-				 * page. if so, then it's not a real ECC error
-				 */
-				check_erased_page = true;
-			}
-		} while (!ECC_LAST_ERR(err_correction_info));
-		/*
-		 * Once handle all ecc errors, controller will triger
-		 * a ECC_TRANSACTION_DONE interrupt, so here just wait
-		 * for a while for this interrupt
-		 */
-		while (!(read_interrupt_status(denali) &
-				INTR__ECC_TRANSACTION_DONE))
-			cpu_relax();
-		clear_interrupts(denali);
-		denali_set_intr_modes(denali, true);
-	}
-	*max_bitflips = bitflips;
-	return check_erased_page;
+	/* read the ECC errors. we'll ignore them for now */
+	denali_set_intr_modes(denali, false);
+
+	do {
+		err_addr = ioread32(denali->flash_reg + ECC_ERROR_ADDRESS);
+		err_sector = ECC_SECTOR(err_addr);
+		err_byte = ECC_BYTE(err_addr);
+
+		err_cor_info = ioread32(denali->flash_reg + ERR_CORRECTION_INFO);
+		err_cor_value = ECC_CORRECTION_VALUE(err_cor_info);
+		err_device = ECC_ERR_DEVICE(err_cor_info);
+
+		/* reset the bitflip counter when crossing ECC sector */
+		if (err_sector != prev_sector)
+			bitflips = 0;
+
+		if (ECC_ERROR_UNCORRECTABLE(err_cor_info)) {
+			/*
+			 * if the error is not correctable, need to look at the
+			 * page to see if it is an erased page. if so, then
+			 * it's not a real ECC error
+			 */
+			*check_erased_page = true;
+		} else if (err_byte < ECC_SECTOR_SIZE) {
+			/*
+			 * If err_byte is larger than ECC_SECTOR_SIZE, means error
+			 * happened in OOB, so we ignore it. It's no need for
+			 * us to correct it err_device is represented the NAND
+			 * error bits are happened in if there are more than
+			 * one NAND connected.
+			 */
+			int offset;
+			unsigned int flips_in_byte;
+
+			offset = (err_sector * ECC_SECTOR_SIZE + err_byte) *
+						denali->devnum + err_device;
+
+			/* correct the ECC error */
+			flips_in_byte = hweight8(buf[offset] ^ err_cor_value);
+			buf[offset] ^= err_cor_value;
+			mtd->ecc_stats.corrected += flips_in_byte;
+			bitflips += flips_in_byte;
+
+			max_bitflips = max(max_bitflips, bitflips);
+		}
+
+		prev_sector = err_sector;
+	} while (!ECC_LAST_ERR(err_cor_info));
+
+	/*
+	 * Once handle all ecc errors, controller will trigger a
+	 * ECC_TRANSACTION_DONE interrupt, so here just wait for
+	 * a while for this interrupt
+	 */
+	while (!(read_interrupt_status(denali) & INTR__ECC_TRANSACTION_DONE))
+		cpu_relax();
+	clear_interrupts(denali);
+	denali_set_intr_modes(denali, true);
+
+	return max_bitflips;
 }
 
 /* programs the controller to either enable/disable DMA transfers */
@@ -1045,7 +1045,7 @@ static int denali_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
 static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 			    uint8_t *buf, int oob_required, int page)
 {
-	unsigned int max_bitflips;
+	unsigned int max_bitflips = 0;
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
 	dma_addr_t addr = denali->buf.dma_buf;
@@ -1077,7 +1077,8 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 
 	memcpy(buf, denali->buf.buf, mtd->writesize);
 
-	check_erased_page = handle_ecc(denali, buf, irq_status, &max_bitflips);
+	if (irq_status & INTR__ECC_ERR)
+		max_bitflips = handle_ecc(mtd, denali, buf, &check_erased_page);
 	denali_enable_dma(denali, false);
 
 	if (check_erased_page) {
-- 
cgit v1.2.3


From d29109be2e8d4a102d8304d7b8bb0d6dfe5e1d27 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 15:45:51 +0900
Subject: mtd: nand: denali: fix erased page checking

This part is wrong in multiple ways:

[1] is_erased() is called against "buf" twice, so the OOB area is
not checked at all.  The second call should check chip->oob_poi.

[2] This code block is nested by double "if (check_erase_page)".
The inner one is redundant.

[3] The ECC_ERROR_ADDRESS register reports which sector(s) had
uncorrectable ECC errors.  It is pointless to check the whole page
if only one sector contains errors.

[4] Unfortunately, the Denali ECC correction engine has already
manipulated the data buffer before it decides the bitflips are
uncorrectable.  That is, not all of the data are 0xFF after an
erased page is processed by the ECC engine.  The current is_erased()
helper could report false-positive ECC errors.  Actually, a certain
mount of bitflips are allowed in an erased page.  The core framework
provides nand_check_erased_ecc_chunk() that takes the threshold into
account.  Let's use this.

This commit reworks the code to solve those problems.

Please note the erased page checking is implemented as a separate
helper function instead of embedding it in the loop in handle_ecc().
The reason is that OOB data are needed for the erased page checking,
but the controller can not start a new transaction until all ECC
error information is read out from the registers.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 77 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 50 insertions(+), 27 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index c5c150a95fb6..64a3bdcc6b04 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -818,19 +818,44 @@ static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 	}
 }
 
-/*
- * this function examines buffers to see if they contain data that
- * indicate that the buffer is part of an erased region of flash.
- */
-static bool is_erased(uint8_t *buf, int len)
+static int denali_check_erased_page(struct mtd_info *mtd,
+				    struct nand_chip *chip, uint8_t *buf,
+				    unsigned long uncor_ecc_flags,
+				    unsigned int max_bitflips)
 {
-	int i;
+	uint8_t *ecc_code = chip->buffers->ecccode;
+	int ecc_steps = chip->ecc.steps;
+	int ecc_size = chip->ecc.size;
+	int ecc_bytes = chip->ecc.bytes;
+	int i, ret, stat;
+
+	ret = mtd_ooblayout_get_eccbytes(mtd, ecc_code, chip->oob_poi, 0,
+					 chip->ecc.total);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ecc_steps; i++) {
+		if (!(uncor_ecc_flags & BIT(i)))
+			continue;
+
+		stat = nand_check_erased_ecc_chunk(buf, ecc_size,
+						  ecc_code, ecc_bytes,
+						  NULL, 0,
+						  chip->ecc.strength);
+		if (stat < 0) {
+			mtd->ecc_stats.failed++;
+		} else {
+			mtd->ecc_stats.corrected += stat;
+			max_bitflips = max_t(unsigned int, max_bitflips, stat);
+		}
 
-	for (i = 0; i < len; i++)
-		if (buf[i] != 0xFF)
-			return false;
-	return true;
+		buf += ecc_size;
+		ecc_code += ecc_bytes;
+	}
+
+	return max_bitflips;
 }
+
 #define ECC_SECTOR_SIZE 512
 
 #define ECC_SECTOR(x)	(((x) & ECC_ERROR_ADDRESS__SECTOR_NR) >> 12)
@@ -841,7 +866,7 @@ static bool is_erased(uint8_t *buf, int len)
 #define ECC_LAST_ERR(x)		((x) & ERR_CORRECTION_INFO__LAST_ERR_INFO)
 
 static int handle_ecc(struct mtd_info *mtd, struct denali_nand_info *denali,
-		      uint8_t *buf, bool *check_erased_page)
+		      uint8_t *buf, unsigned long *uncor_ecc_flags)
 {
 	unsigned int bitflips = 0;
 	unsigned int max_bitflips = 0;
@@ -868,11 +893,10 @@ static int handle_ecc(struct mtd_info *mtd, struct denali_nand_info *denali,
 
 		if (ECC_ERROR_UNCORRECTABLE(err_cor_info)) {
 			/*
-			 * if the error is not correctable, need to look at the
-			 * page to see if it is an erased page. if so, then
-			 * it's not a real ECC error
+			 * Check later if this is a real ECC error, or
+			 * an erased sector.
 			 */
-			*check_erased_page = true;
+			*uncor_ecc_flags |= BIT(err_sector);
 		} else if (err_byte < ECC_SECTOR_SIZE) {
 			/*
 			 * If err_byte is larger than ECC_SECTOR_SIZE, means error
@@ -1045,7 +1069,6 @@ static int denali_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
 static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 			    uint8_t *buf, int oob_required, int page)
 {
-	unsigned int max_bitflips = 0;
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
 	dma_addr_t addr = denali->buf.dma_buf;
@@ -1053,7 +1076,8 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 
 	uint32_t irq_status;
 	uint32_t irq_mask = INTR__ECC_TRANSACTION_DONE | INTR__ECC_ERR;
-	bool check_erased_page = false;
+	unsigned long uncor_ecc_flags = 0;
+	int stat = 0;
 
 	if (page != denali->page) {
 		dev_err(denali->dev,
@@ -1078,21 +1102,20 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	memcpy(buf, denali->buf.buf, mtd->writesize);
 
 	if (irq_status & INTR__ECC_ERR)
-		max_bitflips = handle_ecc(mtd, denali, buf, &check_erased_page);
+		stat = handle_ecc(mtd, denali, buf, &uncor_ecc_flags);
 	denali_enable_dma(denali, false);
 
-	if (check_erased_page) {
+	if (stat < 0)
+		return stat;
+
+	if (uncor_ecc_flags) {
 		read_oob_data(mtd, chip->oob_poi, denali->page);
 
-		/* check ECC failures that may have occurred on erased pages */
-		if (check_erased_page) {
-			if (!is_erased(buf, mtd->writesize))
-				mtd->ecc_stats.failed++;
-			if (!is_erased(buf, mtd->oobsize))
-				mtd->ecc_stats.failed++;
-		}
+		stat = denali_check_erased_page(mtd, chip, buf,
+						uncor_ecc_flags, stat);
 	}
-	return max_bitflips;
+
+	return stat;
 }
 
 static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-- 
cgit v1.2.3


From 24715c749b2ff5545f0316e7ad8b65026f9e9612 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 15:45:52 +0900
Subject: mtd: nand: denali: support HW_ECC_FIXUP capability

Some old versions of the Denali IP (perhaps used only for Intel?)
detects ECC errors and provides correct data via a register, but
does not touch the transferred data.  So, the software must fixup
the data in the buffer according to the provided ECC correction
information.

Newer versions perform ECC correction before transferring the data.
No more software intervention is needed.  The ECC_ERROR_ADDRESS and
ECC_CORRECTION_INFO registers were deprecated.  Instead, the number
of corrected bit-flips are reported via the ECC_COR_INFO register.
When an uncorrectable ECC error happens, a status flag is set to the
INTR_STATUS and ECC_COR_INFO registers.

As is often the case with this IP, the register view of INTR_STATUS
had broken compatibility.

For older versions (SW ECC fixup):
  bit 0:  ECC_TRANSACTION_DONE
  bit 1:  ECC_ERR

For newer versions (HW ECC fixup):
  bit 0:  ECC_UNCOR_ERR
  bit 1:  Reserved

Due to this difference, the irq_mask must be fixed too.

The existing handle_ecc() has been renamed to denali_sw_ecc_fixup()
for clarification.

What is unfortunate with this feature is we can not know the total
number of corrected/uncorrected errors in a page.  The register
ECC_COR_INFO reports the maximum of per-sector bitflips.  This is
useful for ->read_page return value, but ecc_stats.{corrected,failed}
increments may not be precise.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 52 ++++++++++++++++++++++++++++++++++++++++-------
 drivers/mtd/nand/denali.h | 14 +++++++++++--
 2 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 64a3bdcc6b04..b95e33a84201 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -856,6 +856,41 @@ static int denali_check_erased_page(struct mtd_info *mtd,
 	return max_bitflips;
 }
 
+static int denali_hw_ecc_fixup(struct mtd_info *mtd,
+			       struct denali_nand_info *denali,
+			       unsigned long *uncor_ecc_flags)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	int bank = denali->flash_bank;
+	uint32_t ecc_cor;
+	unsigned int max_bitflips;
+
+	ecc_cor = ioread32(denali->flash_reg + ECC_COR_INFO(bank));
+	ecc_cor >>= ECC_COR_INFO__SHIFT(bank);
+
+	if (ecc_cor & ECC_COR_INFO__UNCOR_ERR) {
+		/*
+		 * This flag is set when uncorrectable error occurs at least in
+		 * one ECC sector.  We can not know "how many sectors", or
+		 * "which sector(s)".  We need erase-page check for all sectors.
+		 */
+		*uncor_ecc_flags = GENMASK(chip->ecc.steps - 1, 0);
+		return 0;
+	}
+
+	max_bitflips = ecc_cor & ECC_COR_INFO__MAX_ERRORS;
+
+	/*
+	 * The register holds the maximum of per-sector corrected bitflips.
+	 * This is suitable for the return value of the ->read_page() callback.
+	 * Unfortunately, we can not know the total number of corrected bits in
+	 * the page.  Increase the stats by max_bitflips. (compromised solution)
+	 */
+	mtd->ecc_stats.corrected += max_bitflips;
+
+	return max_bitflips;
+}
+
 #define ECC_SECTOR_SIZE 512
 
 #define ECC_SECTOR(x)	(((x) & ECC_ERROR_ADDRESS__SECTOR_NR) >> 12)
@@ -865,8 +900,9 @@ static int denali_check_erased_page(struct mtd_info *mtd,
 #define ECC_ERR_DEVICE(x)	(((x) & ERR_CORRECTION_INFO__DEVICE_NR) >> 8)
 #define ECC_LAST_ERR(x)		((x) & ERR_CORRECTION_INFO__LAST_ERR_INFO)
 
-static int handle_ecc(struct mtd_info *mtd, struct denali_nand_info *denali,
-		      uint8_t *buf, unsigned long *uncor_ecc_flags)
+static int denali_sw_ecc_fixup(struct mtd_info *mtd,
+			       struct denali_nand_info *denali,
+			       unsigned long *uncor_ecc_flags, uint8_t *buf)
 {
 	unsigned int bitflips = 0;
 	unsigned int max_bitflips = 0;
@@ -1070,12 +1106,12 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 			    uint8_t *buf, int oob_required, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-
 	dma_addr_t addr = denali->buf.dma_buf;
 	size_t size = mtd->writesize + mtd->oobsize;
-
 	uint32_t irq_status;
-	uint32_t irq_mask = INTR__ECC_TRANSACTION_DONE | INTR__ECC_ERR;
+	uint32_t irq_mask = denali->caps & DENALI_CAP_HW_ECC_FIXUP ?
+				INTR__DMA_CMD_COMP | INTR__ECC_UNCOR_ERR :
+				INTR__ECC_TRANSACTION_DONE | INTR__ECC_ERR;
 	unsigned long uncor_ecc_flags = 0;
 	int stat = 0;
 
@@ -1101,8 +1137,10 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 
 	memcpy(buf, denali->buf.buf, mtd->writesize);
 
-	if (irq_status & INTR__ECC_ERR)
-		stat = handle_ecc(mtd, denali, buf, &uncor_ecc_flags);
+	if (denali->caps & DENALI_CAP_HW_ECC_FIXUP)
+		stat = denali_hw_ecc_fixup(mtd, denali, &uncor_ecc_flags);
+	else if (irq_status & INTR__ECC_ERR)
+		stat = denali_sw_ecc_fixup(mtd, denali, &uncor_ecc_flags, buf);
 	denali_enable_dma(denali, false);
 
 	if (stat < 0)
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index 483c0e988f33..e532956b7177 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -20,6 +20,7 @@
 #ifndef __DENALI_H__
 #define __DENALI_H__
 
+#include <linux/bitops.h>
 #include <linux/mtd/nand.h>
 
 #define DEVICE_RESET				0x0
@@ -218,8 +219,10 @@
 
 #define INTR_STATUS(__bank)	(0x410 + ((__bank) * 0x50))
 #define INTR_EN(__bank)		(0x420 + ((__bank) * 0x50))
-#define     INTR__ECC_TRANSACTION_DONE			0x0001
-#define     INTR__ECC_ERR				0x0002
+/* bit[1:0] is used differently depending on IP version */
+#define     INTR__ECC_UNCOR_ERR				0x0001	/* new IP */
+#define     INTR__ECC_TRANSACTION_DONE			0x0001	/* old IP */
+#define     INTR__ECC_ERR				0x0002	/* old IP */
 #define     INTR__DMA_CMD_COMP				0x0004
 #define     INTR__TIME_OUT				0x0008
 #define     INTR__PROGRAM_FAIL				0x0010
@@ -259,6 +262,11 @@
 #define     ERR_CORRECTION_INFO__ERROR_TYPE		0x4000
 #define     ERR_CORRECTION_INFO__LAST_ERR_INFO		0x8000
 
+#define ECC_COR_INFO(bank)			(0x650 + (bank) / 2 * 0x10)
+#define     ECC_COR_INFO__SHIFT(bank)			((bank) % 2 * 8)
+#define     ECC_COR_INFO__MAX_ERRORS			0x007f
+#define     ECC_COR_INFO__UNCOR_ERR			0x0080
+
 #define DMA_ENABLE				0x700
 #define     DMA_ENABLE__FLAG				0x0001
 
@@ -338,6 +346,8 @@ struct denali_nand_info {
 	unsigned int caps;
 };
 
+#define DENALI_CAP_HW_ECC_FIXUP			BIT(0)
+
 extern int denali_init(struct denali_nand_info *denali);
 extern void denali_remove(struct denali_nand_info *denali);
 
-- 
cgit v1.2.3


From a56609c4c3f34de76d905e39160511b3c53310ac Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 15:45:53 +0900
Subject: mtd: nand: denali_dt: enable HW_ECC_FIXUP for Altera SOCFPGA variant

There are various customizable parameters, so several variants for
this IP.  A generic compatible like "denali,denali-nand-dt" is
useless.  Moreover, there are multiple things wrong with this string.
(Refer to Rob's comment [1])

The "denali,denali-nand-dt" was added by Altera for the SOCFPGA port.
Replace it with a more specific string "altr,socfpga-denali-nand".
There are no users (in upstream) of the old compatible string.

The Denali IP on SOCFPGA incorporates the hardware ECC fixup engine.
So, this capability should be associated with the compatible.

[1] https://lkml.org/lkml/2016/12/1/450

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 Documentation/devicetree/bindings/mtd/denali-nand.txt |  5 +++--
 drivers/mtd/nand/denali_dt.c                          | 14 ++++++++++----
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt b/Documentation/devicetree/bindings/mtd/denali-nand.txt
index b04d03a1d499..6f4ab4c9bebc 100644
--- a/Documentation/devicetree/bindings/mtd/denali-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt
@@ -1,7 +1,8 @@
 * Denali NAND controller
 
 Required properties:
-  - compatible : should be "denali,denali-nand-dt"
+  - compatible : should be one of the following:
+      "altr,socfpga-denali-nand"            - for Altera SOCFPGA
   - reg : should contain registers location and length for data and reg.
   - reg-names: Should contain the reg names "nand_data" and "denali_reg"
   - interrupts : The interrupt number.
@@ -15,7 +16,7 @@ Examples:
 nand: nand@ff900000 {
 	#address-cells = <1>;
 	#size-cells = <1>;
-	compatible = "denali,denali-nand-dt";
+	compatible = "altr,socfpga-denali-nand";
 	reg = <0xff900000 0x100000>, <0xffb80000 0x10000>;
 	reg-names = "nand_data", "denali_reg";
 	interrupts = <0 144 4>;
diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
index 293ddb8e5a32..9577bfde960e 100644
--- a/drivers/mtd/nand/denali_dt.c
+++ b/drivers/mtd/nand/denali_dt.c
@@ -33,11 +33,17 @@ struct denali_dt_data {
 	unsigned int caps;
 };
 
-static const struct of_device_id denali_nand_dt_ids[] = {
-		{ .compatible = "denali,denali-nand-dt" },
-		{ /* sentinel */ }
-	};
+static const struct denali_dt_data denali_socfpga_data = {
+	.caps = DENALI_CAP_HW_ECC_FIXUP,
+};
 
+static const struct of_device_id denali_nand_dt_ids[] = {
+	{
+		.compatible = "altr,socfpga-denali-nand",
+		.data = &denali_socfpga_data,
+	},
+	{ /* sentinel */ }
+};
 MODULE_DEVICE_TABLE(of, denali_nand_dt_ids);
 
 static u64 denali_dma_mask;
-- 
cgit v1.2.3


From 210a2c876fe02c9440569c9282af43e7a85562e3 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 15:45:54 +0900
Subject: mtd: nand: denali: support 64bit capable DMA engine

The current driver only supports the DMA engine up to 32 bit
physical address, but there also exists 64 bit capable DMA engine
for this IP.

The data DMA setup sequence is completely different, so I added the
64 bit DMA code as a new function denali_setup_dma64().  The 32 bit
one has been renamed to denali_setup_dma32().

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 39 +++++++++++++++++++++++++++++++++++----
 drivers/mtd/nand/denali.h |  1 +
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index b95e33a84201..417a8950cf49 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -979,8 +979,30 @@ static void denali_enable_dma(struct denali_nand_info *denali, bool en)
 	ioread32(denali->flash_reg + DMA_ENABLE);
 }
 
-/* setups the HW to perform the data DMA */
-static void denali_setup_dma(struct denali_nand_info *denali, int op)
+static void denali_setup_dma64(struct denali_nand_info *denali, int op)
+{
+	uint32_t mode;
+	const int page_count = 1;
+	uint64_t addr = denali->buf.dma_buf;
+
+	mode = MODE_10 | BANK(denali->flash_bank) | denali->page;
+
+	/* DMA is a three step process */
+
+	/*
+	 * 1. setup transfer type, interrupt when complete,
+	 *    burst len = 64 bytes, the number of pages
+	 */
+	index_addr(denali, mode, 0x01002000 | (64 << 16) | op | page_count);
+
+	/* 2. set memory low address */
+	index_addr(denali, mode, addr);
+
+	/* 3. set memory high address */
+	index_addr(denali, mode, addr >> 32);
+}
+
+static void denali_setup_dma32(struct denali_nand_info *denali, int op)
 {
 	uint32_t mode;
 	const int page_count = 1;
@@ -1003,6 +1025,14 @@ static void denali_setup_dma(struct denali_nand_info *denali, int op)
 	index_addr(denali, mode | 0x14000, 0x2400);
 }
 
+static void denali_setup_dma(struct denali_nand_info *denali, int op)
+{
+	if (denali->caps & DENALI_CAP_DMA_64BIT)
+		denali_setup_dma64(denali, op);
+	else
+		denali_setup_dma32(denali, op);
+}
+
 /*
  * writes a page. user specifies type, and this function handles the
  * configuration details.
@@ -1518,8 +1548,9 @@ int denali_init(struct denali_nand_info *denali)
 		goto failed_req_irq;
 	}
 
-	/* Is 32-bit DMA supported? */
-	ret = dma_set_mask(denali->dev, DMA_BIT_MASK(32));
+	ret = dma_set_mask(denali->dev,
+			   DMA_BIT_MASK(denali->caps & DENALI_CAP_DMA_64BIT ?
+					64 : 32));
 	if (ret) {
 		dev_err(denali->dev, "No usable DMA configuration\n");
 		goto failed_req_irq;
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index e532956b7177..1f413d05a010 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -347,6 +347,7 @@ struct denali_nand_info {
 };
 
 #define DENALI_CAP_HW_ECC_FIXUP			BIT(0)
+#define DENALI_CAP_DMA_64BIT			BIT(1)
 
 extern int denali_init(struct denali_nand_info *denali);
 extern void denali_remove(struct denali_nand_info *denali);
-- 
cgit v1.2.3


From 60d920d32ca40660e382cf9ccbc236599a49e607 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 15:45:55 +0900
Subject: mtd: nand: denali_dt: remove dma-mask DT property

The driver sets appropriate DMA mask.  Delete the "dma-mask" DT
property.  See [1] for negative comments for this binding.

[1] https://lkml.org/lkml/2016/2/8/57

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 Documentation/devicetree/bindings/mtd/denali-nand.txt | 2 --
 drivers/mtd/nand/denali_dt.c                          | 9 ---------
 2 files changed, 11 deletions(-)

diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt b/Documentation/devicetree/bindings/mtd/denali-nand.txt
index 6f4ab4c9bebc..e593bbeb2115 100644
--- a/Documentation/devicetree/bindings/mtd/denali-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt
@@ -6,7 +6,6 @@ Required properties:
   - reg : should contain registers location and length for data and reg.
   - reg-names: Should contain the reg names "nand_data" and "denali_reg"
   - interrupts : The interrupt number.
-  - dm-mask : DMA bit mask
 
 The device tree may optionally contain sub-nodes describing partitions of the
 address space. See partition.txt for more detail.
@@ -20,5 +19,4 @@ nand: nand@ff900000 {
 	reg = <0xff900000 0x100000>, <0xffb80000 0x10000>;
 	reg-names = "nand_data", "denali_reg";
 	interrupts = <0 144 4>;
-	dma-mask = <0xffffffff>;
 };
diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
index 9577bfde960e..b8a8284d76cd 100644
--- a/drivers/mtd/nand/denali_dt.c
+++ b/drivers/mtd/nand/denali_dt.c
@@ -46,8 +46,6 @@ static const struct of_device_id denali_nand_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, denali_nand_dt_ids);
 
-static u64 denali_dma_mask;
-
 static int denali_dt_probe(struct platform_device *ofdev)
 {
 	struct resource *denali_reg, *nand_data;
@@ -83,13 +81,6 @@ static int denali_dt_probe(struct platform_device *ofdev)
 	if (IS_ERR(denali->flash_mem))
 		return PTR_ERR(denali->flash_mem);
 
-	if (!of_property_read_u32(ofdev->dev.of_node,
-		"dma-mask", (u32 *)&denali_dma_mask)) {
-		denali->dev->dma_mask = &denali_dma_mask;
-	} else {
-		denali->dev->dma_mask = NULL;
-	}
-
 	dt->clk = devm_clk_get(&ofdev->dev, NULL);
 	if (IS_ERR(dt->clk)) {
 		dev_err(&ofdev->dev, "no clk available\n");
-- 
cgit v1.2.3


From 3f5c35819fc37d5f35680a55327c940b6e8fad41 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 15:45:56 +0900
Subject: mtd: nand: denali_dt: use pdev instead of ofdev for platform_device

"pdev" is much more often used to point a platform_device, so this
will help the driver code look consistent across the kernel.

While we are here, fix "line over 80 characters" coding style
violations.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali_dt.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
index b8a8284d76cd..ada38636122c 100644
--- a/drivers/mtd/nand/denali_dt.c
+++ b/drivers/mtd/nand/denali_dt.c
@@ -46,7 +46,7 @@ static const struct of_device_id denali_nand_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, denali_nand_dt_ids);
 
-static int denali_dt_probe(struct platform_device *ofdev)
+static int denali_dt_probe(struct platform_device *pdev)
 {
 	struct resource *denali_reg, *nand_data;
 	struct denali_dt *dt;
@@ -54,36 +54,38 @@ static int denali_dt_probe(struct platform_device *ofdev)
 	struct denali_nand_info *denali;
 	int ret;
 
-	dt = devm_kzalloc(&ofdev->dev, sizeof(*dt), GFP_KERNEL);
+	dt = devm_kzalloc(&pdev->dev, sizeof(*dt), GFP_KERNEL);
 	if (!dt)
 		return -ENOMEM;
 	denali = &dt->denali;
 
-	data = of_device_get_match_data(&ofdev->dev);
+	data = of_device_get_match_data(&pdev->dev);
 	if (data)
 		denali->caps = data->caps;
 
 	denali->platform = DT;
-	denali->dev = &ofdev->dev;
-	denali->irq = platform_get_irq(ofdev, 0);
+	denali->dev = &pdev->dev;
+	denali->irq = platform_get_irq(pdev, 0);
 	if (denali->irq < 0) {
-		dev_err(&ofdev->dev, "no irq defined\n");
+		dev_err(&pdev->dev, "no irq defined\n");
 		return denali->irq;
 	}
 
-	denali_reg = platform_get_resource_byname(ofdev, IORESOURCE_MEM, "denali_reg");
-	denali->flash_reg = devm_ioremap_resource(&ofdev->dev, denali_reg);
+	denali_reg = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						  "denali_reg");
+	denali->flash_reg = devm_ioremap_resource(&pdev->dev, denali_reg);
 	if (IS_ERR(denali->flash_reg))
 		return PTR_ERR(denali->flash_reg);
 
-	nand_data = platform_get_resource_byname(ofdev, IORESOURCE_MEM, "nand_data");
-	denali->flash_mem = devm_ioremap_resource(&ofdev->dev, nand_data);
+	nand_data = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						 "nand_data");
+	denali->flash_mem = devm_ioremap_resource(&pdev->dev, nand_data);
 	if (IS_ERR(denali->flash_mem))
 		return PTR_ERR(denali->flash_mem);
 
-	dt->clk = devm_clk_get(&ofdev->dev, NULL);
+	dt->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dt->clk)) {
-		dev_err(&ofdev->dev, "no clk available\n");
+		dev_err(&pdev->dev, "no clk available\n");
 		return PTR_ERR(dt->clk);
 	}
 	clk_prepare_enable(dt->clk);
@@ -92,7 +94,7 @@ static int denali_dt_probe(struct platform_device *ofdev)
 	if (ret)
 		goto out_disable_clk;
 
-	platform_set_drvdata(ofdev, dt);
+	platform_set_drvdata(pdev, dt);
 	return 0;
 
 out_disable_clk:
@@ -101,9 +103,9 @@ out_disable_clk:
 	return ret;
 }
 
-static int denali_dt_remove(struct platform_device *ofdev)
+static int denali_dt_remove(struct platform_device *pdev)
 {
-	struct denali_dt *dt = platform_get_drvdata(ofdev);
+	struct denali_dt *dt = platform_get_drvdata(pdev);
 
 	denali_remove(&dt->denali);
 	clk_disable_unprepare(dt->clk);
-- 
cgit v1.2.3


From e7beeeec854c40c28caa53bd84fdf26e9e459f06 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 15:45:57 +0900
Subject: mtd: nand: denali: allow to override revision number

Commit 271707b1d817 ("mtd: nand: denali: max_banks calculation
changed in revision 5.1") added a revision check to support the
new max_banks encoding.  Its git-log states "The encoding of
max_banks changed in Denali revision 5.1".

There are exceptional cases, for example, the revision register on
some UniPhier SoCs says the IP is 5.0 but the max_banks is encoded
in the new format.

This IP updates the resister specification from time to time (often
breaking the backward compatibility), but the revision number is not
incremented correctly.

The max_banks is not only the case that needs revision checking.
Let's allow to override an incorrect revision number.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c    | 23 +++++++++++++----------
 drivers/mtd/nand/denali.h    |  3 +--
 drivers/mtd/nand/denali_dt.c |  5 ++++-
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 417a8950cf49..16634df2e39a 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -419,17 +419,12 @@ static void find_valid_banks(struct denali_nand_info *denali)
 static void detect_max_banks(struct denali_nand_info *denali)
 {
 	uint32_t features = ioread32(denali->flash_reg + FEATURES);
-	/*
-	 * Read the revision register, so we can calculate the max_banks
-	 * properly: the encoding changed from rev 5.0 to 5.1
-	 */
-	u32 revision = MAKE_COMPARABLE_REVISION(
-				ioread32(denali->flash_reg + REVISION));
 
-	if (revision < REVISION_5_1)
-		denali->max_banks = 2 << (features & FEATURES__N_BANKS);
-	else
-		denali->max_banks = 1 << (features & FEATURES__N_BANKS);
+	denali->max_banks = 1 << (features & FEATURES__N_BANKS);
+
+	/* the encoding changed from rev 5.0 to 5.1 */
+	if (denali->revision < 0x0501)
+		denali->max_banks <<= 1;
 }
 
 static uint16_t denali_nand_timing_set(struct denali_nand_info *denali)
@@ -1319,6 +1314,14 @@ static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
 /* Initialization code to bring the device up to a known good state */
 static void denali_hw_init(struct denali_nand_info *denali)
 {
+	/*
+	 * The REVISION register may not be reliable.  Platforms are allowed to
+	 * override it.
+	 */
+	if (!denali->revision)
+		denali->revision =
+				swab16(ioread32(denali->flash_reg + REVISION));
+
 	/*
 	 * tell driver how many bit controller will skip before
 	 * writing ECC code in OOB, this register may be already
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index 1f413d05a010..ec004850652a 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -179,8 +179,6 @@
 
 #define REVISION				0x370
 #define     REVISION__VALUE				0xffff
-#define MAKE_COMPARABLE_REVISION(x)		swab16((x) & REVISION__VALUE)
-#define REVISION_5_1				0x00000501
 
 #define ONFI_DEVICE_FEATURES			0x380
 #define     ONFI_DEVICE_FEATURES__VALUE			0x003f
@@ -343,6 +341,7 @@ struct denali_nand_info {
 	int devnum;	/* represent how many nands connected */
 	int bbtskipbytes;
 	int max_banks;
+	unsigned int revision;
 	unsigned int caps;
 };
 
diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
index ada38636122c..df9ef36cc2ce 100644
--- a/drivers/mtd/nand/denali_dt.c
+++ b/drivers/mtd/nand/denali_dt.c
@@ -30,6 +30,7 @@ struct denali_dt {
 };
 
 struct denali_dt_data {
+	unsigned int revision;
 	unsigned int caps;
 };
 
@@ -60,8 +61,10 @@ static int denali_dt_probe(struct platform_device *pdev)
 	denali = &dt->denali;
 
 	data = of_device_get_match_data(&pdev->dev);
-	if (data)
+	if (data) {
+		denali->revision = data->revision;
 		denali->caps = data->caps;
+	}
 
 	denali->platform = DT;
 	denali->dev = &pdev->dev;
-- 
cgit v1.2.3


From 3deb9979c7319bc7846d1aac528a9db85162960a Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 17:15:04 +0900
Subject: mtd: nand: allocate aligned buffers if NAND_OWN_BUFFERS is unset

Some NAND controllers are using DMA engine requiring a specific
buffer alignment.  The core provides no guarantee on the nand_buffers
pointers, which forces some drivers to allocate their own buffers
and pass the NAND_OWN_BUFFERS flag.

Rework the nand_buffers allocation logic to allocate each buffer
independently.  This should make most NAND controllers/DMA engine
happy, and allow us to get rid of these custom buf allocation in
NAND controller drivers.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 41 ++++++++++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index de6c8045c85b..c796d0e4039a 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -4495,7 +4495,7 @@ int nand_scan_tail(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
-	struct nand_buffers *nbuf;
+	struct nand_buffers *nbuf = NULL;
 	int ret;
 
 	/* New bad blocks should be marked in OOB, flash-based BBT, or both */
@@ -4509,13 +4509,28 @@ int nand_scan_tail(struct mtd_info *mtd)
 	}
 
 	if (!(chip->options & NAND_OWN_BUFFERS)) {
-		nbuf = kzalloc(sizeof(*nbuf) + mtd->writesize
-				+ mtd->oobsize * 3, GFP_KERNEL);
+		nbuf = kzalloc(sizeof(*nbuf), GFP_KERNEL);
 		if (!nbuf)
 			return -ENOMEM;
-		nbuf->ecccalc = (uint8_t *)(nbuf + 1);
-		nbuf->ecccode = nbuf->ecccalc + mtd->oobsize;
-		nbuf->databuf = nbuf->ecccode + mtd->oobsize;
+
+		nbuf->ecccalc = kmalloc(mtd->oobsize, GFP_KERNEL);
+		if (!nbuf->ecccalc) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
+
+		nbuf->ecccode = kmalloc(mtd->oobsize, GFP_KERNEL);
+		if (!nbuf->ecccode) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
+
+		nbuf->databuf = kmalloc(mtd->writesize + mtd->oobsize,
+					GFP_KERNEL);
+		if (!nbuf->databuf) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
 
 		chip->buffers = nbuf;
 	} else {
@@ -4755,8 +4770,12 @@ int nand_scan_tail(struct mtd_info *mtd)
 	/* Build bad block table */
 	return chip->scan_bbt(mtd);
 err_free:
-	if (!(chip->options & NAND_OWN_BUFFERS))
-		kfree(chip->buffers);
+	if (nbuf) {
+		kfree(nbuf->databuf);
+		kfree(nbuf->ecccode);
+		kfree(nbuf->ecccalc);
+		kfree(nbuf);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(nand_scan_tail);
@@ -4807,8 +4826,12 @@ void nand_cleanup(struct nand_chip *chip)
 
 	/* Free bad block table memory */
 	kfree(chip->bbt);
-	if (!(chip->options & NAND_OWN_BUFFERS))
+	if (!(chip->options & NAND_OWN_BUFFERS) && chip->buffers) {
+		kfree(chip->buffers->databuf);
+		kfree(chip->buffers->ecccode);
+		kfree(chip->buffers->ecccalc);
 		kfree(chip->buffers);
+	}
 
 	/* Free bad block descriptor memory */
 	if (chip->badblock_pattern && chip->badblock_pattern->options
-- 
cgit v1.2.3


From 477544c62a84d3bacd9f90ba75ffc16c04d78071 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 30 Mar 2017 17:15:05 +0900
Subject: mtd: nand: allow drivers to request minimum alignment for passed
 buffer

In some cases, nand_do_{read,write}_ops is passed with unaligned
ops->datbuf.  Drivers using DMA will be unhappy about unaligned
buffer.

The new struct member, buf_align, represents the minimum alignment
the driver require for the buffer.  If the buffer passed from the
upper MTD layer does not have enough alignment, nand_do_*_ops will
use bufpoi.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 10 ++++++++--
 include/linux/mtd/nand.h     |  2 ++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index c796d0e4039a..ed49a1d634b0 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1954,7 +1954,9 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 		if (!aligned)
 			use_bufpoi = 1;
 		else if (chip->options & NAND_USE_BOUNCE_BUFFER)
-			use_bufpoi = !virt_addr_valid(buf);
+			use_bufpoi = !virt_addr_valid(buf) ||
+				     !IS_ALIGNED((unsigned long)buf,
+						 chip->buf_align);
 		else
 			use_bufpoi = 0;
 
@@ -2810,7 +2812,9 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 		if (part_pagewr)
 			use_bufpoi = 1;
 		else if (chip->options & NAND_USE_BOUNCE_BUFFER)
-			use_bufpoi = !virt_addr_valid(buf);
+			use_bufpoi = !virt_addr_valid(buf) ||
+				     !IS_ALIGNED((unsigned long)buf,
+						 chip->buf_align);
 		else
 			use_bufpoi = 0;
 
@@ -3429,6 +3433,8 @@ static void nand_set_defaults(struct nand_chip *chip)
 		nand_hw_control_init(chip->controller);
 	}
 
+	if (!chip->buf_align)
+		chip->buf_align = 1;
 }
 
 /* Sanitize ONFI strings so we can safely print them */
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 9e0c93c44bef..8f67b1581683 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -755,6 +755,7 @@ struct nand_manufacturer_ops {
  *			setting the read-retry mode. Mostly needed for MLC NAND.
  * @ecc:		[BOARDSPECIFIC] ECC control structure
  * @buffers:		buffer structure for read/write
+ * @buf_align:		minimum buffer alignment required by a platform
  * @hwcontrol:		platform-specific hardware control structure
  * @erase:		[REPLACEABLE] erase function
  * @scan_bbt:		[REPLACEABLE] function to scan bad block table
@@ -905,6 +906,7 @@ struct nand_chip {
 
 	struct nand_ecc_ctrl ecc;
 	struct nand_buffers *buffers;
+	unsigned long buf_align;
 	struct nand_hw_control hwcontrol;
 
 	uint8_t *bbt;
-- 
cgit v1.2.3


From 70106ddaf31150a57dc00b3d7d5167f693b521ef Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 4 Apr 2017 11:15:46 +0300
Subject: mtd: nand: Fix a couple error codes

We accidentally return 1 on error instead of proper error codes.

Fixes: 07b23e3db9ed ("mtd: nand: Cleanup/rework the atmel_nand driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/atmel/nand-controller.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c
index 80e2459f92f8..c11f358c6df6 100644
--- a/drivers/mtd/nand/atmel/nand-controller.c
+++ b/drivers/mtd/nand/atmel/nand-controller.c
@@ -1685,7 +1685,7 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc,
 	nc->smc = syscon_node_to_regmap(np);
 	of_node_put(np);
 	if (IS_ERR(nc->smc)) {
-		ret = IS_ERR(nc->smc);
+		ret = PTR_ERR(nc->smc);
 		dev_err(dev, "Could not get SMC regmap (err = %d)\n", ret);
 		return ret;
 	}
@@ -1718,7 +1718,7 @@ atmel_smc_nand_controller_init(struct atmel_smc_nand_controller *nc)
 	nc->matrix = syscon_node_to_regmap(np);
 	of_node_put(np);
 	if (IS_ERR(nc->matrix)) {
-		ret = IS_ERR(nc->matrix);
+		ret = PTR_ERR(nc->matrix);
 		dev_err(dev, "Could not get Matrix regmap (err = %d)\n", ret);
 		return ret;
 	}
-- 
cgit v1.2.3


From 038e8ad6eb720d0b362183f1c30470c1e0797e39 Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Tue, 11 Apr 2017 07:22:52 +0200
Subject: mtd: nand: NULL terminate a of_device_id table

of_device_id tables should be NULL terminated.

Fixes: 07b23e3db9ed ("mtd: nand: Cleanup/rework the atmel_nand driver")

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/atmel/nand-controller.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c
index c11f358c6df6..3b2446896147 100644
--- a/drivers/mtd/nand/atmel/nand-controller.c
+++ b/drivers/mtd/nand/atmel/nand-controller.c
@@ -1635,6 +1635,7 @@ static const struct of_device_id atmel_matrix_of_ids[] = {
 		.compatible = "atmel,at91sam9x5-matrix",
 		.data = (void *)AT91SAM9X5_MATRIX_EBICSA,
 	},
+	{ /* sentinel */ },
 };
 
 static int atmel_nand_controller_init(struct atmel_nand_controller *nc,
-- 
cgit v1.2.3


From 2d283ede59869159f4bb84ae689258c5caffce54 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Thu, 30 Mar 2017 10:37:50 +0300
Subject: mtd: nand: omap2: Fix partition creation via cmdline mtdparts

commit c9711ec5250b ("mtd: nand: omap: Clean up device tree support")
caused the parent device name to be changed from "omap2-nand.0"
to "<base address>.nand"  (e.g. 30000000.nand on omap3 platforms).
This caused mtd->name to be changed as well. This breaks partition
creation via mtdparts passed by u-boot as it uses "omap2-nand.0"
for the mtd-id.

Fix this by explicitly setting the mtd->name to "omap2-nand.<CS number>"
if it isn't already set by nand_set_flash_node(). CS number is the
NAND controller instance ID.

Fixes: c9711ec5250b ("mtd: nand: omap: Clean up device tree support")
Cc: <stable@vger.kernel.org> # 4.7+
Reported-by: Leto Enrico <enrico.leto@siemens.com>
Reported-by: Adam Ford <aford173@gmail.com>
Suggested-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Tested-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/omap2.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 2a52101120d4..084934a9f19c 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -1856,6 +1856,15 @@ static int omap_nand_probe(struct platform_device *pdev)
 	nand_chip->ecc.priv	= NULL;
 	nand_set_flash_node(nand_chip, dev->of_node);
 
+	if (!mtd->name) {
+		mtd->name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+					   "omap2-nand.%d", info->gpmc_cs);
+		if (!mtd->name) {
+			dev_err(&pdev->dev, "Failed to set MTD name\n");
+			return -ENOMEM;
+		}
+	}
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	nand_chip->IO_ADDR_R = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(nand_chip->IO_ADDR_R))
-- 
cgit v1.2.3


From 65a2c1caa70f71690dcb5afd8fc6afe67fcde599 Mon Sep 17 00:00:00 2001
From: Sekhar Nori <nsekhar@ti.com>
Date: Thu, 30 Mar 2017 20:09:30 +0530
Subject: mtd: nand: davinci: add comment on NAND subpage write status on
 keystone

Add a comment clarifying that NAND subpage write on keystone works,
but is not being enabled in the interest of backward compatibility.

Signed-off-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/davinci_nand.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index 27fa8b87cd5f..531c51991e57 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -581,6 +581,17 @@ static struct davinci_nand_pdata
 			"ti,davinci-nand-use-bbt"))
 			pdata->bbt_options = NAND_BBT_USE_FLASH;
 
+		/*
+		 * Since kernel v4.8, this driver has been fixed to enable
+		 * use of 4-bit hardware ECC with subpages and verified on
+		 * TI's keystone EVMs (K2L, K2HK and K2E).
+		 * However, in the interest of not breaking systems using
+		 * existing UBI partitions, sub-page writes are not being
+		 * (re)enabled. If you want to use subpage writes on Keystone
+		 * platforms (i.e. do not have any existing UBI partitions),
+		 * then use "ti,davinci-nand" as the compatible in your
+		 * device-tree file.
+		 */
 		if (of_device_is_compatible(pdev->dev.of_node,
 					    "ti,keystone-nand")) {
 			pdata->options |= NAND_NO_SUBPAGE_WRITE;
-- 
cgit v1.2.3


From 9d2ee0a60b8bd9bef2a0082c533736d6a7b39873 Mon Sep 17 00:00:00 2001
From: Kamal Dasu <kdasu.kdev@gmail.com>
Date: Fri, 3 Mar 2017 16:16:53 -0500
Subject: mtd: nand: brcmnand: Check flash #WP pin status before nand
 erase/program

On brcmnand controller v6.x and v7.x, the #WP pin is controlled through
the NAND_WP bit in CS_SELECT register.

The driver currently assumes that toggling the #WP pin is
instantaneously enabling/disabling write-protection, but it actually
takes some time to propagate the new state to the internal NAND chip
logic. This behavior is sometime causing data corruptions when an
erase/program operation is executed before write-protection has really
been disabled.

Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller")
Signed-off-by: Kamal Dasu <kdasu.kdev@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/brcmnand/brcmnand.c | 61 ++++++++++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c
index 42ebd73f821d..7419c5ce63f8 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -101,6 +101,9 @@ struct brcm_nand_dma_desc {
 #define BRCMNAND_MIN_BLOCKSIZE	(8 * 1024)
 #define BRCMNAND_MIN_DEVSIZE	(4ULL * 1024 * 1024)
 
+#define NAND_CTRL_RDY			(INTFC_CTLR_READY | INTFC_FLASH_READY)
+#define NAND_POLL_STATUS_TIMEOUT_MS	100
+
 /* Controller feature flags */
 enum {
 	BRCMNAND_HAS_1K_SECTORS			= BIT(0),
@@ -765,6 +768,31 @@ enum {
 	CS_SELECT_AUTO_DEVICE_ID_CFG		= BIT(30),
 };
 
+static int bcmnand_ctrl_poll_status(struct brcmnand_controller *ctrl,
+				    u32 mask, u32 expected_val,
+				    unsigned long timeout_ms)
+{
+	unsigned long limit;
+	u32 val;
+
+	if (!timeout_ms)
+		timeout_ms = NAND_POLL_STATUS_TIMEOUT_MS;
+
+	limit = jiffies + msecs_to_jiffies(timeout_ms);
+	do {
+		val = brcmnand_read_reg(ctrl, BRCMNAND_INTFC_STATUS);
+		if ((val & mask) == expected_val)
+			return 0;
+
+		cpu_relax();
+	} while (time_after(limit, jiffies));
+
+	dev_warn(ctrl->dev, "timeout on status poll (expected %x got %x)\n",
+		 expected_val, val & mask);
+
+	return -ETIMEDOUT;
+}
+
 static inline void brcmnand_set_wp(struct brcmnand_controller *ctrl, bool en)
 {
 	u32 val = en ? CS_SELECT_NAND_WP : 0;
@@ -1024,12 +1052,39 @@ static void brcmnand_wp(struct mtd_info *mtd, int wp)
 
 	if ((ctrl->features & BRCMNAND_HAS_WP) && wp_on == 1) {
 		static int old_wp = -1;
+		int ret;
 
 		if (old_wp != wp) {
 			dev_dbg(ctrl->dev, "WP %s\n", wp ? "on" : "off");
 			old_wp = wp;
 		}
+
+		/*
+		 * make sure ctrl/flash ready before and after
+		 * changing state of #WP pin
+		 */
+		ret = bcmnand_ctrl_poll_status(ctrl, NAND_CTRL_RDY |
+					       NAND_STATUS_READY,
+					       NAND_CTRL_RDY |
+					       NAND_STATUS_READY, 0);
+		if (ret)
+			return;
+
 		brcmnand_set_wp(ctrl, wp);
+		chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
+		/* NAND_STATUS_WP 0x00 = protected, 0x80 = not protected */
+		ret = bcmnand_ctrl_poll_status(ctrl,
+					       NAND_CTRL_RDY |
+					       NAND_STATUS_READY |
+					       NAND_STATUS_WP,
+					       NAND_CTRL_RDY |
+					       NAND_STATUS_READY |
+					       (wp ? 0 : NAND_STATUS_WP), 0);
+
+		if (ret)
+			dev_err_ratelimited(&host->pdev->dev,
+					    "nand #WP expected %s\n",
+					    wp ? "on" : "off");
 	}
 }
 
@@ -1157,15 +1212,15 @@ static irqreturn_t brcmnand_dma_irq(int irq, void *data)
 static void brcmnand_send_cmd(struct brcmnand_host *host, int cmd)
 {
 	struct brcmnand_controller *ctrl = host->ctrl;
-	u32 intfc;
+	int ret;
 
 	dev_dbg(ctrl->dev, "send native cmd %d addr_lo 0x%x\n", cmd,
 		brcmnand_read_reg(ctrl, BRCMNAND_CMD_ADDRESS));
 	BUG_ON(ctrl->cmd_pending != 0);
 	ctrl->cmd_pending = cmd;
 
-	intfc = brcmnand_read_reg(ctrl, BRCMNAND_INTFC_STATUS);
-	WARN_ON(!(intfc & INTFC_CTLR_READY));
+	ret = bcmnand_ctrl_poll_status(ctrl, NAND_CTRL_RDY, NAND_CTRL_RDY, 0);
+	WARN_ON(ret);
 
 	mb(); /* flush previous writes */
 	brcmnand_write_reg(ctrl, BRCMNAND_CMD_START,
-- 
cgit v1.2.3


From 6323f474902e372623d73486c1ad30eb40d6604f Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Fri, 21 Apr 2017 17:03:36 +0800
Subject: iommu/arm-smmu: Correct sid to mask

From code "SMR mask 0x%x out of range for SMMU", so, we need to use mask, not
sid.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/arm-smmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index dbd4998661c6..77242afa1efc 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1535,7 +1535,7 @@ static int arm_smmu_add_device(struct device *dev)
 		}
 		if (mask & ~smmu->smr_mask_mask) {
 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
-				sid, smmu->smr_mask_mask);
+				mask, smmu->smr_mask_mask);
 			goto out_free;
 		}
 	}
-- 
cgit v1.2.3


From 6db81643fe41f51cf9f6fd10558b8b323d9198ba Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 25 Apr 2017 11:45:59 -0300
Subject: perf buildid: Move prototypes from util.h to build-id.h

Where they belong.

Link: http://lkml.kernel.org/n/tip-94m3dziejxgo7k0488q3mqjm@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/sdt.c     | 1 -
 tools/perf/util/build-id.h | 4 ++++
 tools/perf/util/util.h     | 3 ---
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c
index f73b3c5e125d..06eda675ae2c 100644
--- a/tools/perf/tests/sdt.c
+++ b/tools/perf/tests/sdt.c
@@ -1,7 +1,6 @@
 #include <errno.h>
 #include <stdio.h>
 #include <sys/epoll.h>
-#include <util/util.h>
 #include <util/evlist.h>
 #include <linux/filter.h>
 #include "tests.h"
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index a96081121179..8a89b195c1fc 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -44,6 +44,10 @@ bool build_id_cache__cached(const char *sbuild_id);
 int build_id_cache__add_s(const char *sbuild_id,
 			  const char *name, bool is_kallsyms, bool is_vdso);
 int build_id_cache__remove_s(const char *sbuild_id);
+
+extern char buildid_dir[];
+
+void set_buildid_dir(const char *dir);
 void disable_buildid_cache(void);
 
 #endif
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 3852b6d3270a..e7b6f36d8904 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -14,8 +14,6 @@
 #include <stdarg.h>
 #include <linux/types.h>
 
-extern char buildid_dir[];
-
 #ifdef __GNUC__
 #define NORETURN __attribute__((__noreturn__))
 #else
@@ -36,7 +34,6 @@ void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
 void set_warning_routine(void (*routine)(const char *err, va_list params));
 
 int prefixcmp(const char *str, const char *prefix);
-void set_buildid_dir(const char *dir);
 
 static inline void *zalloc(size_t size)
 {
-- 
cgit v1.2.3


From b3230e80a6d115d2eb50fe3f4794ad04d84766de Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 25 Apr 2017 10:56:19 -0400
Subject: pNFS: Ensure we check layout segment validity in the pg_init()
 callback

If we have a layout segment cached in pgio->pg_lseg, we should check it
for validity before reusing it in a new RPC request. Otherwise, if we
recoalesce, we can end up looping forever.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/filelayout/filelayout.c         |  2 ++
 fs/nfs/flexfilelayout/flexfilelayout.c |  2 ++
 fs/nfs/pnfs.c                          | 13 +++++++++++++
 fs/nfs/pnfs.h                          |  1 +
 4 files changed, 18 insertions(+)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index acd30baca461..d4174da89302 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -933,6 +933,7 @@ static void
 filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 			struct nfs_page *req)
 {
+	pnfs_generic_pg_check_layout(pgio);
 	if (!pgio->pg_lseg) {
 		pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
 						      req->wb_context,
@@ -959,6 +960,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 	struct nfs_commit_info cinfo;
 	int status;
 
+	pnfs_generic_pg_check_layout(pgio);
 	if (!pgio->pg_lseg) {
 		pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
 						      req->wb_context,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 42dedf2d625f..f23b63eb356e 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -846,6 +846,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 	int ds_idx;
 
 retry:
+	pnfs_generic_pg_check_layout(pgio);
 	/* Use full layout for now */
 	if (!pgio->pg_lseg)
 		ff_layout_pg_get_read(pgio, req, false);
@@ -894,6 +895,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 	int status;
 
 retry:
+	pnfs_generic_pg_check_layout(pgio);
 	if (!pgio->pg_lseg) {
 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 						   req->wb_context,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3917a6272789..634adb3f8524 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2072,11 +2072,23 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
 }
 EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
 
+void
+pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
+{
+	if (pgio->pg_lseg == NULL ||
+	    test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags))
+		return;
+	pnfs_put_lseg(pgio->pg_lseg);
+	pgio->pg_lseg = NULL;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
+
 void
 pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
 	u64 rd_size = req->wb_bytes;
 
+	pnfs_generic_pg_check_layout(pgio);
 	if (pgio->pg_lseg == NULL) {
 		if (pgio->pg_dreq == NULL)
 			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
@@ -2107,6 +2119,7 @@ void
 pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
 			   struct nfs_page *req, u64 wb_size)
 {
+	pnfs_generic_pg_check_layout(pgio);
 	if (pgio->pg_lseg == NULL) {
 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 						   req->wb_context,
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 825a1b8ddc4f..2d05b756a8d6 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -234,6 +234,7 @@ void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg);
 
 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *);
 void unset_pnfs_layoutdriver(struct nfs_server *);
+void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio);
 void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
 int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
 void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
-- 
cgit v1.2.3


From 5ab8c689f7c0c97e98b8014b7f0ede386bef5aaf Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 25 Apr 2017 15:30:47 -0300
Subject: perf tools: Move event prototypes from util.h to event.h

More needs to be done to have the actual functions and variables in a
smaller .c file that can then be included in the python binding,
avoiding dragging more stuff into it.

Link: http://lkml.kernel.org/n/tip-uecxz7cqkssouj7tlxrkqpl4@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-c2c.c          | 1 +
 tools/perf/builtin-top.c          | 2 +-
 tools/perf/builtin-trace.c        | 1 +
 tools/perf/perf.c                 | 1 +
 tools/perf/tests/hists_cumulate.c | 1 +
 tools/perf/tests/hists_filter.c   | 1 +
 tools/perf/tests/hists_output.c   | 1 +
 tools/perf/util/event.c           | 1 +
 tools/perf/util/event.h           | 8 ++++++++
 tools/perf/util/evsel.c           | 1 +
 tools/perf/util/util.h            | 8 --------
 11 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index e33b4acece90..620a467ee304 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -27,6 +27,7 @@
 #include "tool.h"
 #include "data.h"
 #include "sort.h"
+#include "event.h"
 #include "evlist.h"
 #include "evsel.h"
 #include <asm/bug.h>
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 7ab42b8311a1..10b6362ca0bf 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -27,13 +27,13 @@
 #include "util/drv_configs.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include "util/event.h"
 #include "util/machine.h"
 #include "util/session.h"
 #include "util/symbol.h"
 #include "util/thread.h"
 #include "util/thread_map.h"
 #include "util/top.h"
-#include "util/util.h"
 #include <linux/rbtree.h>
 #include <subcmd/parse-options.h>
 #include "util/parse-events.h"
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index eaa66fb57347..d014350adc52 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -21,6 +21,7 @@
 #include "builtin.h"
 #include "util/color.h"
 #include "util/debug.h"
+#include "util/event.h"
 #include "util/evlist.h"
 #include <subcmd/exec-cmd.h>
 #include "util/machine.h"
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 4cc6960f6226..628a5e412cb1 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -17,6 +17,7 @@
 #include <subcmd/parse-options.h>
 #include "util/bpf-loader.h"
 #include "util/debug.h"
+#include "util/event.h"
 #include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
 #include <errno.h>
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 70918b986568..d549a9f2c41b 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -1,5 +1,6 @@
 #include "perf.h"
 #include "util/debug.h"
+#include "util/event.h"
 #include "util/symbol.h"
 #include "util/sort.h"
 #include "util/evsel.h"
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index f171b2da4899..df9c91f49af1 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -3,6 +3,7 @@
 #include "util/symbol.h"
 #include "util/sort.h"
 #include "util/evsel.h"
+#include "util/event.h"
 #include "util/evlist.h"
 #include "util/machine.h"
 #include "util/thread.h"
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index cdf0dde5fe97..06e5080182d3 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -1,5 +1,6 @@
 #include "perf.h"
 #include "util/debug.h"
+#include "util/event.h"
 #include "util/symbol.h"
 #include "util/sort.h"
 #include "util/evsel.h"
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 142835c0ca0a..437194fe0434 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -8,6 +8,7 @@
 #include <unistd.h>
 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
 #include <api/fs/fs.h>
+#include <linux/perf_event.h>
 #include "event.h"
 #include "debug.h"
 #include "hist.h"
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index db2de6413518..cfbe32bd7413 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -681,4 +681,12 @@ u64 kallsyms__get_function_start(const char *kallsyms_filename,
 void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max);
 void  cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
 			       u16 type, int max);
+
+void event_attr_init(struct perf_event_attr *attr);
+
+int perf_event_paranoid(void);
+
+extern int sysctl_perf_event_max_stack;
+extern int sysctl_perf_event_max_contexts_per_stack;
+
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 0e879097adfb..e4f7902d5afa 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -21,6 +21,7 @@
 #include "asm/bug.h"
 #include "callchain.h"
 #include "cgroup.h"
+#include "event.h"
 #include "evsel.h"
 #include "evlist.h"
 #include "util.h"
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index e7b6f36d8904..f87b8948efdc 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -56,17 +56,11 @@ int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 si
 ssize_t readn(int fd, void *buf, size_t n);
 ssize_t writen(int fd, void *buf, size_t n);
 
-struct perf_event_attr;
-
-void event_attr_init(struct perf_event_attr *attr);
-
 size_t hex_width(u64 v);
 int hex2u64(const char *ptr, u64 *val);
 
 extern unsigned int page_size;
 extern int cacheline_size;
-extern int sysctl_perf_event_max_stack;
-extern int sysctl_perf_event_max_contexts_per_stack;
 
 struct parse_tag {
 	char tag;
@@ -75,8 +69,6 @@ struct parse_tag {
 
 unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
 
-int perf_event_paranoid(void);
-
 void mem_bswap_64(void *src, int byte_size);
 void mem_bswap_32(void *src, int byte_size);
 
-- 
cgit v1.2.3


From 98521b3869f8d6b4b9d2fdad8a56059e819ae002 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 25 Apr 2017 15:45:35 -0300
Subject: perf memswap: Split the byteswap memory range wrappers from util.[ch]

Just one more step into splitting util.[ch] to reduce the includes hell.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-navarr9mijkgwgbzu464dwam@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/Build      |  1 +
 tools/perf/util/header.c   |  1 +
 tools/perf/util/intel-pt.c |  1 +
 tools/perf/util/memswap.c  | 24 ++++++++++++++++++++++++
 tools/perf/util/memswap.h  |  7 +++++++
 tools/perf/util/session.c  |  1 +
 tools/perf/util/util.c     | 22 ----------------------
 tools/perf/util/util.h     |  3 ---
 8 files changed, 35 insertions(+), 25 deletions(-)
 create mode 100644 tools/perf/util/memswap.c
 create mode 100644 tools/perf/util/memswap.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 069583bdc670..79dea95a7f68 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -13,6 +13,7 @@ libperf-y += find_bit.o
 libperf-y += kallsyms.o
 libperf-y += levenshtein.o
 libperf-y += llvm-utils.o
+libperf-y += memswap.o
 libperf-y += parse-events.o
 libperf-y += perf_regs.o
 libperf-y += path.o
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 948b2c5efb65..314a07151fb7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -19,6 +19,7 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "header.h"
+#include "memswap.h"
 #include "../perf.h"
 #include "trace-event.h"
 #include "session.h"
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index bdd4a28c6cee..4c7718f87a08 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -23,6 +23,7 @@
 #include "../perf.h"
 #include "session.h"
 #include "machine.h"
+#include "memswap.h"
 #include "sort.h"
 #include "tool.h"
 #include "event.h"
diff --git a/tools/perf/util/memswap.c b/tools/perf/util/memswap.c
new file mode 100644
index 000000000000..55f7faa8d9ec
--- /dev/null
+++ b/tools/perf/util/memswap.c
@@ -0,0 +1,24 @@
+#include <byteswap.h>
+#include "memswap.h"
+#include <linux/types.h>
+
+void mem_bswap_32(void *src, int byte_size)
+{
+	u32 *m = src;
+	while (byte_size > 0) {
+		*m = bswap_32(*m);
+		byte_size -= sizeof(u32);
+		++m;
+	}
+}
+
+void mem_bswap_64(void *src, int byte_size)
+{
+	u64 *m = src;
+
+	while (byte_size > 0) {
+		*m = bswap_64(*m);
+		byte_size -= sizeof(u64);
+		++m;
+	}
+}
diff --git a/tools/perf/util/memswap.h b/tools/perf/util/memswap.h
new file mode 100644
index 000000000000..7d1b1c34bb57
--- /dev/null
+++ b/tools/perf/util/memswap.h
@@ -0,0 +1,7 @@
+#ifndef PERF_MEMSWAP_H_
+#define PERF_MEMSWAP_H_
+
+void mem_bswap_64(void *src, int byte_size);
+void mem_bswap_32(void *src, int byte_size);
+
+#endif /* PERF_MEMSWAP_H_ */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 3041c6b98191..7dc1096264c5 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -11,6 +11,7 @@
 
 #include "evlist.h"
 #include "evsel.h"
+#include "memswap.h"
 #include "session.h"
 #include "tool.h"
 #include "sort.h"
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 6450c75a6f5b..b460f0db84d1 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -13,7 +13,6 @@
 #include <string.h>
 #include <errno.h>
 #include <limits.h>
-#include <byteswap.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/time64.h>
@@ -372,27 +371,6 @@ int perf_event_paranoid(void)
 	return value;
 }
 
-void mem_bswap_32(void *src, int byte_size)
-{
-	u32 *m = src;
-	while (byte_size > 0) {
-		*m = bswap_32(*m);
-		byte_size -= sizeof(u32);
-		++m;
-	}
-}
-
-void mem_bswap_64(void *src, int byte_size)
-{
-	u64 *m = src;
-
-	while (byte_size > 0) {
-		*m = bswap_64(*m);
-		byte_size -= sizeof(u64);
-		++m;
-	}
-}
-
 bool find_process(const char *name)
 {
 	size_t len = strlen(name);
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index f87b8948efdc..6855c454e5bc 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -69,9 +69,6 @@ struct parse_tag {
 
 unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
 
-void mem_bswap_64(void *src, int byte_size);
-void mem_bswap_32(void *src, int byte_size);
-
 bool find_process(const char *name);
 
 int fetch_kernel_version(unsigned int *puint,
-- 
cgit v1.2.3


From 6aeafd05eca9bc8ab6b03d7e56d09ffd18190f44 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 25 Apr 2017 11:26:53 -0400
Subject: pNFS: Fix use after free issues in pnfs_do_read()

The assumption should be that if the caller returns PNFS_ATTEMPTED, then hdr
has been consumed, and so we should not be testing hdr->task.tk_status.
If the caller returns PNFS_TRY_AGAIN, then we need to recoalesce and
free hdr.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 634adb3f8524..eff266ea813c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2419,10 +2419,20 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
 	enum pnfs_try_status trypnfs;
 
 	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
-	if (trypnfs == PNFS_TRY_AGAIN)
-		pnfs_read_resend_pnfs(hdr);
-	if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status)
+	switch (trypnfs) {
+	case PNFS_NOT_ATTEMPTED:
 		pnfs_read_through_mds(desc, hdr);
+	case PNFS_ATTEMPTED:
+		break;
+	case PNFS_TRY_AGAIN:
+		/* cleanup hdr and prepare to redo pnfs */
+		if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+			struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
+			list_splice_init(&hdr->pages, &mirror->pg_list);
+			mirror->pg_recoalesce = 1;
+		}
+		hdr->mds_ops->rpc_release(hdr);
+	}
 }
 
 static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
-- 
cgit v1.2.3


From a6598813a4c5bad76322bee2323dc549e7d7180d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 25 Apr 2017 12:06:00 -0400
Subject: NFS: Don't write back further requests if there is a pending write
 error

If the server has already returned a fatal write error that the user
has not yet received on this file, then don't write back the other pages.
Instead, act as if they have been sent, and have returned with the same
error.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/write.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 95ac001b6fdf..2e654940478f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -569,6 +569,18 @@ static void nfs_write_error_remove_page(struct nfs_page *req)
 	nfs_release_request(req);
 }
 
+static bool
+nfs_error_is_fatal_on_server(int err)
+{
+	switch (err) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+		return false;
+	}
+	return nfs_error_is_fatal(err);
+}
+
 /*
  * Find an associated nfs write request, and prepare to flush it out
  * May return an error if the user signalled nfs_wait_on_request().
@@ -591,6 +603,10 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 	WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
 
 	ret = 0;
+	/* If there is a fatal error that covers this write, just exit */
+	if (nfs_error_is_fatal_on_server(req->wb_context->error))
+		goto out_launder;
+
 	if (!nfs_pageio_add_request(pgio, req)) {
 		ret = pgio->pg_error;
 		/*
@@ -600,10 +616,8 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 		 */
 		if (nfs_error_is_fatal(ret)) {
 			nfs_context_set_write_error(req->wb_context, ret);
-			if (launder) {
-				nfs_write_error_remove_page(req);
-				goto out;
-			}
+			if (launder)
+				goto out_launder;
 		}
 		nfs_redirty_request(req);
 		ret = -EAGAIN;
@@ -612,6 +626,9 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 				NFSIOS_WRITEPAGES, 1);
 out:
 	return ret;
+out_launder:
+	nfs_write_error_remove_page(req);
+	return ret;
 }
 
 static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
-- 
cgit v1.2.3


From 9378b274e1eb6925db315e345f48850d2d5d9789 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:22:29 -0400
Subject: xprtrdma: Cancel refresh worker during buffer shutdown

Trying to create MRs while the transport is being torn down can
cause a crash.

Fixes: e2ac236c0b65 ("xprtrdma: Allocate MRs on demand")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/verbs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3b332b395045..2e4fc1e46947 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1037,6 +1037,7 @@ void
 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 {
 	cancel_delayed_work_sync(&buf->rb_recovery_worker);
+	cancel_delayed_work_sync(&buf->rb_refresh_worker);
 
 	while (!list_empty(&buf->rb_recv_bufs)) {
 		struct rpcrdma_rep *rep;
-- 
cgit v1.2.3


From e2a4f4fbefc5e5b7b4435f73711b7be94f780584 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:22:38 -0400
Subject: sunrpc: Export xprt_force_disconnect()

xprt_force_disconnect() is already invoked from the socket
transport. I want to invoke xprt_force_disconnect() from the
RPC-over-RDMA transport, which is a separate module from sunrpc.ko.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index b530a2852ba8..3e63c5e97ebe 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -651,6 +651,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
 	xprt_wake_pending_tasks(xprt, -EAGAIN);
 	spin_unlock_bh(&xprt->transport_lock);
 }
+EXPORT_SYMBOL_GPL(xprt_force_disconnect);
 
 /**
  * xprt_conditional_disconnect - force a transport to disconnect
-- 
cgit v1.2.3


From 33849792cbcdae2b04819cfb09fe3dca0a84a11e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:22:46 -0400
Subject: xprtrdma: Detect unreachable NFS/RDMA servers more reliably

Current NFS clients rely on connection loss to determine when to
retransmit. In particular, for protocols like NFSv4, clients no
longer rely on RPC timeouts to drive retransmission: NFSv4 servers
are required to terminate a connection when they need a client to
retransmit pending RPCs.

When a server is no longer reachable, either because it has crashed
or because the network path has broken, the server cannot actively
terminate a connection. Thus NFS clients depend on transport-level
keepalive to determine when a connection must be replaced and
pending RPCs retransmitted.

However, RDMA RC connections do not have a native keepalive
mechanism. If an NFS/RDMA server crashes after a client has sent
RPCs successfully (an RC ACK has been received for all OTW RDMA
requests), there is no way for the client to know the connection is
moribund.

In addition, new RDMA requests are subject to the RPC-over-RDMA
credit limit. If the client has consumed all granted credits with
NFS traffic, it is not allowed to send another RDMA request until
the server replies. Thus it has no way to send a true keepalive when
the workload has already consumed all credits with pending RPCs.

To address this, forcibly disconnect a transport when an RPC times
out. This prevents moribund connections from stopping the
detection of failover or other configuration changes on the server.

Note that even if the connection is still good, retransmitting
any RPC will trigger a disconnect thanks to this logic in
xprt_rdma_send_request:

	/* Must suppress retransmit to maintain credits */
	if (req->rl_connect_cookie == xprt->connect_cookie)
		goto drop_connection;
	req->rl_connect_cookie = xprt->connect_cookie;

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/transport.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index c717f5410776..acf5d81f4d6e 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -484,6 +484,27 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
 	dprintk("RPC:       %s: %u\n", __func__, port);
 }
 
+/**
+ * xprt_rdma_timer - invoked when an RPC times out
+ * @xprt: controlling RPC transport
+ * @task: RPC task that timed out
+ *
+ * Invoked when the transport is still connected, but an RPC
+ * retransmit timeout occurs.
+ *
+ * Since RDMA connections don't have a keep-alive, forcibly
+ * disconnect and retry to connect. This drives full
+ * detection of the network path, and retransmissions of
+ * all pending RPCs.
+ */
+static void
+xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+	dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
+
+	xprt_force_disconnect(xprt);
+}
+
 static void
 xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 {
@@ -776,6 +797,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
 	.alloc_slot		= xprt_alloc_slot,
 	.release_request	= xprt_release_rqst_cong,       /* ditto */
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def, /* ditto */
+	.timer			= xprt_rdma_timer,
 	.rpcbind		= rpcb_getport_async,	/* sunrpc/rpcb_clnt.c */
 	.set_port		= xprt_rdma_set_port,
 	.connect		= xprt_rdma_connect,
-- 
cgit v1.2.3


From fff09594edf5e9b8595a2cefdc07e54b70f81729 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:22:54 -0400
Subject: xprtrdma: Refactor rpcrdma_ia_open()

In order to unload a device driver and reload it, xprtrdma will need
to close a transport's interface adapter, and then call
rpcrdma_ia_open again, possibly finding a different interface
adapter.

Make rpcrdma_ia_open safe to call on the same transport multiple
times.

This is a refactoring change only.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/transport.c |  6 +++---
 net/sunrpc/xprtrdma/verbs.c     | 46 ++++++++++++++++++++---------------------
 net/sunrpc/xprtrdma/xprt_rdma.h |  7 ++++++-
 3 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index acf5d81f4d6e..83e219d7aba4 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -66,8 +66,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
 unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
-static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
-		int xprt_rdma_pad_optimize = 0;
+unsigned int xprt_rdma_memreg_strategy		= RPCRDMA_FRMR;
+int xprt_rdma_pad_optimize;
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 
@@ -396,7 +396,7 @@ xprt_setup_rdma(struct xprt_create *args)
 
 	new_xprt = rpcx_to_rdmax(xprt);
 
-	rc = rpcrdma_ia_open(new_xprt, sap, xprt_rdma_memreg_strategy);
+	rc = rpcrdma_ia_open(new_xprt, sap);
 	if (rc)
 		goto out1;
 
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 2e4fc1e46947..d4880d50d7be 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -413,13 +413,16 @@ out:
  * Exported functions.
  */
 
-/*
- * Open and initialize an Interface Adapter.
- *  o initializes fields of struct rpcrdma_ia, including
- *    interface and provider attributes and protection zone.
+/**
+ * rpcrdma_ia_open - Open and initialize an Interface Adapter.
+ * @xprt: controlling transport
+ * @addr: IP address of remote peer
+ *
+ * Returns 0 on success, negative errno if an appropriate
+ * Interface Adapter could not be found and opened.
  */
 int
-rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
+rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
 {
 	struct rpcrdma_ia *ia = &xprt->rx_ia;
 	int rc;
@@ -427,7 +430,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
 	if (IS_ERR(ia->ri_id)) {
 		rc = PTR_ERR(ia->ri_id);
-		goto out1;
+		goto out_err;
 	}
 	ia->ri_device = ia->ri_id->device;
 
@@ -435,10 +438,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 	if (IS_ERR(ia->ri_pd)) {
 		rc = PTR_ERR(ia->ri_pd);
 		pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
-		goto out2;
+		goto out_err;
 	}
 
-	switch (memreg) {
+	switch (xprt_rdma_memreg_strategy) {
 	case RPCRDMA_FRMR:
 		if (frwr_is_supported(ia)) {
 			ia->ri_ops = &rpcrdma_frwr_memreg_ops;
@@ -452,28 +455,23 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 		}
 		/*FALLTHROUGH*/
 	default:
-		pr_err("rpcrdma: Unsupported memory registration mode: %d\n",
-		       memreg);
+		pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
+		       ia->ri_device->name, xprt_rdma_memreg_strategy);
 		rc = -EINVAL;
-		goto out3;
+		goto out_err;
 	}
 
 	return 0;
 
-out3:
-	ib_dealloc_pd(ia->ri_pd);
-	ia->ri_pd = NULL;
-out2:
-	rpcrdma_destroy_id(ia->ri_id);
-	ia->ri_id = NULL;
-out1:
+out_err:
+	rpcrdma_ia_close(ia);
 	return rc;
 }
 
-/*
- * Clean up/close an IA.
- *   o if event handles and PD have been initialized, free them.
- *   o close the IA
+/**
+ * rpcrdma_ia_close - Clean up/close an IA.
+ * @ia: interface adapter to close
+ *
  */
 void
 rpcrdma_ia_close(struct rpcrdma_ia *ia)
@@ -483,12 +481,14 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
 		if (ia->ri_id->qp)
 			rdma_destroy_qp(ia->ri_id);
 		rpcrdma_destroy_id(ia->ri_id);
-		ia->ri_id = NULL;
 	}
+	ia->ri_id = NULL;
+	ia->ri_device = NULL;
 
 	/* If the pd is still busy, xprtrdma missed freeing a resource */
 	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
 		ib_dealloc_pd(ia->ri_pd);
+	ia->ri_pd = NULL;
 }
 
 /*
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 171a35116de9..af844fc30bd4 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -497,10 +497,15 @@ struct rpcrdma_xprt {
  * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
 extern int xprt_rdma_pad_optimize;
 
+/* This setting controls the hunt for a supported memory
+ * registration strategy.
+ */
+extern unsigned int xprt_rdma_memreg_strategy;
+
 /*
  * Interface Adapter calls - xprtrdma/verbs.c
  */
-int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
+int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
 void rpcrdma_ia_close(struct rpcrdma_ia *);
 bool frwr_is_supported(struct rpcrdma_ia *);
 bool fmr_is_supported(struct rpcrdma_ia *);
-- 
cgit v1.2.3


From 91a10c52975a8c89e146a4f740e64cd147ba8e8a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:23:02 -0400
Subject: xprtrdma: Use same device when mapping or syncing DMA buffers

When the underlying device driver is reloaded, ia->ri_device will be
replaced. All cached copies of that device pointer have to be
updated as well.

Commit 54cbd6b0c6b9 ("xprtrdma: Delay DMA mapping Send and Receive
buffers") added the rg_device field to each regbuf. As part of
handling a device removal, rpcrdma_dma_unmap_regbuf is invoked on
all regbufs for a transport.

Simply calling rpcrdma_dma_map_regbuf for each Receive buffer after
the driver has been reloaded should reinitialize rg_device correctly
for every case except rpcrdma_wc_receive, which still uses
rpcrdma_rep::rr_device.

Ensure the same device that was used to map a Receive buffer is also
used to sync it in rpcrdma_wc_receive by using rg_device there
instead of rr_device.

This is the only use of rr_device, so it can be removed.

The use of regbufs in the send path is also updated, for
completeness.

Fixes: 54cbd6b0c6b9 ("xprtrdma: Delay DMA mapping Send and ... ")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c  |  4 ++--
 net/sunrpc/xprtrdma/verbs.c     | 12 ++++++------
 net/sunrpc/xprtrdma/xprt_rdma.h |  7 ++++++-
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a044be2d6ad7..e68131bed3ce 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -494,7 +494,7 @@ rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 	}
 	sge->length = len;
 
-	ib_dma_sync_single_for_device(ia->ri_device, sge->addr,
+	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
 				      sge->length, DMA_TO_DEVICE);
 	req->rl_send_wr.num_sge++;
 	return true;
@@ -523,7 +523,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 	sge[sge_no].addr = rdmab_addr(rb);
 	sge[sge_no].length = xdr->head[0].iov_len;
 	sge[sge_no].lkey = rdmab_lkey(rb);
-	ib_dma_sync_single_for_device(device, sge[sge_no].addr,
+	ib_dma_sync_single_for_device(rdmab_device(rb), sge[sge_no].addr,
 				      sge[sge_no].length, DMA_TO_DEVICE);
 
 	/* If there is a Read chunk, the page list is being handled
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index d4880d50d7be..c8813fb2163f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -180,7 +180,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	rep->rr_wc_flags = wc->wc_flags;
 	rep->rr_inv_rkey = wc->ex.invalidate_rkey;
 
-	ib_dma_sync_single_for_cpu(rep->rr_device,
+	ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
 				   rdmab_addr(rep->rr_rdmabuf),
 				   rep->rr_len, DMA_FROM_DEVICE);
 
@@ -878,7 +878,6 @@ struct rpcrdma_rep *
 rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_rep *rep;
 	int rc;
 
@@ -894,7 +893,6 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
 		goto out_free;
 	}
 
-	rep->rr_device = ia->ri_device;
 	rep->rr_cqe.done = rpcrdma_wc_receive;
 	rep->rr_rxprt = r_xprt;
 	INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
@@ -1232,17 +1230,19 @@ rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
 bool
 __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
 {
+	struct ib_device *device = ia->ri_device;
+
 	if (rb->rg_direction == DMA_NONE)
 		return false;
 
-	rb->rg_iov.addr = ib_dma_map_single(ia->ri_device,
+	rb->rg_iov.addr = ib_dma_map_single(device,
 					    (void *)rb->rg_base,
 					    rdmab_length(rb),
 					    rb->rg_direction);
-	if (ib_dma_mapping_error(ia->ri_device, rdmab_addr(rb)))
+	if (ib_dma_mapping_error(device, rdmab_addr(rb)))
 		return false;
 
-	rb->rg_device = ia->ri_device;
+	rb->rg_device = device;
 	rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
 	return true;
 }
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index af844fc30bd4..9d58260533fc 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -164,6 +164,12 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
 	return (struct rpcrdma_msg *)rb->rg_base;
 }
 
+static inline struct ib_device *
+rdmab_device(struct rpcrdma_regbuf *rb)
+{
+	return rb->rg_device;
+}
+
 #define RPCRDMA_DEF_GFP		(GFP_NOIO | __GFP_NOWARN)
 
 /* To ensure a transport can always make forward progress,
@@ -209,7 +215,6 @@ struct rpcrdma_rep {
 	unsigned int		rr_len;
 	int			rr_wc_flags;
 	u32			rr_inv_rkey;
-	struct ib_device	*rr_device;
 	struct rpcrdma_xprt	*rr_rxprt;
 	struct work_struct	rr_work;
 	struct list_head	rr_list;
-- 
cgit v1.2.3


From bebd031866caa404c522e91bb6fd0c69be04c707 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:23:10 -0400
Subject: xprtrdma: Support unplugging an HCA from under an NFS mount

The device driver for the underlying physical device associated
with an RPC-over-RDMA transport can be removed while RPC-over-RDMA
transports are still in use (ie, while NFS filesystems are still
mounted and active). The IB core performs a connection event upcall
to request that consumers free all RDMA resources associated with
a transport.

There may be pending RPCs when this occurs. Care must be taken to
release associated resources without leaving references that can
trigger a subsequent crash if a signal or soft timeout occurs. We
rely on the caller of the transport's ->close method to ensure that
the previous RPC task has invoked xprt_release but the transport
remains write-locked.

A DEVICE_REMOVE upcall forces a disconnect then sleeps. When ->close
is invoked, it destroys the transport's H/W resources, then wakes
the upcall, which completes and allows the core driver unload to
continue.

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=266
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/transport.c | 29 +++++++++++++---
 net/sunrpc/xprtrdma/verbs.c     | 74 ++++++++++++++++++++++++++++++++++++++---
 net/sunrpc/xprtrdma/xprt_rdma.h |  7 ++++
 3 files changed, 101 insertions(+), 9 deletions(-)

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 83e219d7aba4..62ecbccd9748 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -457,19 +457,33 @@ out1:
 	return ERR_PTR(rc);
 }
 
-/*
- * Close a connection, during shutdown or timeout/reconnect
+/**
+ * xprt_rdma_close - Close down RDMA connection
+ * @xprt: generic transport to be closed
+ *
+ * Called during transport shutdown reconnect, or device
+ * removal. Caller holds the transport's write lock.
  */
 static void
 xprt_rdma_close(struct rpc_xprt *xprt)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+	dprintk("RPC:       %s: closing xprt %p\n", __func__, xprt);
 
-	dprintk("RPC:       %s: closing\n", __func__);
-	if (r_xprt->rx_ep.rep_connected > 0)
+	if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
+		xprt_clear_connected(xprt);
+		rpcrdma_ia_remove(ia);
+		return;
+	}
+	if (ep->rep_connected == -ENODEV)
+		return;
+	if (ep->rep_connected > 0)
 		xprt->reestablish_timeout = 0;
 	xprt_disconnect_done(xprt);
-	rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+	rpcrdma_ep_disconnect(ep, ia);
 }
 
 static void
@@ -680,6 +694,8 @@ xprt_rdma_free(struct rpc_task *task)
  * xprt_rdma_send_request - marshal and send an RPC request
  * @task: RPC task with an RPC message in rq_snd_buf
  *
+ * Caller holds the transport's write lock.
+ *
  * Return values:
  *        0:	The request has been sent
  * ENOTCONN:	Caller needs to invoke connect logic then call again
@@ -706,6 +722,9 @@ xprt_rdma_send_request(struct rpc_task *task)
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	int rc = 0;
 
+	if (!xprt_connected(xprt))
+		goto drop_connection;
+
 	/* On retransmit, remove any previously registered chunks */
 	if (unlikely(!list_empty(&req->rl_registered)))
 		r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index c8813fb2163f..938fd9e6f308 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -69,6 +69,8 @@
 /*
  * internal functions
  */
+static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
+static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
 
 static struct workqueue_struct *rpcrdma_receive_wq;
 
@@ -262,6 +264,21 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 			__func__, ep);
 		complete(&ia->ri_done);
 		break;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+		pr_info("rpcrdma: removing device for %pIS:%u\n",
+			sap, rpc_get_port(sap));
+#endif
+		set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
+		ep->rep_connected = -ENODEV;
+		xprt_force_disconnect(&xprt->rx_xprt);
+		wait_for_completion(&ia->ri_remove_done);
+
+		ia->ri_id = NULL;
+		ia->ri_pd = NULL;
+		ia->ri_device = NULL;
+		/* Return 1 to ensure the core destroys the id. */
+		return 1;
 	case RDMA_CM_EVENT_ESTABLISHED:
 		connstate = 1;
 		ib_query_qp(ia->ri_id->qp, attr,
@@ -291,9 +308,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		goto connected;
 	case RDMA_CM_EVENT_DISCONNECTED:
 		connstate = -ECONNABORTED;
-		goto connected;
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-		connstate = -ENODEV;
 connected:
 		dprintk("RPC:       %s: %sconnected\n",
 					__func__, connstate > 0 ? "" : "dis");
@@ -346,6 +360,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 	int rc;
 
 	init_completion(&ia->ri_done);
+	init_completion(&ia->ri_remove_done);
 
 	id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
 			    IB_QPT_RC);
@@ -468,6 +483,56 @@ out_err:
 	return rc;
 }
 
+/**
+ * rpcrdma_ia_remove - Handle device driver unload
+ * @ia: interface adapter being removed
+ *
+ * Divest transport H/W resources associated with this adapter,
+ * but allow it to be restored later.
+ */
+void
+rpcrdma_ia_remove(struct rpcrdma_ia *ia)
+{
+	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
+						   rx_ia);
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct rpcrdma_req *req;
+	struct rpcrdma_rep *rep;
+
+	cancel_delayed_work_sync(&buf->rb_refresh_worker);
+
+	/* This is similar to rpcrdma_ep_destroy, but:
+	 * - Don't cancel the connect worker.
+	 * - Don't call rpcrdma_ep_disconnect, which waits
+	 *   for another conn upcall, which will deadlock.
+	 * - rdma_disconnect is unneeded, the underlying
+	 *   connection is already gone.
+	 */
+	if (ia->ri_id->qp) {
+		ib_drain_qp(ia->ri_id->qp);
+		rdma_destroy_qp(ia->ri_id);
+		ia->ri_id->qp = NULL;
+	}
+	ib_free_cq(ep->rep_attr.recv_cq);
+	ib_free_cq(ep->rep_attr.send_cq);
+
+	/* The ULP is responsible for ensuring all DMA
+	 * mappings and MRs are gone.
+	 */
+	list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
+		rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
+	list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
+		rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
+		rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
+		rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
+	}
+	rpcrdma_destroy_mrs(buf);
+
+	/* Allow waiters to continue */
+	complete(&ia->ri_remove_done);
+}
+
 /**
  * rpcrdma_ia_close - Clean up/close an IA.
  * @ia: interface adapter to close
@@ -1080,7 +1145,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
 
 out_nomws:
 	dprintk("RPC:       %s: no MWs available\n", __func__);
-	schedule_delayed_work(&buf->rb_refresh_worker, 0);
+	if (r_xprt->rx_ep.rep_connected != -ENODEV)
+		schedule_delayed_work(&buf->rb_refresh_worker, 0);
 
 	/* Allow the reply handler and refresh worker to run */
 	cond_resched();
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 9d58260533fc..1c5de1af195b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -69,6 +69,7 @@ struct rpcrdma_ia {
 	struct rdma_cm_id 	*ri_id;
 	struct ib_pd		*ri_pd;
 	struct completion	ri_done;
+	struct completion	ri_remove_done;
 	int			ri_async_rc;
 	unsigned int		ri_max_segs;
 	unsigned int		ri_max_frmr_depth;
@@ -78,10 +79,15 @@ struct rpcrdma_ia {
 	bool			ri_reminv_expected;
 	bool			ri_implicit_roundup;
 	enum ib_mr_type		ri_mrtype;
+	unsigned long		ri_flags;
 	struct ib_qp_attr	ri_qp_attr;
 	struct ib_qp_init_attr	ri_qp_init_attr;
 };
 
+enum {
+	RPCRDMA_IAF_REMOVING = 0,
+};
+
 /*
  * RDMA Endpoint -- one per transport instance
  */
@@ -511,6 +517,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
  * Interface Adapter calls - xprtrdma/verbs.c
  */
 int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
+void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
 void rpcrdma_ia_close(struct rpcrdma_ia *);
 bool frwr_is_supported(struct rpcrdma_ia *);
 bool fmr_is_supported(struct rpcrdma_ia *);
-- 
cgit v1.2.3


From 1890896b4e01e3858db71fa55a8edf85e15f9946 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:23:18 -0400
Subject: xprtrdma: Refactor rpcrdma_ep_connect

I'm about to add another arm to

    if (ep->rep_connected != 0)

It will be cleaner to use a switch statement here. We'll be looking
for a couple of specific errnos, or "anything else," basically to
sort out the difference between a normal reconnect and recovery from
device removal.

This is a refactoring change only.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/verbs.c | 109 +++++++++++++++++++++++++-------------------
 1 file changed, 63 insertions(+), 46 deletions(-)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 938fd9e6f308..6479ad3fe69d 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -711,6 +711,57 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 	ib_free_cq(ep->rep_attr.send_cq);
 }
 
+static int
+rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
+		     struct rpcrdma_ia *ia)
+{
+	struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
+	struct rdma_cm_id *id, *old;
+	int err, rc;
+
+	dprintk("RPC:       %s: reconnecting...\n", __func__);
+
+	rpcrdma_ep_disconnect(ep, ia);
+
+	rc = -EHOSTUNREACH;
+	id = rpcrdma_create_id(r_xprt, ia, sap);
+	if (IS_ERR(id))
+		goto out;
+
+	/* As long as the new ID points to the same device as the
+	 * old ID, we can reuse the transport's existing PD and all
+	 * previously allocated MRs. Also, the same device means
+	 * the transport's previous DMA mappings are still valid.
+	 *
+	 * This is a sanity check only. There should be no way these
+	 * point to two different devices here.
+	 */
+	old = id;
+	rc = -ENETUNREACH;
+	if (ia->ri_device != id->device) {
+		pr_err("rpcrdma: can't reconnect on different device!\n");
+		goto out_destroy;
+	}
+
+	err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
+	if (err) {
+		dprintk("RPC:       %s: rdma_create_qp returned %d\n",
+			__func__, err);
+		goto out_destroy;
+	}
+
+	/* Atomically replace the transport's ID and QP. */
+	rc = 0;
+	old = ia->ri_id;
+	ia->ri_id = id;
+	rdma_destroy_qp(old);
+
+out_destroy:
+	rpcrdma_destroy_id(old);
+out:
+	return rc;
+}
+
 /*
  * Connect unconnected endpoint.
  */
@@ -719,61 +770,25 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 {
 	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
 						   rx_ia);
-	struct rdma_cm_id *id, *old;
-	struct sockaddr *sap;
 	unsigned int extras;
-	int rc = 0;
+	int rc;
 
-	if (ep->rep_connected != 0) {
 retry:
-		dprintk("RPC:       %s: reconnecting...\n", __func__);
-
-		rpcrdma_ep_disconnect(ep, ia);
-
-		sap = (struct sockaddr *)&r_xprt->rx_data.addr;
-		id = rpcrdma_create_id(r_xprt, ia, sap);
-		if (IS_ERR(id)) {
-			rc = -EHOSTUNREACH;
-			goto out;
-		}
-		/* TEMP TEMP TEMP - fail if new device:
-		 * Deregister/remarshal *all* requests!
-		 * Close and recreate adapter, pd, etc!
-		 * Re-determine all attributes still sane!
-		 * More stuff I haven't thought of!
-		 * Rrrgh!
-		 */
-		if (ia->ri_device != id->device) {
-			printk("RPC:       %s: can't reconnect on "
-				"different device!\n", __func__);
-			rpcrdma_destroy_id(id);
-			rc = -ENETUNREACH;
-			goto out;
-		}
-		/* END TEMP */
-		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
-		if (rc) {
-			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
-				__func__, rc);
-			rpcrdma_destroy_id(id);
-			rc = -ENETUNREACH;
-			goto out;
-		}
-
-		old = ia->ri_id;
-		ia->ri_id = id;
-
-		rdma_destroy_qp(old);
-		rpcrdma_destroy_id(old);
-	} else {
+	switch (ep->rep_connected) {
+	case 0:
 		dprintk("RPC:       %s: connecting...\n", __func__);
 		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
 		if (rc) {
 			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
 				__func__, rc);
-			/* do not update ep->rep_connected */
-			return -ENETUNREACH;
+			rc = -ENETUNREACH;
+			goto out_noupdate;
 		}
+		break;
+	default:
+		rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
+		if (rc)
+			goto out;
 	}
 
 	ep->rep_connected = 0;
@@ -801,6 +816,8 @@ retry:
 out:
 	if (rc)
 		ep->rep_connected = rc;
+
+out_noupdate:
 	return rc;
 }
 
-- 
cgit v1.2.3


From a9b0e381caa965bc9b00dad1e593c502f48f8c3d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:23:26 -0400
Subject: xprtrdma: Restore transport after device removal

After a device removal, enable the transport connect worker to
restore normal operation if there is another device with
connectivity to the server.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/verbs.c | 48 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 6479ad3fe69d..b69daa30874f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -69,6 +69,7 @@
 /*
  * internal functions
  */
+static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
 static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
 static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
 
@@ -711,6 +712,48 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 	ib_free_cq(ep->rep_attr.send_cq);
 }
 
+/* Re-establish a connection after a device removal event.
+ * Unlike a normal reconnection, a fresh PD and a new set
+ * of MRs and buffers is needed.
+ */
+static int
+rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
+			 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+	struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
+	int rc, err;
+
+	pr_info("%s: r_xprt = %p\n", __func__, r_xprt);
+
+	rc = -EHOSTUNREACH;
+	if (rpcrdma_ia_open(r_xprt, sap))
+		goto out1;
+
+	rc = -ENOMEM;
+	err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data);
+	if (err) {
+		pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);
+		goto out2;
+	}
+
+	rc = -ENETUNREACH;
+	err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+	if (err) {
+		pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
+		goto out3;
+	}
+
+	rpcrdma_create_mrs(r_xprt);
+	return 0;
+
+out3:
+	rpcrdma_ep_destroy(ep, ia);
+out2:
+	rpcrdma_ia_close(ia);
+out1:
+	return rc;
+}
+
 static int
 rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
 		     struct rpcrdma_ia *ia)
@@ -785,6 +828,11 @@ retry:
 			goto out_noupdate;
 		}
 		break;
+	case -ENODEV:
+		rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
+		if (rc)
+			goto out_noupdate;
+		break;
 	default:
 		rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
 		if (rc)
-- 
cgit v1.2.3


From 56a6bd154ddd60f7d01d6c0a5c272c1224d16b46 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:23:34 -0400
Subject: xprtrdma: Revert commit d0f36c46deea

Device removal is now adequately supported. Pinning the underlying
device driver to prevent removal while an NFS mount is active is no
longer necessary.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/verbs.c | 33 +++++++--------------------------
 1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index b69daa30874f..d7c16005491e 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -53,7 +53,7 @@
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc_rdma.h>
 #include <asm/bitops.h>
-#include <linux/module.h> /* try_module_get()/module_put() */
+
 #include <rdma/ib_cm.h>
 
 #include "xprt_rdma.h"
@@ -344,14 +344,6 @@ connected:
 	return 0;
 }
 
-static void rpcrdma_destroy_id(struct rdma_cm_id *id)
-{
-	if (id) {
-		module_put(id->device->owner);
-		rdma_destroy_id(id);
-	}
-}
-
 static struct rdma_cm_id *
 rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 			struct rpcrdma_ia *ia, struct sockaddr *addr)
@@ -386,16 +378,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 		goto out;
 	}
 
-	/* FIXME:
-	 * Until xprtrdma supports DEVICE_REMOVAL, the provider must
-	 * be pinned while there are active NFS/RDMA mounts to prevent
-	 * hangs and crashes at umount time.
-	 */
-	if (!ia->ri_async_rc && !try_module_get(id->device->owner)) {
-		dprintk("RPC:       %s: Failed to get device module\n",
-			__func__);
-		ia->ri_async_rc = -ENODEV;
-	}
 	rc = ia->ri_async_rc;
 	if (rc)
 		goto out;
@@ -405,21 +387,20 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 	if (rc) {
 		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
 			__func__, rc);
-		goto put;
+		goto out;
 	}
 	rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
 	if (rc < 0) {
 		dprintk("RPC:       %s: wait() exited: %i\n",
 			__func__, rc);
-		goto put;
+		goto out;
 	}
 	rc = ia->ri_async_rc;
 	if (rc)
-		goto put;
+		goto out;
 
 	return id;
-put:
-	module_put(id->device->owner);
+
 out:
 	rdma_destroy_id(id);
 	return ERR_PTR(rc);
@@ -546,7 +527,7 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
 	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
 		if (ia->ri_id->qp)
 			rdma_destroy_qp(ia->ri_id);
-		rpcrdma_destroy_id(ia->ri_id);
+		rdma_destroy_id(ia->ri_id);
 	}
 	ia->ri_id = NULL;
 	ia->ri_device = NULL;
@@ -800,7 +781,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
 	rdma_destroy_qp(old);
 
 out_destroy:
-	rpcrdma_destroy_id(old);
+	rdma_destroy_id(old);
 out:
 	return rc;
 }
-- 
cgit v1.2.3


From 7d7fa9b5509649ee1732fde0c643bea53f8db364 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:23:43 -0400
Subject: xprtrdma: Annotate receive workqueue

Micro-optimize the receive workqueue by marking it's anchor "read-
mostly."

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/verbs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index d7c16005491e..3dbce9ac4327 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -73,7 +73,7 @@ static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
 static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
 static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
 
-static struct workqueue_struct *rpcrdma_receive_wq;
+static struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
 
 int
 rpcrdma_alloc_wq(void)
-- 
cgit v1.2.3


From 0031e47c76ec5ce5b4f64f2750a535a36040c82b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:23:51 -0400
Subject: xprtrdma: Squelch ENOBUFS warnings

When ro_map is out of buffers, that's not a permanent error, so
don't report a problem.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e68131bed3ce..694e9b13ecf0 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -781,9 +781,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	return 0;
 
 out_err:
-	pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
-	       PTR_ERR(iptr));
-	r_xprt->rx_stats.failed_marshal_count++;
+	if (PTR_ERR(iptr) != -ENOBUFS) {
+		pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
+		       PTR_ERR(iptr));
+		r_xprt->rx_stats.failed_marshal_count++;
+	}
 	return PTR_ERR(iptr);
 }
 
-- 
cgit v1.2.3


From 7ecce75fc3c52590950bc5e68eac0f00e50e8ce4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:23:59 -0400
Subject: sunrpc: Fix xdr_init_decode_pages() documenting comment

Clean up.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xdr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 1f7082144e01..e34f4ee7f2b6 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -807,7 +807,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 EXPORT_SYMBOL_GPL(xdr_init_decode);
 
 /**
- * xdr_init_decode - Initialize an xdr_stream for decoding data.
+ * xdr_init_decode_pages - Initialize an xdr_stream for decoding into pages
  * @xdr: pointer to xdr_stream struct
  * @buf: pointer to XDR buffer from which to decode data
  * @pages: list of pages to decode into
-- 
cgit v1.2.3


From 2be1fce95e5b017dd7d23ca039d58cbefd0221e6 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Apr 2017 13:24:07 -0400
Subject: xprtrdma: Remove rpcrdma_buffer::rb_pool

Since commit 1e465fd4ff47 ("xprtrdma: Replace send and receive
arrays"), this field is no longer used.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/xprt_rdma.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 1c5de1af195b..1d66acf1a723 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -391,7 +391,6 @@ struct rpcrdma_buffer {
 	spinlock_t		rb_mwlock;	/* protect rb_mws list */
 	struct list_head	rb_mws;
 	struct list_head	rb_all;
-	char			*rb_pool;
 
 	spinlock_t		rb_lock;	/* protect buf lists */
 	int			rb_send_count, rb_recv_count;
-- 
cgit v1.2.3


From bb3393d5b3594a6f9c2e670ec59a2bb8daf92ccc Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 25 Apr 2017 16:25:06 -0400
Subject: NFSv3: nfs3_nlm_alloc_call should be declared static

Fix compiler warnings.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs3proc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ce06ae0a25cf..0c07b567118d 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -865,7 +865,7 @@ static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
 }
 
-void nfs3_nlm_alloc_call(void *data)
+static void nfs3_nlm_alloc_call(void *data)
 {
 	struct nfs_lock_context *l_ctx = data;
 	if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags)) {
@@ -874,7 +874,7 @@ void nfs3_nlm_alloc_call(void *data)
 	}
 }
 
-bool nfs3_nlm_unlock_prepare(struct rpc_task *task, void *data)
+static bool nfs3_nlm_unlock_prepare(struct rpc_task *task, void *data)
 {
 	struct nfs_lock_context *l_ctx = data;
 	if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags))
@@ -883,7 +883,7 @@ bool nfs3_nlm_unlock_prepare(struct rpc_task *task, void *data)
 
 }
 
-void nfs3_nlm_release_call(void *data)
+static void nfs3_nlm_release_call(void *data)
 {
 	struct nfs_lock_context *l_ctx = data;
 	struct nfs_open_context *ctx;
-- 
cgit v1.2.3


From 51f567777799c9d85a778302b9eb61cf15214a98 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 6 Apr 2017 22:36:31 -0400
Subject: nfsd: check for oversized NFSv2/v3 arguments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A client can append random data to the end of an NFSv2 or NFSv3 RPC call
without our complaining; we'll just stop parsing at the end of the
expected data and ignore the rest.

Encoded arguments and replies are stored together in an array of pages,
and if a call is too large it could leave inadequate space for the
reply.  This is normally OK because NFS RPC's typically have either
short arguments and long replies (like READ) or long arguments and short
replies (like WRITE).  But a client that sends an incorrectly long reply
can violate those assumptions.  This was observed to cause crashes.

So, insist that the argument not be any longer than we expect.

Also, several operations increment rq_next_page in the decode routine
before checking the argument size, which can leave rq_next_page pointing
well past the end of the page array, causing trouble later in
svc_free_pages.

As followup we may also want to rewrite the encoding routines to check
more carefully that they aren't running off the end of the page array.

Reported-by: Tuomas Haanpää <thaan@synopsys.com>
Reported-by: Ari Kauppi <ari@synopsys.com>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs3xdr.c          | 23 +++++++++++++++++------
 fs/nfsd/nfsxdr.c           | 13 ++++++++++---
 include/linux/sunrpc/svc.h |  3 +--
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 452334694a5d..12feac6ee2fd 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -334,8 +334,11 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 	if (!p)
 		return 0;
 	p = xdr_decode_hyper(p, &args->offset);
-
 	args->count = ntohl(*p++);
+
+	if (!xdr_argsize_check(rqstp, p))
+		return 0;
+
 	len = min(args->count, max_blocksize);
 
 	/* set up the kvec */
@@ -349,7 +352,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 		v++;
 	}
 	args->vlen = v;
-	return xdr_argsize_check(rqstp, p);
+	return 1;
 }
 
 int
@@ -541,9 +544,11 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
+	if (!xdr_argsize_check(rqstp, p))
+		return 0;
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return xdr_argsize_check(rqstp, p);
+	return 1;
 }
 
 int
@@ -569,10 +574,14 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 	args->verf   = p; p += 2;
 	args->dircount = ~0;
 	args->count  = ntohl(*p++);
+
+	if (!xdr_argsize_check(rqstp, p))
+		return 0;
+
 	args->count  = min_t(u32, args->count, PAGE_SIZE);
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return xdr_argsize_check(rqstp, p);
+	return 1;
 }
 
 int
@@ -590,6 +599,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 	args->dircount = ntohl(*p++);
 	args->count    = ntohl(*p++);
 
+	if (!xdr_argsize_check(rqstp, p))
+		return 0;
+
 	len = args->count = min(args->count, max_blocksize);
 	while (len > 0) {
 		struct page *p = *(rqstp->rq_next_page++);
@@ -597,8 +609,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 			args->buffer = page_address(p);
 		len -= PAGE_SIZE;
 	}
-
-	return xdr_argsize_check(rqstp, p);
+	return 1;
 }
 
 int
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index de07ff625777..6a4947a3f4fa 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -257,6 +257,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 	len = args->count     = ntohl(*p++);
 	p++; /* totalcount - unused */
 
+	if (!xdr_argsize_check(rqstp, p))
+		return 0;
+
 	len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);
 
 	/* set up somewhere to store response.
@@ -272,7 +275,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 		v++;
 	}
 	args->vlen = v;
-	return xdr_argsize_check(rqstp, p);
+	return 1;
 }
 
 int
@@ -362,9 +365,11 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
+	if (!xdr_argsize_check(rqstp, p))
+		return 0;
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return xdr_argsize_check(rqstp, p);
+	return 1;
 }
 
 int
@@ -402,9 +407,11 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 	args->cookie = ntohl(*p++);
 	args->count  = ntohl(*p++);
 	args->count  = min_t(u32, args->count, PAGE_SIZE);
+	if (!xdr_argsize_check(rqstp, p))
+		return 0;
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return xdr_argsize_check(rqstp, p);
+	return 1;
 }
 
 /*
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index e770abeed32d..6ef19cf658b4 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -336,8 +336,7 @@ xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p)
 {
 	char *cp = (char *)p;
 	struct kvec *vec = &rqstp->rq_arg.head[0];
-	return cp >= (char*)vec->iov_base
-		&& cp <= (char*)vec->iov_base + vec->iov_len;
+	return cp == (char *)vec->iov_base + vec->iov_len;
 }
 
 static inline int
-- 
cgit v1.2.3


From 16719199a43f0740113041fb34a0854b1d7f2111 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Wed, 1 Mar 2017 03:12:03 +0300
Subject: uapi: fix linux/nfsd/cld.h userspace compilation errors

Include <linux/types.h> and consistently use types it provides
to fix the following linux/nfsd/cld.h userspace compilation errors:

/usr/include/linux/nfsd/cld.h:40:2: error: unknown type name 'uint16_t'
  uint16_t cn_len;    /* length of cm_id */
/usr/include/linux/nfsd/cld.h:46:2: error: unknown type name 'uint8_t'
  uint8_t  cm_vers;  /* upcall version */
/usr/include/linux/nfsd/cld.h:47:2: error: unknown type name 'uint8_t'
  uint8_t  cm_cmd;   /* upcall command */
/usr/include/linux/nfsd/cld.h:48:2: error: unknown type name 'int16_t'
  int16_t  cm_status;  /* return code */
/usr/include/linux/nfsd/cld.h:49:2: error: unknown type name 'uint32_t'
  uint32_t cm_xid;   /* transaction id */
/usr/include/linux/nfsd/cld.h:51:3: error: unknown type name 'int64_t'
   int64_t  cm_gracetime; /* grace period start time */

Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/uapi/linux/nfsd/cld.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/nfsd/cld.h b/include/uapi/linux/nfsd/cld.h
index f14a9ab06f1f..ec260274be0c 100644
--- a/include/uapi/linux/nfsd/cld.h
+++ b/include/uapi/linux/nfsd/cld.h
@@ -22,6 +22,8 @@
 #ifndef _NFSD_CLD_H
 #define _NFSD_CLD_H
 
+#include <linux/types.h>
+
 /* latest upcall version available */
 #define CLD_UPCALL_VERSION 1
 
@@ -37,18 +39,18 @@ enum cld_command {
 
 /* representation of long-form NFSv4 client ID */
 struct cld_name {
-	uint16_t	cn_len;				/* length of cm_id */
+	__u16		cn_len;				/* length of cm_id */
 	unsigned char	cn_id[NFS4_OPAQUE_LIMIT];	/* client-provided */
 } __attribute__((packed));
 
 /* message struct for communication with userspace */
 struct cld_msg {
-	uint8_t		cm_vers;		/* upcall version */
-	uint8_t		cm_cmd;			/* upcall command */
-	int16_t		cm_status;		/* return code */
-	uint32_t	cm_xid;			/* transaction id */
+	__u8		cm_vers;		/* upcall version */
+	__u8		cm_cmd;			/* upcall command */
+	__s16		cm_status;		/* return code */
+	__u32		cm_xid;			/* transaction id */
 	union {
-		int64_t		cm_gracetime;	/* grace period start time */
+		__s64		cm_gracetime;	/* grace period start time */
 		struct cld_name	cm_name;
 	} __attribute__((packed)) cm_u;
 } __attribute__((packed));
-- 
cgit v1.2.3


From 2f10fdcb6a1c5f26246339d3b606131fae483c44 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Thu, 23 Mar 2017 16:57:36 +0800
Subject: nfsd4: remove pointless strdup_if_nonnull

kstrdup() already checks for NULL.

(Brought to our attention by Jason Yann noticing (from sparse output)
that it should have been declared static.)

Signed-off-by: NeilBrown <neilb@suse.com>
Reported-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e9ef50addddb..22002fb75a18 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1912,28 +1912,15 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
 	target->cl_clientid.cl_id = source->cl_clientid.cl_id; 
 }
 
-int strdup_if_nonnull(char **target, char *source)
-{
-	if (source) {
-		*target = kstrdup(source, GFP_KERNEL);
-		if (!*target)
-			return -ENOMEM;
-	} else
-		*target = NULL;
-	return 0;
-}
-
 static int copy_cred(struct svc_cred *target, struct svc_cred *source)
 {
-	int ret;
+	target->cr_principal = kstrdup(source->cr_principal, GFP_KERNEL);
+	target->cr_raw_principal = kstrdup(source->cr_raw_principal,
+								GFP_KERNEL);
+	if ((source->cr_principal && ! target->cr_principal) ||
+	    (source->cr_raw_principal && ! target->cr_raw_principal))
+		return -ENOMEM;
 
-	ret = strdup_if_nonnull(&target->cr_principal, source->cr_principal);
-	if (ret)
-		return ret;
-	ret = strdup_if_nonnull(&target->cr_raw_principal,
-					source->cr_raw_principal);
-	if (ret)
-		return ret;
 	target->cr_flavor = source->cr_flavor;
 	target->cr_uid = source->cr_uid;
 	target->cr_gid = source->cr_gid;
-- 
cgit v1.2.3


From 99bbf6ecc694dfe0b026e15359c5aa2a60b97a93 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Wed, 15 Mar 2017 12:40:44 +1100
Subject: NFS: don't try to cross a mountpount when there isn't one there.

consider the sequence of commands:
 mkdir -p /import/nfs /import/bind /import/etc
 mount --bind / /import/bind
 mount --make-private /import/bind
 mount --bind /import/etc /import/bind/etc

 exportfs -o rw,no_root_squash,crossmnt,async,no_subtree_check localhost:/
 mount -o vers=4 localhost:/ /import/nfs
 ls -l /import/nfs/etc

You would not expect this to report a stale file handle.
Yet it does.

The manipulations under /import/bind cause the dentry for
/etc to get the DCACHE_MOUNTED flag set, even though nothing
is mounted on /etc.  This causes nfsd to call
nfsd_cross_mnt() even though there is no mountpoint.  So an
upcall to mountd for "/etc" is performed.

The 'crossmnt' flag on the export of / causes mountd to
report that /etc is exported as it is a descendant of /.  It
assumes the kernel wouldn't ask about something that wasn't
a mountpoint.  The filehandle returned identifies the
filesystem and the inode number of /etc.

When this filehandle is presented to rpc.mountd, via
"nfsd.fh", the inode cannot be found associated with any
name in /etc/exports, or with any mountpoint listed by
getmntent().  So rpc.mountd says the filehandle doesn't
exist. Hence ESTALE.

This is fixed by teaching nfsd not to trust DCACHE_MOUNTED
too much.  It is just a hint, not a guarantee.
Change nfsd_mountpoint() to return '1' for a certain mountpoint,
'2' for a possible mountpoint, and 0 otherwise.

Then change nfsd_crossmnt() to check if follow_down()
actually found a mountpount and, if not, to avoid performing
a lookup if the location is not known to certainly require
an export-point.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/vfs.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 19d50f600e8d..04cafaa94bf7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -94,6 +94,12 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 	err = follow_down(&path);
 	if (err < 0)
 		goto out;
+	if (path.mnt == exp->ex_path.mnt && path.dentry == dentry &&
+	    nfsd_mountpoint(dentry, exp) == 2) {
+		/* This is only a mountpoint in some other namespace */
+		path_put(&path);
+		goto out;
+	}
 
 	exp2 = rqst_exp_get_by_name(rqstp, &path);
 	if (IS_ERR(exp2)) {
@@ -167,16 +173,26 @@ static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, st
 /*
  * For nfsd purposes, we treat V4ROOT exports as though there was an
  * export at *every* directory.
+ * We return:
+ * '1' if this dentry *must* be an export point,
+ * '2' if it might be, if there is really a mount here, and
+ * '0' if there is no chance of an export point here.
  */
 int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
 {
-	if (d_mountpoint(dentry))
+	if (!d_inode(dentry))
+		return 0;
+	if (exp->ex_flags & NFSEXP_V4ROOT)
 		return 1;
 	if (nfsd4_is_junction(dentry))
 		return 1;
-	if (!(exp->ex_flags & NFSEXP_V4ROOT))
-		return 0;
-	return d_inode(dentry) != NULL;
+	if (d_mountpoint(dentry))
+		/*
+		 * Might only be a mountpoint in a different namespace,
+		 * but we need to check.
+		 */
+		return 2;
+	return 0;
 }
 
 __be32
-- 
cgit v1.2.3


From 17f5f7f506aaca985b95df7ef7fc2ff49c36a8e9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:05:36 -0400
Subject: svcrdma: Move send_wr to svc_rdma_op_ctxt

Clean up: Move the ib_send_wr off the stack, and move common code
to post a Send Work Request into a helper.

This is a refactoring change only.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h            |  4 ++
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 11 +----
 net/sunrpc/xprtrdma/svc_rdma_sendto.c      | 64 ++++++++++++++++++------------
 3 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index b105f73e3ca2..287db5c179d8 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -85,6 +85,7 @@ struct svc_rdma_op_ctxt {
 	enum dma_data_direction direction;
 	int count;
 	unsigned int mapped_sges;
+	struct ib_send_wr send_wr;
 	struct ib_sge sge[RPCSVC_MAXPAGES];
 	struct page *pages[RPCSVC_MAXPAGES];
 };
@@ -227,6 +228,9 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
 /* svc_rdma_sendto.c */
 extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
 			    struct svc_rdma_req_map *, bool);
+extern int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
+				 struct svc_rdma_op_ctxt *ctxt,
+				 int num_sge, u32 inv_rkey);
 extern int svc_rdma_sendto(struct svc_rqst *);
 extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
 				int);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index ff1df40f0d26..f12f39c189c3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -104,7 +104,6 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
 	struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
 	struct svc_rdma_op_ctxt *ctxt;
 	struct svc_rdma_req_map *vec;
-	struct ib_send_wr send_wr;
 	int ret;
 
 	vec = svc_rdma_get_req_map(rdma);
@@ -132,15 +131,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
 	}
 	svc_rdma_count_mappings(rdma, ctxt);
 
-	memset(&send_wr, 0, sizeof(send_wr));
-	ctxt->cqe.done = svc_rdma_wc_send;
-	send_wr.wr_cqe = &ctxt->cqe;
-	send_wr.sg_list = ctxt->sge;
-	send_wr.num_sge = 1;
-	send_wr.opcode = IB_WR_SEND;
-	send_wr.send_flags = IB_SEND_SIGNALED;
-
-	ret = svc_rdma_send(rdma, &send_wr);
+	ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
 	if (ret) {
 		ret = -EIO;
 		goto out_unmap;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 515221b16d09..f90b40d0932f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -435,6 +435,43 @@ out_err:
 	return -EIO;
 }
 
+/**
+ * svc_rdma_post_send_wr - Set up and post one Send Work Request
+ * @rdma: controlling transport
+ * @ctxt: op_ctxt for transmitting the Send WR
+ * @num_sge: number of SGEs to send
+ * @inv_rkey: R_key argument to Send With Invalidate, or zero
+ *
+ * Returns:
+ *	%0 if the Send* was posted successfully,
+ *	%-ENOTCONN if the connection was lost or dropped,
+ *	%-EINVAL if there was a problem with the Send we built,
+ *	%-ENOMEM if ib_post_send failed.
+ */
+int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
+			  struct svc_rdma_op_ctxt *ctxt, int num_sge,
+			  u32 inv_rkey)
+{
+	struct ib_send_wr *send_wr = &ctxt->send_wr;
+
+	dprintk("svcrdma: posting Send WR with %u sge(s)\n", num_sge);
+
+	send_wr->next = NULL;
+	ctxt->cqe.done = svc_rdma_wc_send;
+	send_wr->wr_cqe = &ctxt->cqe;
+	send_wr->sg_list = ctxt->sge;
+	send_wr->num_sge = num_sge;
+	send_wr->send_flags = IB_SEND_SIGNALED;
+	if (inv_rkey) {
+		send_wr->opcode = IB_WR_SEND_WITH_INV;
+		send_wr->ex.invalidate_rkey = inv_rkey;
+	} else {
+		send_wr->opcode = IB_WR_SEND;
+	}
+
+	return svc_rdma_send(rdma, send_wr);
+}
+
 /* This function prepares the portion of the RPCRDMA message to be
  * sent in the RDMA_SEND. This function is called after data sent via
  * RDMA has already been transmitted. There are three cases:
@@ -460,7 +497,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		      u32 inv_rkey)
 {
 	struct svc_rdma_op_ctxt *ctxt;
-	struct ib_send_wr send_wr;
 	u32 xdr_off;
 	int sge_no;
 	int sge_bytes;
@@ -524,19 +560,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		pr_err("svcrdma: Too many sges (%d)\n", sge_no);
 		goto err;
 	}
-	memset(&send_wr, 0, sizeof send_wr);
-	ctxt->cqe.done = svc_rdma_wc_send;
-	send_wr.wr_cqe = &ctxt->cqe;
-	send_wr.sg_list = ctxt->sge;
-	send_wr.num_sge = sge_no;
-	if (inv_rkey) {
-		send_wr.opcode = IB_WR_SEND_WITH_INV;
-		send_wr.ex.invalidate_rkey = inv_rkey;
-	} else
-		send_wr.opcode = IB_WR_SEND;
-	send_wr.send_flags =  IB_SEND_SIGNALED;
 
-	ret = svc_rdma_send(rdma, &send_wr);
+	ret = svc_rdma_post_send_wr(rdma, ctxt, sge_no, inv_rkey);
 	if (ret)
 		goto err;
 
@@ -652,7 +677,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 			 int status)
 {
-	struct ib_send_wr err_wr;
 	struct page *p;
 	struct svc_rdma_op_ctxt *ctxt;
 	enum rpcrdma_errcode err;
@@ -692,17 +716,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	}
 	svc_rdma_count_mappings(xprt, ctxt);
 
-	/* Prepare SEND WR */
-	memset(&err_wr, 0, sizeof(err_wr));
-	ctxt->cqe.done = svc_rdma_wc_send;
-	err_wr.wr_cqe = &ctxt->cqe;
-	err_wr.sg_list = ctxt->sge;
-	err_wr.num_sge = 1;
-	err_wr.opcode = IB_WR_SEND;
-	err_wr.send_flags = IB_SEND_SIGNALED;
-
-	/* Post It */
-	ret = svc_rdma_send(xprt, &err_wr);
+	ret = svc_rdma_post_send_wr(xprt, ctxt, 1, 0);
 	if (ret) {
 		dprintk("svcrdma: Error %d posting send for protocol error\n",
 			ret);
-- 
cgit v1.2.3


From 6e6092ca305ad785c605d7e313727aad96c228a5 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:05:44 -0400
Subject: svcrdma: Add svc_rdma_map_reply_hdr()

Introduce a helper to DMA-map a reply's transport header before
sending it. This will in part replace the map vector cache.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h            |  3 ++
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 36 ++++++------------
 net/sunrpc/xprtrdma/svc_rdma_sendto.c      | 61 +++++++++++++++++++++++-------
 3 files changed, 62 insertions(+), 38 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 287db5c179d8..002a46d1faa1 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -228,6 +228,9 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
 /* svc_rdma_sendto.c */
 extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
 			    struct svc_rdma_req_map *, bool);
+extern int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma,
+				  struct svc_rdma_op_ctxt *ctxt,
+				  __be32 *rdma_resp, unsigned int len);
 extern int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
 				 struct svc_rdma_op_ctxt *ctxt,
 				 int num_sge, u32 inv_rkey);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index f12f39c189c3..0305b33d482f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -101,50 +101,36 @@ out_notfound:
 static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
 			      struct rpc_rqst *rqst)
 {
-	struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
 	struct svc_rdma_op_ctxt *ctxt;
-	struct svc_rdma_req_map *vec;
 	int ret;
 
-	vec = svc_rdma_get_req_map(rdma);
-	ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false);
-	if (ret)
+	ctxt = svc_rdma_get_context(rdma);
+
+	/* rpcrdma_bc_send_request builds the transport header and
+	 * the backchannel RPC message in the same buffer. Thus only
+	 * one SGE is needed to send both.
+	 */
+	ret = svc_rdma_map_reply_hdr(rdma, ctxt, rqst->rq_buffer,
+				     rqst->rq_snd_buf.len);
+	if (ret < 0)
 		goto out_err;
 
 	ret = svc_rdma_repost_recv(rdma, GFP_NOIO);
 	if (ret)
 		goto out_err;
 
-	ctxt = svc_rdma_get_context(rdma);
-	ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
-	ctxt->count = 1;
-
-	ctxt->direction = DMA_TO_DEVICE;
-	ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
-	ctxt->sge[0].length = sndbuf->len;
-	ctxt->sge[0].addr =
-	    ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
-			    sndbuf->len, DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
-		ret = -EIO;
-		goto out_unmap;
-	}
-	svc_rdma_count_mappings(rdma, ctxt);
-
 	ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
-	if (ret) {
-		ret = -EIO;
+	if (ret)
 		goto out_unmap;
-	}
 
 out_err:
-	svc_rdma_put_req_map(rdma, vec);
 	dprintk("svcrdma: %s returns %d\n", __func__, ret);
 	return ret;
 
 out_unmap:
 	svc_rdma_unmap_dma(ctxt);
 	svc_rdma_put_context(ctxt, 1);
+	ret = -EIO;
 	goto out_err;
 }
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index f90b40d0932f..a7dc71daa776 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -217,6 +217,49 @@ static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
 	return 0;
 }
 
+static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
+				 struct svc_rdma_op_ctxt *ctxt,
+				 unsigned int sge_no,
+				 struct page *page,
+				 unsigned int offset,
+				 unsigned int len)
+{
+	struct ib_device *dev = rdma->sc_cm_id->device;
+	dma_addr_t dma_addr;
+
+	dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(dev, dma_addr))
+		return -EIO;
+
+	ctxt->sge[sge_no].addr = dma_addr;
+	ctxt->sge[sge_no].length = len;
+	ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
+	svc_rdma_count_mappings(rdma, ctxt);
+	return 0;
+}
+
+/**
+ * svc_rdma_map_reply_hdr - DMA map the transport header buffer
+ * @rdma: controlling transport
+ * @ctxt: op_ctxt for the Send WR
+ * @rdma_resp: buffer containing transport header
+ * @len: length of transport header
+ *
+ * Returns:
+ *	%0 if the header is DMA mapped,
+ *	%-EIO if DMA mapping failed.
+ */
+int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma,
+			   struct svc_rdma_op_ctxt *ctxt,
+			   __be32 *rdma_resp,
+			   unsigned int len)
+{
+	ctxt->direction = DMA_TO_DEVICE;
+	ctxt->pages[0] = virt_to_page(rdma_resp);
+	ctxt->count = 1;
+	return svc_rdma_dma_map_page(rdma, ctxt, 0, ctxt->pages[0], 0, len);
+}
+
 /* Assumptions:
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
@@ -699,22 +742,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 		err = ERR_VERS;
 	length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
 
+	/* Map transport header; no RPC message payload */
 	ctxt = svc_rdma_get_context(xprt);
-	ctxt->direction = DMA_TO_DEVICE;
-	ctxt->count = 1;
-	ctxt->pages[0] = p;
-
-	/* Prepare SGE for local address */
-	ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
-	ctxt->sge[0].length = length;
-	ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
-					    p, 0, length, DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
-		dprintk("svcrdma: Error mapping buffer for protocol error\n");
-		svc_rdma_put_context(ctxt, 1);
+	ret = svc_rdma_map_reply_hdr(xprt, ctxt, &rmsgp->rm_xid, length);
+	if (ret) {
+		dprintk("svcrdma: Error %d mapping send for protocol error\n",
+			ret);
 		return;
 	}
-	svc_rdma_count_mappings(xprt, ctxt);
 
 	ret = svc_rdma_post_send_wr(xprt, ctxt, 1, 0);
 	if (ret) {
-- 
cgit v1.2.3


From b623589dbacbc786c2fffc85113a1dc1a331e2ca Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:05:52 -0400
Subject: svcrdma: Eliminate RPCRDMA_SQ_DEPTH_MULT

The Send Queue depth is temporarily reduced to 1 SQE per credit. The
new rdma_rw API does an internal computation, during QP creation, to
increase the depth of the Send Queue to handle RDMA Read and Write
operations.

This change has to come before the NFSD code paths are updated to
use the rdma_rw API. Without this patch, rdma_rw_init_qp() increases
the size of the SQ too much, resulting in memory allocation failures
during QP creation.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          | 1 -
 net/sunrpc/xprtrdma/svc_rdma.c           | 2 --
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +-
 3 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 002a46d1faa1..11d5aa123f17 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -182,7 +182,6 @@ struct svcxprt_rdma {
 /* The default ORD value is based on two outstanding full-size writes with a
  * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ.  */
 #define RPCRDMA_ORD             (64/4)
-#define RPCRDMA_SQ_DEPTH_MULT   8
 #define RPCRDMA_MAX_REQUESTS    32
 #define RPCRDMA_MAX_REQ_SIZE    4096
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index c846ca9f1eba..912444174647 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -247,8 +247,6 @@ int svc_rdma_init(void)
 	dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
 	dprintk("\tsvcrdma_ord      : %d\n", svcrdma_ord);
 	dprintk("\tmax_requests     : %u\n", svcrdma_max_requests);
-	dprintk("\tsq_depth         : %u\n",
-		svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
 	dprintk("\tmax_bc_requests  : %u\n", svcrdma_max_bc_requests);
 	dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index fc8f14c7bfec..e1097cc6d1eb 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1014,7 +1014,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 					    svcrdma_max_bc_requests);
 	newxprt->sc_rq_depth = newxprt->sc_max_requests +
 			       newxprt->sc_max_bc_requests;
-	newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
+	newxprt->sc_sq_depth = newxprt->sc_rq_depth;
 	atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
 
 	if (!svc_rdma_prealloc_ctxts(newxprt))
-- 
cgit v1.2.3


From c55ab0707b2817046ad48d6c87a6b764119a2458 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:06:00 -0400
Subject: svcrdma: Add helper to save pages under I/O

Clean up: extract the logic to save pages under I/O into a helper to
add a big documenting comment without adding clutter in the send
path.

This is a refactoring change only.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_sendto.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index a7dc71daa776..2798f3ea0020 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -478,6 +478,23 @@ out_err:
 	return -EIO;
 }
 
+/* The svc_rqst and all resources it owns are released as soon as
+ * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
+ * so they are released by the Send completion handler.
+ */
+static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
+				   struct svc_rdma_op_ctxt *ctxt)
+{
+	int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
+
+	ctxt->count += pages;
+	for (i = 0; i < pages; i++) {
+		ctxt->pages[i + 1] = rqstp->rq_respages[i];
+		rqstp->rq_respages[i] = NULL;
+	}
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
+}
+
 /**
  * svc_rdma_post_send_wr - Set up and post one Send Work Request
  * @rdma: controlling transport
@@ -543,8 +560,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	u32 xdr_off;
 	int sge_no;
 	int sge_bytes;
-	int page_no;
-	int pages;
 	int ret = -EIO;
 
 	/* Prepare the context */
@@ -587,17 +602,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		goto err;
 	}
 
-	/* Save all respages in the ctxt and remove them from the
-	 * respages array. They are our pages until the I/O
-	 * completes.
-	 */
-	pages = rqstp->rq_next_page - rqstp->rq_respages;
-	for (page_no = 0; page_no < pages; page_no++) {
-		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
-		ctxt->count++;
-		rqstp->rq_respages[page_no] = NULL;
-	}
-	rqstp->rq_next_page = rqstp->rq_respages + 1;
+	svc_rdma_save_io_pages(rqstp, ctxt);
 
 	if (sge_no > rdma->sc_max_sge) {
 		pr_err("svcrdma: Too many sges (%d)\n", sge_no);
-- 
cgit v1.2.3


From c238c4c034f857d12d7efbf9934d96b8bb68fbc7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:06:08 -0400
Subject: svcrdma: Clean up svc_rdma_get_inv_rkey()

Replace C structure-based XDR decoding with more portable code that
instead uses pointer arithmetic.

This is a refactoring change only.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_sendto.c | 39 +++++++++++++++--------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 2798f3ea0020..2eb3df698e11 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -189,32 +189,25 @@ static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
  * Invalidate, and responder chooses one rkey to invalidate.
  *
  * Find a candidate rkey to invalidate when sending a reply.  Picks the
- * first rkey it finds in the chunks lists.
+ * first R_key it finds in the chunk lists.
  *
  * Returns zero if RPC's chunk lists are empty.
  */
-static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
-				 struct rpcrdma_write_array *wr_ary,
-				 struct rpcrdma_write_array *rp_ary)
+static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
+				 __be32 *wr_lst, __be32 *rp_ch)
 {
-	struct rpcrdma_read_chunk *rd_ary;
-	struct rpcrdma_segment *arg_ch;
-
-	rd_ary = (struct rpcrdma_read_chunk *)&rdma_argp->rm_body.rm_chunks[0];
-	if (rd_ary->rc_discrim != xdr_zero)
-		return be32_to_cpu(rd_ary->rc_target.rs_handle);
-
-	if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
-		arg_ch = &wr_ary->wc_array[0].wc_target;
-		return be32_to_cpu(arg_ch->rs_handle);
-	}
-
-	if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
-		arg_ch = &rp_ary->wc_array[0].wc_target;
-		return be32_to_cpu(arg_ch->rs_handle);
-	}
+	__be32 *p;
 
-	return 0;
+	p = rdma_argp + rpcrdma_fixed_maxsz;
+	if (*p != xdr_zero)
+		p += 2;
+	else if (wr_lst && be32_to_cpup(wr_lst + 1))
+		p = wr_lst + 2;
+	else if (rp_ch && be32_to_cpup(rp_ch + 1))
+		p = rp_ch + 2;
+	else
+		return 0;
+	return be32_to_cpup(p);
 }
 
 static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
@@ -650,7 +643,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 
 	inv_rkey = 0;
 	if (rdma->sc_snd_w_inv)
-		inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary);
+		inv_rkey = svc_rdma_get_inv_rkey(&rdma_argp->rm_xid,
+						 (__be32 *)wr_ary,
+						 (__be32 *)rp_ary);
 
 	/* Build an req vec for the XDR */
 	vec = svc_rdma_get_req_map(rdma);
-- 
cgit v1.2.3


From f13193f50b64e2e0c87706b838d6b9895626a892 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:06:16 -0400
Subject: svcrdma: Introduce local rdma_rw API helpers

The plan is to replace the local bespoke code that constructs and
posts RDMA Read and Write Work Requests with calls to the rdma_rw
API. This shares code with other RDMA-enabled ULPs that manages the
gory details of buffer registration and posting Work Requests.

Some design notes:

 o The structure of RPC-over-RDMA transport headers is flexible,
   allowing multiple segments per Reply with arbitrary alignment,
   each with a unique R_key. Write and Send WRs continue to be
   built and posted in separate code paths. However, one whole
   chunk (with one or more RDMA segments apiece) gets exactly
   one ib_post_send and one work completion.

 o svc_xprt reference counting is modified, since a chain of
   rdma_rw_ctx structs generates one completion, no matter how
   many Write WRs are posted.

 o The current code builds the transport header as it is construct-
   ing Write WRs. I've replaced that with marshaling of transport
   header data items in a separate step. This is because the exact
   structure of client-provided segments may not align with the
   components of the server's reply xdr_buf, or the pages in the
   page list. Thus parts of each client-provided segment may be
   written at different points in the send path.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          |  11 +
 net/sunrpc/Kconfig                       |   1 +
 net/sunrpc/xprtrdma/Makefile             |   2 +-
 net/sunrpc/xprtrdma/svc_rdma_rw.c        | 512 +++++++++++++++++++++++++++++++
 net/sunrpc/xprtrdma/svc_rdma_transport.c |   4 +
 5 files changed, 529 insertions(+), 1 deletion(-)
 create mode 100644 net/sunrpc/xprtrdma/svc_rdma_rw.c

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 11d5aa123f17..ca08671fb7e2 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -145,12 +145,15 @@ struct svcxprt_rdma {
 	u32		     sc_max_requests;	/* Max requests */
 	u32		     sc_max_bc_requests;/* Backward credits */
 	int                  sc_max_req_size;	/* Size of each RQ WR buf */
+	u8		     sc_port_num;
 
 	struct ib_pd         *sc_pd;
 
 	spinlock_t	     sc_ctxt_lock;
 	struct list_head     sc_ctxts;
 	int		     sc_ctxt_used;
+	spinlock_t	     sc_rw_ctxt_lock;
+	struct list_head     sc_rw_ctxts;
 	spinlock_t	     sc_map_lock;
 	struct list_head     sc_maps;
 
@@ -224,6 +227,14 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
 				struct svc_rdma_op_ctxt *, int *, u32 *,
 				u32, u32, u64, bool);
 
+/* svc_rdma_rw.c */
+extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
+extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
+				     __be32 *wr_ch, struct xdr_buf *xdr);
+extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
+				     __be32 *rp_ch, bool writelist,
+				     struct xdr_buf *xdr);
+
 /* svc_rdma_sendto.c */
 extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
 			    struct svc_rdma_req_map *, bool);
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 04ce2c0b660e..ac09ca803296 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -52,6 +52,7 @@ config SUNRPC_XPRT_RDMA
 	tristate "RPC-over-RDMA transport"
 	depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
 	default SUNRPC && INFINIBAND
+	select SG_POOL
 	help
 	  This option allows the NFS client and server to use RDMA
 	  transports (InfiniBand, iWARP, or RoCE).
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index ef19fa42c50f..c1ae8142ab73 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -4,5 +4,5 @@ rpcrdma-y := transport.o rpc_rdma.o verbs.o \
 	fmr_ops.o frwr_ops.o \
 	svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
 	svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
-	module.o
+	svc_rdma_rw.o module.o
 rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
new file mode 100644
index 000000000000..0cf620277693
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -0,0 +1,512 @@
+/*
+ * Copyright (c) 2016 Oracle.  All rights reserved.
+ *
+ * Use the core R/W API to move RPC-over-RDMA Read and Write chunks.
+ */
+
+#include <linux/sunrpc/rpc_rdma.h>
+#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/debug.h>
+
+#include <rdma/rw.h>
+
+#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
+
+/* Each R/W context contains state for one chain of RDMA Read or
+ * Write Work Requests.
+ *
+ * Each WR chain handles a single contiguous server-side buffer,
+ * because scatterlist entries after the first have to start on
+ * page alignment. xdr_buf iovecs cannot guarantee alignment.
+ *
+ * Each WR chain handles only one R_key. Each RPC-over-RDMA segment
+ * from a client may contain a unique R_key, so each WR chain moves
+ * up to one segment at a time.
+ *
+ * The scatterlist makes this data structure over 4KB in size. To
+ * make it less likely to fail, and to handle the allocation for
+ * smaller I/O requests without disabling bottom-halves, these
+ * contexts are created on demand, but cached and reused until the
+ * controlling svcxprt_rdma is destroyed.
+ */
+struct svc_rdma_rw_ctxt {
+	struct list_head	rw_list;
+	struct rdma_rw_ctx	rw_ctx;
+	int			rw_nents;
+	struct sg_table		rw_sg_table;
+	struct scatterlist	rw_first_sgl[0];
+};
+
+static inline struct svc_rdma_rw_ctxt *
+svc_rdma_next_ctxt(struct list_head *list)
+{
+	return list_first_entry_or_null(list, struct svc_rdma_rw_ctxt,
+					rw_list);
+}
+
+static struct svc_rdma_rw_ctxt *
+svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
+{
+	struct svc_rdma_rw_ctxt *ctxt;
+
+	spin_lock(&rdma->sc_rw_ctxt_lock);
+
+	ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts);
+	if (ctxt) {
+		list_del(&ctxt->rw_list);
+		spin_unlock(&rdma->sc_rw_ctxt_lock);
+	} else {
+		spin_unlock(&rdma->sc_rw_ctxt_lock);
+		ctxt = kmalloc(sizeof(*ctxt) +
+			       SG_CHUNK_SIZE * sizeof(struct scatterlist),
+			       GFP_KERNEL);
+		if (!ctxt)
+			goto out;
+		INIT_LIST_HEAD(&ctxt->rw_list);
+	}
+
+	ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
+	if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
+				   ctxt->rw_sg_table.sgl)) {
+		kfree(ctxt);
+		ctxt = NULL;
+	}
+out:
+	return ctxt;
+}
+
+static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
+				 struct svc_rdma_rw_ctxt *ctxt)
+{
+	sg_free_table_chained(&ctxt->rw_sg_table, true);
+
+	spin_lock(&rdma->sc_rw_ctxt_lock);
+	list_add(&ctxt->rw_list, &rdma->sc_rw_ctxts);
+	spin_unlock(&rdma->sc_rw_ctxt_lock);
+}
+
+/**
+ * svc_rdma_destroy_rw_ctxts - Free accumulated R/W contexts
+ * @rdma: transport about to be destroyed
+ *
+ */
+void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
+{
+	struct svc_rdma_rw_ctxt *ctxt;
+
+	while ((ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts)) != NULL) {
+		list_del(&ctxt->rw_list);
+		kfree(ctxt);
+	}
+}
+
+/* A chunk context tracks all I/O for moving one Read or Write
+ * chunk. This is a a set of rdma_rw's that handle data movement
+ * for all segments of one chunk.
+ *
+ * These are small, acquired with a single allocator call, and
+ * no more than one is needed per chunk. They are allocated on
+ * demand, and not cached.
+ */
+struct svc_rdma_chunk_ctxt {
+	struct ib_cqe		cc_cqe;
+	struct svcxprt_rdma	*cc_rdma;
+	struct list_head	cc_rwctxts;
+	int			cc_sqecount;
+	enum dma_data_direction cc_dir;
+};
+
+static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
+			     struct svc_rdma_chunk_ctxt *cc,
+			     enum dma_data_direction dir)
+{
+	cc->cc_rdma = rdma;
+	svc_xprt_get(&rdma->sc_xprt);
+
+	INIT_LIST_HEAD(&cc->cc_rwctxts);
+	cc->cc_sqecount = 0;
+	cc->cc_dir = dir;
+}
+
+static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc)
+{
+	struct svcxprt_rdma *rdma = cc->cc_rdma;
+	struct svc_rdma_rw_ctxt *ctxt;
+
+	while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
+		list_del(&ctxt->rw_list);
+
+		rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
+				    rdma->sc_port_num, ctxt->rw_sg_table.sgl,
+				    ctxt->rw_nents, cc->cc_dir);
+		svc_rdma_put_rw_ctxt(rdma, ctxt);
+	}
+	svc_xprt_put(&rdma->sc_xprt);
+}
+
+/* State for sending a Write or Reply chunk.
+ *  - Tracks progress of writing one chunk over all its segments
+ *  - Stores arguments for the SGL constructor functions
+ */
+struct svc_rdma_write_info {
+	/* write state of this chunk */
+	unsigned int		wi_seg_off;
+	unsigned int		wi_seg_no;
+	unsigned int		wi_nsegs;
+	__be32			*wi_segs;
+
+	/* SGL constructor arguments */
+	struct xdr_buf		*wi_xdr;
+	unsigned char		*wi_base;
+	unsigned int		wi_next_off;
+
+	struct svc_rdma_chunk_ctxt	wi_cc;
+};
+
+static struct svc_rdma_write_info *
+svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, __be32 *chunk)
+{
+	struct svc_rdma_write_info *info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return info;
+
+	info->wi_seg_off = 0;
+	info->wi_seg_no = 0;
+	info->wi_nsegs = be32_to_cpup(++chunk);
+	info->wi_segs = ++chunk;
+	svc_rdma_cc_init(rdma, &info->wi_cc, DMA_TO_DEVICE);
+	return info;
+}
+
+static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
+{
+	svc_rdma_cc_release(&info->wi_cc);
+	kfree(info);
+}
+
+/**
+ * svc_rdma_write_done - Write chunk completion
+ * @cq: controlling Completion Queue
+ * @wc: Work Completion
+ *
+ * Pages under I/O are freed by a subsequent Send completion.
+ */
+static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct svc_rdma_chunk_ctxt *cc =
+			container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
+	struct svcxprt_rdma *rdma = cc->cc_rdma;
+	struct svc_rdma_write_info *info =
+			container_of(cc, struct svc_rdma_write_info, wi_cc);
+
+	atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
+	wake_up(&rdma->sc_send_wait);
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			pr_err("svcrdma: write ctx: %s (%u/0x%x)\n",
+			       ib_wc_status_msg(wc->status),
+			       wc->status, wc->vendor_err);
+	}
+
+	svc_rdma_write_info_free(info);
+}
+
+/* This function sleeps when the transport's Send Queue is congested.
+ *
+ * Assumptions:
+ * - If ib_post_send() succeeds, only one completion is expected,
+ *   even if one or more WRs are flushed. This is true when posting
+ *   an rdma_rw_ctx or when posting a single signaled WR.
+ */
+static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
+{
+	struct svcxprt_rdma *rdma = cc->cc_rdma;
+	struct svc_xprt *xprt = &rdma->sc_xprt;
+	struct ib_send_wr *first_wr, *bad_wr;
+	struct list_head *tmp;
+	struct ib_cqe *cqe;
+	int ret;
+
+	first_wr = NULL;
+	cqe = &cc->cc_cqe;
+	list_for_each(tmp, &cc->cc_rwctxts) {
+		struct svc_rdma_rw_ctxt *ctxt;
+
+		ctxt = list_entry(tmp, struct svc_rdma_rw_ctxt, rw_list);
+		first_wr = rdma_rw_ctx_wrs(&ctxt->rw_ctx, rdma->sc_qp,
+					   rdma->sc_port_num, cqe, first_wr);
+		cqe = NULL;
+	}
+
+	do {
+		if (atomic_sub_return(cc->cc_sqecount,
+				      &rdma->sc_sq_avail) > 0) {
+			ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+			if (ret)
+				break;
+			return 0;
+		}
+
+		atomic_inc(&rdma_stat_sq_starve);
+		atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
+		wait_event(rdma->sc_send_wait,
+			   atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount);
+	} while (1);
+
+	pr_err("svcrdma: ib_post_send failed (%d)\n", ret);
+	set_bit(XPT_CLOSE, &xprt->xpt_flags);
+
+	/* If even one was posted, there will be a completion. */
+	if (bad_wr != first_wr)
+		return 0;
+
+	atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
+	wake_up(&rdma->sc_send_wait);
+	return -ENOTCONN;
+}
+
+/* Build and DMA-map an SGL that covers one kvec in an xdr_buf
+ */
+static void svc_rdma_vec_to_sg(struct svc_rdma_write_info *info,
+			       unsigned int len,
+			       struct svc_rdma_rw_ctxt *ctxt)
+{
+	struct scatterlist *sg = ctxt->rw_sg_table.sgl;
+
+	sg_set_buf(&sg[0], info->wi_base, len);
+	info->wi_base += len;
+
+	ctxt->rw_nents = 1;
+}
+
+/* Build and DMA-map an SGL that covers part of an xdr_buf's pagelist.
+ */
+static void svc_rdma_pagelist_to_sg(struct svc_rdma_write_info *info,
+				    unsigned int remaining,
+				    struct svc_rdma_rw_ctxt *ctxt)
+{
+	unsigned int sge_no, sge_bytes, page_off, page_no;
+	struct xdr_buf *xdr = info->wi_xdr;
+	struct scatterlist *sg;
+	struct page **page;
+
+	page_off = (info->wi_next_off + xdr->page_base) & ~PAGE_MASK;
+	page_no = (info->wi_next_off + xdr->page_base) >> PAGE_SHIFT;
+	page = xdr->pages + page_no;
+	info->wi_next_off += remaining;
+	sg = ctxt->rw_sg_table.sgl;
+	sge_no = 0;
+	do {
+		sge_bytes = min_t(unsigned int, remaining,
+				  PAGE_SIZE - page_off);
+		sg_set_page(sg, *page, sge_bytes, page_off);
+
+		remaining -= sge_bytes;
+		sg = sg_next(sg);
+		page_off = 0;
+		sge_no++;
+		page++;
+	} while (remaining);
+
+	ctxt->rw_nents = sge_no;
+}
+
+/* Construct RDMA Write WRs to send a portion of an xdr_buf containing
+ * an RPC Reply.
+ */
+static int
+svc_rdma_build_writes(struct svc_rdma_write_info *info,
+		      void (*constructor)(struct svc_rdma_write_info *info,
+					  unsigned int len,
+					  struct svc_rdma_rw_ctxt *ctxt),
+		      unsigned int remaining)
+{
+	struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
+	struct svcxprt_rdma *rdma = cc->cc_rdma;
+	struct svc_rdma_rw_ctxt *ctxt;
+	__be32 *seg;
+	int ret;
+
+	cc->cc_cqe.done = svc_rdma_write_done;
+	seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz;
+	do {
+		unsigned int write_len;
+		u32 seg_length, seg_handle;
+		u64 seg_offset;
+
+		if (info->wi_seg_no >= info->wi_nsegs)
+			goto out_overflow;
+
+		seg_handle = be32_to_cpup(seg);
+		seg_length = be32_to_cpup(seg + 1);
+		xdr_decode_hyper(seg + 2, &seg_offset);
+		seg_offset += info->wi_seg_off;
+
+		write_len = min(remaining, seg_length - info->wi_seg_off);
+		ctxt = svc_rdma_get_rw_ctxt(rdma,
+					    (write_len >> PAGE_SHIFT) + 2);
+		if (!ctxt)
+			goto out_noctx;
+
+		constructor(info, write_len, ctxt);
+		ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp,
+				       rdma->sc_port_num, ctxt->rw_sg_table.sgl,
+				       ctxt->rw_nents, 0, seg_offset,
+				       seg_handle, DMA_TO_DEVICE);
+		if (ret < 0)
+			goto out_initerr;
+
+		list_add(&ctxt->rw_list, &cc->cc_rwctxts);
+		cc->cc_sqecount += ret;
+		if (write_len == seg_length - info->wi_seg_off) {
+			seg += 4;
+			info->wi_seg_no++;
+			info->wi_seg_off = 0;
+		} else {
+			info->wi_seg_off += write_len;
+		}
+		remaining -= write_len;
+	} while (remaining);
+
+	return 0;
+
+out_overflow:
+	dprintk("svcrdma: inadequate space in Write chunk (%u)\n",
+		info->wi_nsegs);
+	return -E2BIG;
+
+out_noctx:
+	dprintk("svcrdma: no R/W ctxs available\n");
+	return -ENOMEM;
+
+out_initerr:
+	svc_rdma_put_rw_ctxt(rdma, ctxt);
+	pr_err("svcrdma: failed to map pagelist (%d)\n", ret);
+	return -EIO;
+}
+
+/* Send one of an xdr_buf's kvecs by itself. To send a Reply
+ * chunk, the whole RPC Reply is written back to the client.
+ * This function writes either the head or tail of the xdr_buf
+ * containing the Reply.
+ */
+static int svc_rdma_send_xdr_kvec(struct svc_rdma_write_info *info,
+				  struct kvec *vec)
+{
+	info->wi_base = vec->iov_base;
+	return svc_rdma_build_writes(info, svc_rdma_vec_to_sg,
+				     vec->iov_len);
+}
+
+/* Send an xdr_buf's page list by itself. A Write chunk is
+ * just the page list. a Reply chunk is the head, page list,
+ * and tail. This function is shared between the two types
+ * of chunk.
+ */
+static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
+				      struct xdr_buf *xdr)
+{
+	info->wi_xdr = xdr;
+	info->wi_next_off = 0;
+	return svc_rdma_build_writes(info, svc_rdma_pagelist_to_sg,
+				     xdr->page_len);
+}
+
+/**
+ * svc_rdma_send_write_chunk - Write all segments in a Write chunk
+ * @rdma: controlling RDMA transport
+ * @wr_ch: Write chunk provided by client
+ * @xdr: xdr_buf containing the data payload
+ *
+ * Returns a non-negative number of bytes the chunk consumed, or
+ *	%-E2BIG if the payload was larger than the Write chunk,
+ *	%-ENOMEM if rdma_rw context pool was exhausted,
+ *	%-ENOTCONN if posting failed (connection is lost),
+ *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ */
+int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
+			      struct xdr_buf *xdr)
+{
+	struct svc_rdma_write_info *info;
+	int ret;
+
+	if (!xdr->page_len)
+		return 0;
+
+	info = svc_rdma_write_info_alloc(rdma, wr_ch);
+	if (!info)
+		return -ENOMEM;
+
+	ret = svc_rdma_send_xdr_pagelist(info, xdr);
+	if (ret < 0)
+		goto out_err;
+
+	ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
+	if (ret < 0)
+		goto out_err;
+	return xdr->page_len;
+
+out_err:
+	svc_rdma_write_info_free(info);
+	return ret;
+}
+
+/**
+ * svc_rdma_send_reply_chunk - Write all segments in the Reply chunk
+ * @rdma: controlling RDMA transport
+ * @rp_ch: Reply chunk provided by client
+ * @writelist: true if client provided a Write list
+ * @xdr: xdr_buf containing an RPC Reply
+ *
+ * Returns a non-negative number of bytes the chunk consumed, or
+ *	%-E2BIG if the payload was larger than the Reply chunk,
+ *	%-ENOMEM if rdma_rw context pool was exhausted,
+ *	%-ENOTCONN if posting failed (connection is lost),
+ *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ */
+int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch,
+			      bool writelist, struct xdr_buf *xdr)
+{
+	struct svc_rdma_write_info *info;
+	int consumed, ret;
+
+	info = svc_rdma_write_info_alloc(rdma, rp_ch);
+	if (!info)
+		return -ENOMEM;
+
+	ret = svc_rdma_send_xdr_kvec(info, &xdr->head[0]);
+	if (ret < 0)
+		goto out_err;
+	consumed = xdr->head[0].iov_len;
+
+	/* Send the page list in the Reply chunk only if the
+	 * client did not provide Write chunks.
+	 */
+	if (!writelist && xdr->page_len) {
+		ret = svc_rdma_send_xdr_pagelist(info, xdr);
+		if (ret < 0)
+			goto out_err;
+		consumed += xdr->page_len;
+	}
+
+	if (xdr->tail[0].iov_len) {
+		ret = svc_rdma_send_xdr_kvec(info, &xdr->tail[0]);
+		if (ret < 0)
+			goto out_err;
+		consumed += xdr->tail[0].iov_len;
+	}
+
+	ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
+	if (ret < 0)
+		goto out_err;
+	return consumed;
+
+out_err:
+	svc_rdma_write_info_free(info);
+	return ret;
+}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e1097cc6d1eb..b25c50992a95 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -561,6 +561,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
+	INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
 	INIT_LIST_HEAD(&cma_xprt->sc_maps);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
 
@@ -568,6 +569,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
 	spin_lock_init(&cma_xprt->sc_frmr_q_lock);
 	spin_lock_init(&cma_xprt->sc_ctxt_lock);
+	spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
 	spin_lock_init(&cma_xprt->sc_map_lock);
 
 	/*
@@ -999,6 +1001,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 		newxprt, newxprt->sc_cm_id);
 
 	dev = newxprt->sc_cm_id->device;
+	newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
 
 	/* Qualify the transport resource defaults with the
 	 * capabilities of this particular device */
@@ -1248,6 +1251,7 @@ static void __svc_rdma_free(struct work_struct *work)
 	}
 
 	rdma_dealloc_frmr_q(rdma);
+	svc_rdma_destroy_rw_ctxts(rdma);
 	svc_rdma_destroy_ctxts(rdma);
 	svc_rdma_destroy_maps(rdma);
 
-- 
cgit v1.2.3


From 9a6a180b7867ceceeeab88a6f011bac23174b939 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:06:25 -0400
Subject: svcrdma: Use rdma_rw API in RPC reply path

The current svcrdma sendto code path posts one RDMA Write WR at a
time. Each of these Writes typically carries a small number of pages
(for instance, up to 30 pages for mlx4 devices). That means a 1MB
NFS READ reply requires 9 ib_post_send() calls for the Write WRs,
and one for the Send WR carrying the actual RPC Reply message.

Instead, use the new rdma_rw API. The details of Write WR chain
construction and memory registration are taken care of in the RDMA
core. svcrdma can focus on the details of the RPC-over-RDMA
protocol. This gives three main benefits:

1. All Write WRs for one RDMA segment are posted in a single chain.
As few as one ib_post_send() for each Write chunk.

2. The Write path can now use FRWR to register the Write buffers.
If the device's maximum page list depth is large, this means a
single Write WR is needed for each RPC's Write chunk data.

3. The new code introduces support for RPCs that carry both a Write
list and a Reply chunk. This combination can be used for an NFSv4
READ where the data payload is large, and thus is removed from the
Payload Stream, but the Payload Stream is still larger than the
inline threshold.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h            |   1 -
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c |   6 +-
 net/sunrpc/xprtrdma/svc_rdma_sendto.c      | 696 ++++++++++++++---------------
 net/sunrpc/xprtrdma/svc_rdma_transport.c   |   2 +
 4 files changed, 350 insertions(+), 355 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index ca08671fb7e2..599ee03ee3fb 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -212,7 +212,6 @@ extern int svc_rdma_xdr_decode_req(struct xdr_buf *);
 extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
 				     struct rpcrdma_msg *,
 				     enum rpcrdma_errcode, __be32 *);
-extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
 extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
 extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
 					    __be32, __be64, u32);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 0305b33d482f..bf185b79c98f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -90,9 +90,9 @@ out_notfound:
  * Caller holds the connection's mutex and has already marshaled
  * the RPC/RDMA request.
  *
- * This is similar to svc_rdma_reply, but takes an rpc_rqst
- * instead, does not support chunks, and avoids blocking memory
- * allocation.
+ * This is similar to svc_rdma_send_reply_msg, but takes a struct
+ * rpc_rqst instead, does not support chunks, and avoids blocking
+ * memory allocation.
  *
  * XXX: There is still an opportunity to block in svc_rdma_send()
  * if there are no SQ entries to post the Send. This may occur if
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 2eb3df698e11..ce62b78e5bc9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2016 Oracle. All rights reserved.
  * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
@@ -40,6 +41,63 @@
  * Author: Tom Tucker <tom@opengridcomputing.com>
  */
 
+/* Operation
+ *
+ * The main entry point is svc_rdma_sendto. This is called by the
+ * RPC server when an RPC Reply is ready to be transmitted to a client.
+ *
+ * The passed-in svc_rqst contains a struct xdr_buf which holds an
+ * XDR-encoded RPC Reply message. sendto must construct the RPC-over-RDMA
+ * transport header, post all Write WRs needed for this Reply, then post
+ * a Send WR conveying the transport header and the RPC message itself to
+ * the client.
+ *
+ * svc_rdma_sendto must fully transmit the Reply before returning, as
+ * the svc_rqst will be recycled as soon as sendto returns. Remaining
+ * resources referred to by the svc_rqst are also recycled at that time.
+ * Therefore any resources that must remain longer must be detached
+ * from the svc_rqst and released later.
+ *
+ * Page Management
+ *
+ * The I/O that performs Reply transmission is asynchronous, and may
+ * complete well after sendto returns. Thus pages under I/O must be
+ * removed from the svc_rqst before sendto returns.
+ *
+ * The logic here depends on Send Queue and completion ordering. Since
+ * the Send WR is always posted last, it will always complete last. Thus
+ * when it completes, it is guaranteed that all previous Write WRs have
+ * also completed.
+ *
+ * Write WRs are constructed and posted. Each Write segment gets its own
+ * svc_rdma_rw_ctxt, allowing the Write completion handler to find and
+ * DMA-unmap the pages under I/O for that Write segment. The Write
+ * completion handler does not release any pages.
+ *
+ * When the Send WR is constructed, it also gets its own svc_rdma_op_ctxt.
+ * The ownership of all of the Reply's pages are transferred into that
+ * ctxt, the Send WR is posted, and sendto returns.
+ *
+ * The svc_rdma_op_ctxt is presented when the Send WR completes. The
+ * Send completion handler finally releases the Reply's pages.
+ *
+ * This mechanism also assumes that completions on the transport's Send
+ * Completion Queue do not run in parallel. Otherwise a Write completion
+ * and Send completion running at the same time could release pages that
+ * are still DMA-mapped.
+ *
+ * Error Handling
+ *
+ * - If the Send WR is posted successfully, it will either complete
+ *   successfully, or get flushed. Either way, the Send completion
+ *   handler releases the Reply's pages.
+ * - If the Send WR cannot be not posted, the forward path releases
+ *   the Reply's pages.
+ *
+ * This handles the case, without the use of page reference counting,
+ * where two different Write segments send portions of the same page.
+ */
+
 #include <linux/sunrpc/debug.h>
 #include <linux/sunrpc/rpc_rdma.h>
 #include <linux/spinlock.h>
@@ -55,6 +113,133 @@ static u32 xdr_padsize(u32 len)
 	return (len & 3) ? (4 - (len & 3)) : 0;
 }
 
+/* Returns length of transport header, in bytes.
+ */
+static unsigned int svc_rdma_reply_hdr_len(__be32 *rdma_resp)
+{
+	unsigned int nsegs;
+	__be32 *p;
+
+	p = rdma_resp;
+
+	/* RPC-over-RDMA V1 replies never have a Read list. */
+	p += rpcrdma_fixed_maxsz + 1;
+
+	/* Skip Write list. */
+	while (*p++ != xdr_zero) {
+		nsegs = be32_to_cpup(p++);
+		p += nsegs * rpcrdma_segment_maxsz;
+	}
+
+	/* Skip Reply chunk. */
+	if (*p++ != xdr_zero) {
+		nsegs = be32_to_cpup(p++);
+		p += nsegs * rpcrdma_segment_maxsz;
+	}
+
+	return (unsigned long)p - (unsigned long)rdma_resp;
+}
+
+/* One Write chunk is copied from Call transport header to Reply
+ * transport header. Each segment's length field is updated to
+ * reflect number of bytes consumed in the segment.
+ *
+ * Returns number of segments in this chunk.
+ */
+static unsigned int xdr_encode_write_chunk(__be32 *dst, __be32 *src,
+					   unsigned int remaining)
+{
+	unsigned int i, nsegs;
+	u32 seg_len;
+
+	/* Write list discriminator */
+	*dst++ = *src++;
+
+	/* number of segments in this chunk */
+	nsegs = be32_to_cpup(src);
+	*dst++ = *src++;
+
+	for (i = nsegs; i; i--) {
+		/* segment's RDMA handle */
+		*dst++ = *src++;
+
+		/* bytes returned in this segment */
+		seg_len = be32_to_cpu(*src);
+		if (remaining >= seg_len) {
+			/* entire segment was consumed */
+			*dst = *src;
+			remaining -= seg_len;
+		} else {
+			/* segment only partly filled */
+			*dst = cpu_to_be32(remaining);
+			remaining = 0;
+		}
+		dst++; src++;
+
+		/* segment's RDMA offset */
+		*dst++ = *src++;
+		*dst++ = *src++;
+	}
+
+	return nsegs;
+}
+
+/* The client provided a Write list in the Call message. Fill in
+ * the segments in the first Write chunk in the Reply's transport
+ * header with the number of bytes consumed in each segment.
+ * Remaining chunks are returned unused.
+ *
+ * Assumptions:
+ *  - Client has provided only one Write chunk
+ */
+static void svc_rdma_xdr_encode_write_list(__be32 *rdma_resp, __be32 *wr_ch,
+					   unsigned int consumed)
+{
+	unsigned int nsegs;
+	__be32 *p, *q;
+
+	/* RPC-over-RDMA V1 replies never have a Read list. */
+	p = rdma_resp + rpcrdma_fixed_maxsz + 1;
+
+	q = wr_ch;
+	while (*q != xdr_zero) {
+		nsegs = xdr_encode_write_chunk(p, q, consumed);
+		q += 2 + nsegs * rpcrdma_segment_maxsz;
+		p += 2 + nsegs * rpcrdma_segment_maxsz;
+		consumed = 0;
+	}
+
+	/* Terminate Write list */
+	*p++ = xdr_zero;
+
+	/* Reply chunk discriminator; may be replaced later */
+	*p = xdr_zero;
+}
+
+/* The client provided a Reply chunk in the Call message. Fill in
+ * the segments in the Reply chunk in the Reply message with the
+ * number of bytes consumed in each segment.
+ *
+ * Assumptions:
+ * - Reply can always fit in the provided Reply chunk
+ */
+static void svc_rdma_xdr_encode_reply_chunk(__be32 *rdma_resp, __be32 *rp_ch,
+					    unsigned int consumed)
+{
+	__be32 *p;
+
+	/* Find the Reply chunk in the Reply's xprt header.
+	 * RPC-over-RDMA V1 replies never have a Read list.
+	 */
+	p = rdma_resp + rpcrdma_fixed_maxsz + 1;
+
+	/* Skip past Write list */
+	while (*p++ != xdr_zero)
+		p += 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+
+	xdr_encode_write_chunk(p, rp_ch, consumed);
+}
+
 int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
 		     struct xdr_buf *xdr,
 		     struct svc_rdma_req_map *vec,
@@ -123,45 +308,14 @@ int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
 	return 0;
 }
 
-static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
-			      struct xdr_buf *xdr,
-			      u32 xdr_off, size_t len, int dir)
-{
-	struct page *page;
-	dma_addr_t dma_addr;
-	if (xdr_off < xdr->head[0].iov_len) {
-		/* This offset is in the head */
-		xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
-		page = virt_to_page(xdr->head[0].iov_base);
-	} else {
-		xdr_off -= xdr->head[0].iov_len;
-		if (xdr_off < xdr->page_len) {
-			/* This offset is in the page list */
-			xdr_off += xdr->page_base;
-			page = xdr->pages[xdr_off >> PAGE_SHIFT];
-			xdr_off &= ~PAGE_MASK;
-		} else {
-			/* This offset is in the tail */
-			xdr_off -= xdr->page_len;
-			xdr_off += (unsigned long)
-				xdr->tail[0].iov_base & ~PAGE_MASK;
-			page = virt_to_page(xdr->tail[0].iov_base);
-		}
-	}
-	dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
-				   min_t(size_t, PAGE_SIZE, len), dir);
-	return dma_addr;
-}
-
 /* Parse the RPC Call's transport header.
  */
-static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
-				      struct rpcrdma_write_array **write,
-				      struct rpcrdma_write_array **reply)
+static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
+				      __be32 **write, __be32 **reply)
 {
 	__be32 *p;
 
-	p = (__be32 *)&rmsgp->rm_body.rm_chunks[0];
+	p = rdma_argp + rpcrdma_fixed_maxsz;
 
 	/* Read list */
 	while (*p++ != xdr_zero)
@@ -169,7 +323,7 @@ static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
 
 	/* Write list */
 	if (*p != xdr_zero) {
-		*write = (struct rpcrdma_write_array *)p;
+		*write = p;
 		while (*p++ != xdr_zero)
 			p += 1 + be32_to_cpu(*p) * 4;
 	} else {
@@ -179,7 +333,7 @@ static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
 
 	/* Reply chunk */
 	if (*p != xdr_zero)
-		*reply = (struct rpcrdma_write_array *)p;
+		*reply = p;
 	else
 		*reply = NULL;
 }
@@ -210,6 +364,32 @@ static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
 	return be32_to_cpup(p);
 }
 
+/* ib_dma_map_page() is used here because svc_rdma_dma_unmap()
+ * is used during completion to DMA-unmap this memory, and
+ * it uses ib_dma_unmap_page() exclusively.
+ */
+static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
+				struct svc_rdma_op_ctxt *ctxt,
+				unsigned int sge_no,
+				unsigned char *base,
+				unsigned int len)
+{
+	unsigned long offset = (unsigned long)base & ~PAGE_MASK;
+	struct ib_device *dev = rdma->sc_cm_id->device;
+	dma_addr_t dma_addr;
+
+	dma_addr = ib_dma_map_page(dev, virt_to_page(base),
+				   offset, len, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(dev, dma_addr))
+		return -EIO;
+
+	ctxt->sge[sge_no].addr = dma_addr;
+	ctxt->sge[sge_no].length = len;
+	ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
+	svc_rdma_count_mappings(rdma, ctxt);
+	return 0;
+}
+
 static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
 				 struct svc_rdma_op_ctxt *ctxt,
 				 unsigned int sge_no,
@@ -253,222 +433,73 @@ int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma,
 	return svc_rdma_dma_map_page(rdma, ctxt, 0, ctxt->pages[0], 0, len);
 }
 
-/* Assumptions:
- * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
+/* Load the xdr_buf into the ctxt's sge array, and DMA map each
+ * element as it is added.
+ *
+ * Returns the number of sge elements loaded on success, or
+ * a negative errno on failure.
  */
-static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
-		      u32 rmr, u64 to,
-		      u32 xdr_off, int write_len,
-		      struct svc_rdma_req_map *vec)
+static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
+				  struct svc_rdma_op_ctxt *ctxt,
+				  struct xdr_buf *xdr, __be32 *wr_lst)
 {
-	struct ib_rdma_wr write_wr;
-	struct ib_sge *sge;
-	int xdr_sge_no;
-	int sge_no;
-	int sge_bytes;
-	int sge_off;
-	int bc;
-	struct svc_rdma_op_ctxt *ctxt;
+	unsigned int len, sge_no, remaining, page_off;
+	struct page **ppages;
+	unsigned char *base;
+	u32 xdr_pad;
+	int ret;
 
-	if (vec->count > RPCSVC_MAXPAGES) {
-		pr_err("svcrdma: Too many pages (%lu)\n", vec->count);
-		return -EIO;
-	}
+	sge_no = 1;
 
-	dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
-		"write_len=%d, vec->sge=%p, vec->count=%lu\n",
-		rmr, (unsigned long long)to, xdr_off,
-		write_len, vec->sge, vec->count);
+	ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++,
+				   xdr->head[0].iov_base,
+				   xdr->head[0].iov_len);
+	if (ret < 0)
+		return ret;
 
-	ctxt = svc_rdma_get_context(xprt);
-	ctxt->direction = DMA_TO_DEVICE;
-	sge = ctxt->sge;
-
-	/* Find the SGE associated with xdr_off */
-	for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
-	     xdr_sge_no++) {
-		if (vec->sge[xdr_sge_no].iov_len > bc)
-			break;
-		bc -= vec->sge[xdr_sge_no].iov_len;
-	}
+	/* If a Write chunk is present, the xdr_buf's page list
+	 * is not included inline. However the Upper Layer may
+	 * have added XDR padding in the tail buffer, and that
+	 * should not be included inline.
+	 */
+	if (wr_lst) {
+		base = xdr->tail[0].iov_base;
+		len = xdr->tail[0].iov_len;
+		xdr_pad = xdr_padsize(xdr->page_len);
 
-	sge_off = bc;
-	bc = write_len;
-	sge_no = 0;
-
-	/* Copy the remaining SGE */
-	while (bc != 0) {
-		sge_bytes = min_t(size_t,
-			  bc, vec->sge[xdr_sge_no].iov_len-sge_off);
-		sge[sge_no].length = sge_bytes;
-		sge[sge_no].addr =
-			dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
-				    sge_bytes, DMA_TO_DEVICE);
-		xdr_off += sge_bytes;
-		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-					 sge[sge_no].addr))
-			goto err;
-		svc_rdma_count_mappings(xprt, ctxt);
-		sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
-		ctxt->count++;
-		sge_off = 0;
-		sge_no++;
-		xdr_sge_no++;
-		if (xdr_sge_no > vec->count) {
-			pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no);
-			goto err;
+		if (len && xdr_pad) {
+			base += xdr_pad;
+			len -= xdr_pad;
 		}
-		bc -= sge_bytes;
-		if (sge_no == xprt->sc_max_sge)
-			break;
-	}
-
-	/* Prepare WRITE WR */
-	memset(&write_wr, 0, sizeof write_wr);
-	ctxt->cqe.done = svc_rdma_wc_write;
-	write_wr.wr.wr_cqe = &ctxt->cqe;
-	write_wr.wr.sg_list = &sge[0];
-	write_wr.wr.num_sge = sge_no;
-	write_wr.wr.opcode = IB_WR_RDMA_WRITE;
-	write_wr.wr.send_flags = IB_SEND_SIGNALED;
-	write_wr.rkey = rmr;
-	write_wr.remote_addr = to;
-
-	/* Post It */
-	atomic_inc(&rdma_stat_write);
-	if (svc_rdma_send(xprt, &write_wr.wr))
-		goto err;
-	return write_len - bc;
- err:
-	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_context(ctxt, 0);
-	return -EIO;
-}
 
-noinline
-static int send_write_chunks(struct svcxprt_rdma *xprt,
-			     struct rpcrdma_write_array *wr_ary,
-			     struct rpcrdma_msg *rdma_resp,
-			     struct svc_rqst *rqstp,
-			     struct svc_rdma_req_map *vec)
-{
-	u32 xfer_len = rqstp->rq_res.page_len;
-	int write_len;
-	u32 xdr_off;
-	int chunk_off;
-	int chunk_no;
-	int nchunks;
-	struct rpcrdma_write_array *res_ary;
-	int ret;
-
-	res_ary = (struct rpcrdma_write_array *)
-		&rdma_resp->rm_body.rm_chunks[1];
-
-	/* Write chunks start at the pagelist */
-	nchunks = be32_to_cpu(wr_ary->wc_nchunks);
-	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
-	     xfer_len && chunk_no < nchunks;
-	     chunk_no++) {
-		struct rpcrdma_segment *arg_ch;
-		u64 rs_offset;
-
-		arg_ch = &wr_ary->wc_array[chunk_no].wc_target;
-		write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
-
-		/* Prepare the response chunk given the length actually
-		 * written */
-		xdr_decode_hyper((__be32 *)&arg_ch->rs_offset, &rs_offset);
-		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
-						arg_ch->rs_handle,
-						arg_ch->rs_offset,
-						write_len);
-		chunk_off = 0;
-		while (write_len) {
-			ret = send_write(xprt, rqstp,
-					 be32_to_cpu(arg_ch->rs_handle),
-					 rs_offset + chunk_off,
-					 xdr_off,
-					 write_len,
-					 vec);
-			if (ret <= 0)
-				goto out_err;
-			chunk_off += ret;
-			xdr_off += ret;
-			xfer_len -= ret;
-			write_len -= ret;
-		}
+		goto tail;
 	}
-	/* Update the req with the number of chunks actually used */
-	svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);
 
-	return rqstp->rq_res.page_len;
+	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+	page_off = xdr->page_base & ~PAGE_MASK;
+	remaining = xdr->page_len;
+	while (remaining) {
+		len = min_t(u32, PAGE_SIZE - page_off, remaining);
 
-out_err:
-	pr_err("svcrdma: failed to send write chunks, rc=%d\n", ret);
-	return -EIO;
-}
-
-noinline
-static int send_reply_chunks(struct svcxprt_rdma *xprt,
-			     struct rpcrdma_write_array *rp_ary,
-			     struct rpcrdma_msg *rdma_resp,
-			     struct svc_rqst *rqstp,
-			     struct svc_rdma_req_map *vec)
-{
-	u32 xfer_len = rqstp->rq_res.len;
-	int write_len;
-	u32 xdr_off;
-	int chunk_no;
-	int chunk_off;
-	int nchunks;
-	struct rpcrdma_segment *ch;
-	struct rpcrdma_write_array *res_ary;
-	int ret;
+		ret = svc_rdma_dma_map_page(rdma, ctxt, sge_no++,
+					    *ppages++, page_off, len);
+		if (ret < 0)
+			return ret;
 
-	/* XXX: need to fix when reply lists occur with read-list and or
-	 * write-list */
-	res_ary = (struct rpcrdma_write_array *)
-		&rdma_resp->rm_body.rm_chunks[2];
-
-	/* xdr offset starts at RPC message */
-	nchunks = be32_to_cpu(rp_ary->wc_nchunks);
-	for (xdr_off = 0, chunk_no = 0;
-	     xfer_len && chunk_no < nchunks;
-	     chunk_no++) {
-		u64 rs_offset;
-		ch = &rp_ary->wc_array[chunk_no].wc_target;
-		write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
-
-		/* Prepare the reply chunk given the length actually
-		 * written */
-		xdr_decode_hyper((__be32 *)&ch->rs_offset, &rs_offset);
-		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
-						ch->rs_handle, ch->rs_offset,
-						write_len);
-		chunk_off = 0;
-		while (write_len) {
-			ret = send_write(xprt, rqstp,
-					 be32_to_cpu(ch->rs_handle),
-					 rs_offset + chunk_off,
-					 xdr_off,
-					 write_len,
-					 vec);
-			if (ret <= 0)
-				goto out_err;
-			chunk_off += ret;
-			xdr_off += ret;
-			xfer_len -= ret;
-			write_len -= ret;
-		}
+		remaining -= len;
+		page_off = 0;
 	}
-	/* Update the req with the number of chunks actually used */
-	svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);
 
-	return rqstp->rq_res.len;
+	base = xdr->tail[0].iov_base;
+	len = xdr->tail[0].iov_len;
+tail:
+	if (len) {
+		ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++, base, len);
+		if (ret < 0)
+			return ret;
+	}
 
-out_err:
-	pr_err("svcrdma: failed to send reply chunks, rc=%d\n", ret);
-	return -EIO;
+	return sge_no - 1;
 }
 
 /* The svc_rqst and all resources it owns are released as soon as
@@ -525,90 +556,66 @@ int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
 	return svc_rdma_send(rdma, send_wr);
 }
 
-/* This function prepares the portion of the RPCRDMA message to be
- * sent in the RDMA_SEND. This function is called after data sent via
- * RDMA has already been transmitted. There are three cases:
- * - The RPCRDMA header, RPC header, and payload are all sent in a
- *   single RDMA_SEND. This is the "inline" case.
- * - The RPCRDMA header and some portion of the RPC header and data
- *   are sent via this RDMA_SEND and another portion of the data is
- *   sent via RDMA.
- * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
- *   header and data are all transmitted via RDMA.
- * In all three cases, this function prepares the RPCRDMA header in
- * sge[0], the 'type' parameter indicates the type to place in the
- * RPCRDMA header, and the 'byte_count' field indicates how much of
- * the XDR to include in this RDMA_SEND. NB: The offset of the payload
- * to send is zero in the XDR.
+/* Prepare the portion of the RPC Reply that will be transmitted
+ * via RDMA Send. The RPC-over-RDMA transport header is prepared
+ * in sge[0], and the RPC xdr_buf is prepared in following sges.
+ *
+ * Depending on whether a Write list or Reply chunk is present,
+ * the server may send all, a portion of, or none of the xdr_buf.
+ * In the latter case, only the transport header (sge[0]) is
+ * transmitted.
+ *
+ * RDMA Send is the last step of transmitting an RPC reply. Pages
+ * involved in the earlier RDMA Writes are here transferred out
+ * of the rqstp and into the ctxt's page array. These pages are
+ * DMA unmapped by each Write completion, but the subsequent Send
+ * completion finally releases these pages.
+ *
+ * Assumptions:
+ * - The Reply's transport header will never be larger than a page.
  */
-static int send_reply(struct svcxprt_rdma *rdma,
-		      struct svc_rqst *rqstp,
-		      struct page *page,
-		      struct rpcrdma_msg *rdma_resp,
-		      struct svc_rdma_req_map *vec,
-		      int byte_count,
-		      u32 inv_rkey)
+static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
+				   __be32 *rdma_argp, __be32 *rdma_resp,
+				   struct svc_rqst *rqstp,
+				   __be32 *wr_lst, __be32 *rp_ch)
 {
 	struct svc_rdma_op_ctxt *ctxt;
-	u32 xdr_off;
-	int sge_no;
-	int sge_bytes;
-	int ret = -EIO;
+	u32 inv_rkey;
+	int ret;
+
+	dprintk("svcrdma: sending %s reply: head=%zu, pagelen=%u, tail=%zu\n",
+		(rp_ch ? "RDMA_NOMSG" : "RDMA_MSG"),
+		rqstp->rq_res.head[0].iov_len,
+		rqstp->rq_res.page_len,
+		rqstp->rq_res.tail[0].iov_len);
 
-	/* Prepare the context */
 	ctxt = svc_rdma_get_context(rdma);
-	ctxt->direction = DMA_TO_DEVICE;
-	ctxt->pages[0] = page;
-	ctxt->count = 1;
 
-	/* Prepare the SGE for the RPCRDMA Header */
-	ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
-	ctxt->sge[0].length =
-	    svc_rdma_xdr_get_reply_hdr_len((__be32 *)rdma_resp);
-	ctxt->sge[0].addr =
-	    ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
-			    ctxt->sge[0].length, DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
+	ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp,
+				     svc_rdma_reply_hdr_len(rdma_resp));
+	if (ret < 0)
 		goto err;
-	svc_rdma_count_mappings(rdma, ctxt);
-
-	ctxt->direction = DMA_TO_DEVICE;
 
-	/* Map the payload indicated by 'byte_count' */
-	xdr_off = 0;
-	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
-		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
-		byte_count -= sge_bytes;
-		ctxt->sge[sge_no].addr =
-			dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
-				    sge_bytes, DMA_TO_DEVICE);
-		xdr_off += sge_bytes;
-		if (ib_dma_mapping_error(rdma->sc_cm_id->device,
-					 ctxt->sge[sge_no].addr))
+	if (!rp_ch) {
+		ret = svc_rdma_map_reply_msg(rdma, ctxt,
+					     &rqstp->rq_res, wr_lst);
+		if (ret < 0)
 			goto err;
-		svc_rdma_count_mappings(rdma, ctxt);
-		ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
-		ctxt->sge[sge_no].length = sge_bytes;
-	}
-	if (byte_count != 0) {
-		pr_err("svcrdma: Could not map %d bytes\n", byte_count);
-		goto err;
 	}
 
 	svc_rdma_save_io_pages(rqstp, ctxt);
 
-	if (sge_no > rdma->sc_max_sge) {
-		pr_err("svcrdma: Too many sges (%d)\n", sge_no);
-		goto err;
-	}
-
-	ret = svc_rdma_post_send_wr(rdma, ctxt, sge_no, inv_rkey);
+	inv_rkey = 0;
+	if (rdma->sc_snd_w_inv)
+		inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
+	ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, inv_rkey);
 	if (ret)
 		goto err;
 
 	return 0;
 
- err:
+err:
+	pr_err("svcrdma: failed to post Send WR (%d)\n", ret);
 	svc_rdma_unmap_dma(ctxt);
 	svc_rdma_put_context(ctxt, 1);
 	return ret;
@@ -618,41 +625,36 @@ void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
 {
 }
 
+/**
+ * svc_rdma_sendto - Transmit an RPC reply
+ * @rqstp: processed RPC request, reply XDR already in ::rq_res
+ *
+ * Any resources still associated with @rqstp are released upon return.
+ * If no reply message was possible, the connection is closed.
+ *
+ * Returns:
+ *	%0 if an RPC reply has been successfully posted,
+ *	%-ENOMEM if a resource shortage occurred (connection is lost),
+ *	%-ENOTCONN if posting failed (connection is lost).
+ */
 int svc_rdma_sendto(struct svc_rqst *rqstp)
 {
 	struct svc_xprt *xprt = rqstp->rq_xprt;
 	struct svcxprt_rdma *rdma =
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
-	struct rpcrdma_msg *rdma_argp;
-	struct rpcrdma_msg *rdma_resp;
-	struct rpcrdma_write_array *wr_ary, *rp_ary;
-	int ret;
-	int inline_bytes;
+	__be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch;
+	struct xdr_buf *xdr = &rqstp->rq_res;
 	struct page *res_page;
-	struct svc_rdma_req_map *vec;
-	u32 inv_rkey;
-	__be32 *p;
-
-	dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
+	int ret;
 
-	/* Get the RDMA request header. The receive logic always
-	 * places this at the start of page 0.
+	/* Find the call's chunk lists to decide how to send the reply.
+	 * Receive places the Call's xprt header at the start of page 0.
 	 */
 	rdma_argp = page_address(rqstp->rq_pages[0]);
-	svc_rdma_get_write_arrays(rdma_argp, &wr_ary, &rp_ary);
+	svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch);
 
-	inv_rkey = 0;
-	if (rdma->sc_snd_w_inv)
-		inv_rkey = svc_rdma_get_inv_rkey(&rdma_argp->rm_xid,
-						 (__be32 *)wr_ary,
-						 (__be32 *)rp_ary);
-
-	/* Build an req vec for the XDR */
-	vec = svc_rdma_get_req_map(rdma);
-	ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
-	if (ret)
-		goto err0;
-	inline_bytes = rqstp->rq_res.len;
+	dprintk("svcrdma: preparing response for XID 0x%08x\n",
+		be32_to_cpup(rdma_argp));
 
 	/* Create the RDMA response header. xprt->xpt_mutex,
 	 * acquired in svc_send(), serializes RPC replies. The
@@ -666,54 +668,46 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 		goto err0;
 	rdma_resp = page_address(res_page);
 
-	p = &rdma_resp->rm_xid;
-	*p++ = rdma_argp->rm_xid;
-	*p++ = rdma_argp->rm_vers;
+	p = rdma_resp;
+	*p++ = *rdma_argp;
+	*p++ = *(rdma_argp + 1);
 	*p++ = rdma->sc_fc_credits;
-	*p++ = rp_ary ? rdma_nomsg : rdma_msg;
+	*p++ = rp_ch ? rdma_nomsg : rdma_msg;
 
 	/* Start with empty chunks */
 	*p++ = xdr_zero;
 	*p++ = xdr_zero;
 	*p   = xdr_zero;
 
-	/* Send any write-chunk data and build resp write-list */
-	if (wr_ary) {
-		ret = send_write_chunks(rdma, wr_ary, rdma_resp, rqstp, vec);
+	if (wr_lst) {
+		/* XXX: Presume the client sent only one Write chunk */
+		ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr);
 		if (ret < 0)
 			goto err1;
-		inline_bytes -= ret + xdr_padsize(ret);
+		svc_rdma_xdr_encode_write_list(rdma_resp, wr_lst, ret);
 	}
-
-	/* Send any reply-list data and update resp reply-list */
-	if (rp_ary) {
-		ret = send_reply_chunks(rdma, rp_ary, rdma_resp, rqstp, vec);
+	if (rp_ch) {
+		ret = svc_rdma_send_reply_chunk(rdma, rp_ch, wr_lst, xdr);
 		if (ret < 0)
 			goto err1;
-		inline_bytes -= ret;
+		svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret);
 	}
 
-	/* Post a fresh Receive buffer _before_ sending the reply */
 	ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
 	if (ret)
 		goto err1;
-
-	ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
-			 inline_bytes, inv_rkey);
+	ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp,
+				      wr_lst, rp_ch);
 	if (ret < 0)
 		goto err0;
-
-	svc_rdma_put_req_map(rdma, vec);
-	dprintk("svcrdma: send_reply returns %d\n", ret);
-	return ret;
+	return 0;
 
  err1:
 	put_page(res_page);
  err0:
-	svc_rdma_put_req_map(rdma, vec);
 	pr_err("svcrdma: Could not send reply, err=%d. Closing transport.\n",
 	       ret);
-	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+	set_bit(XPT_CLOSE, &xprt->xpt_flags);
 	return -ENOTCONN;
 }
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index b25c50992a95..237c377c1e06 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1053,6 +1053,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	memset(&qp_attr, 0, sizeof qp_attr);
 	qp_attr.event_handler = qp_event_handler;
 	qp_attr.qp_context = &newxprt->sc_xprt;
+	qp_attr.port_num = newxprt->sc_cm_id->port_num;
+	qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests;
 	qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
 	qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
 	qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
-- 
cgit v1.2.3


From 6b19cc5ca2f78ebc88f5d39ba6a94197bb392fcc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:06:33 -0400
Subject: svcrdma: Clean up RDMA_ERROR path

Now that svc_rdma_sendto has been renovated, svc_rdma_send_error can
be refactored to reduce code duplication and remove C structure-
based XDR encoding. It is also relocated to the source file that
contains its only caller.

This is a refactoring change only.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/rpc_rdma.h         |  3 ++
 include/linux/sunrpc/svc_rdma.h         |  5 ----
 net/sunrpc/xprtrdma/svc_rdma_marshal.c  | 19 ------------
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 52 ++++++++++++++++++++++++++++++++-
 net/sunrpc/xprtrdma/svc_rdma_sendto.c   | 43 ---------------------------
 5 files changed, 54 insertions(+), 68 deletions(-)

diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h
index 245fc59b7324..b7e85b341a54 100644
--- a/include/linux/sunrpc/rpc_rdma.h
+++ b/include/linux/sunrpc/rpc_rdma.h
@@ -143,6 +143,9 @@ enum rpcrdma_proc {
 #define rdma_done	cpu_to_be32(RDMA_DONE)
 #define rdma_error	cpu_to_be32(RDMA_ERROR)
 
+#define err_vers	cpu_to_be32(ERR_VERS)
+#define err_chunk	cpu_to_be32(ERR_CHUNK)
+
 /*
  * Private extension to RPC-over-RDMA Version One.
  * Message passed during RDMA-CM connection set-up.
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 599ee03ee3fb..a770d200f607 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -209,9 +209,6 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
 
 /* svc_rdma_marshal.c */
 extern int svc_rdma_xdr_decode_req(struct xdr_buf *);
-extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
-				     struct rpcrdma_msg *,
-				     enum rpcrdma_errcode, __be32 *);
 extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
 extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
 					    __be32, __be64, u32);
@@ -244,8 +241,6 @@ extern int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
 				 struct svc_rdma_op_ctxt *ctxt,
 				 int num_sge, u32 inv_rkey);
 extern int svc_rdma_sendto(struct svc_rqst *);
-extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
-				int);
 
 /* svc_rdma_transport.c */
 extern void svc_rdma_wc_send(struct ib_cq *, struct ib_wc *);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index 1c4aabf0f657..ae58a897eca0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -167,25 +167,6 @@ out_inval:
 	return -EINVAL;
 }
 
-int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
-			      struct rpcrdma_msg *rmsgp,
-			      enum rpcrdma_errcode err, __be32 *va)
-{
-	__be32 *startp = va;
-
-	*va++ = rmsgp->rm_xid;
-	*va++ = rmsgp->rm_vers;
-	*va++ = xprt->sc_fc_credits;
-	*va++ = rdma_error;
-	*va++ = cpu_to_be32(err);
-	if (err == ERR_VERS) {
-		*va++ = rpcrdma_version;
-		*va++ = rpcrdma_version;
-	}
-
-	return (int)((unsigned long)va - (unsigned long)startp);
-}
-
 /**
  * svc_rdma_xdr_get_reply_hdr_length - Get length of Reply transport header
  * @rdma_resp: buffer containing Reply transport header
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index f7b2daf72a86..7435cb666f42 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -558,6 +558,56 @@ static void rdma_read_complete(struct svc_rqst *rqstp,
 	rqstp->rq_arg.buflen = head->arg.buflen;
 }
 
+static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
+				__be32 *rdma_argp, int status)
+{
+	struct svc_rdma_op_ctxt *ctxt;
+	__be32 *p, *err_msgp;
+	unsigned int length;
+	struct page *page;
+	int ret;
+
+	ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
+	if (ret)
+		return;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page)
+		return;
+	err_msgp = page_address(page);
+
+	p = err_msgp;
+	*p++ = *rdma_argp;
+	*p++ = *(rdma_argp + 1);
+	*p++ = xprt->sc_fc_credits;
+	*p++ = rdma_error;
+	if (status == -EPROTONOSUPPORT) {
+		*p++ = err_vers;
+		*p++ = rpcrdma_version;
+		*p++ = rpcrdma_version;
+	} else {
+		*p++ = err_chunk;
+	}
+	length = (unsigned long)p - (unsigned long)err_msgp;
+
+	/* Map transport header; no RPC message payload */
+	ctxt = svc_rdma_get_context(xprt);
+	ret = svc_rdma_map_reply_hdr(xprt, ctxt, err_msgp, length);
+	if (ret) {
+		dprintk("svcrdma: Error %d mapping send for protocol error\n",
+			ret);
+		return;
+	}
+
+	ret = svc_rdma_post_send_wr(xprt, ctxt, 1, 0);
+	if (ret) {
+		dprintk("svcrdma: Error %d posting send for protocol error\n",
+			ret);
+		svc_rdma_unmap_dma(ctxt);
+		svc_rdma_put_context(ctxt, 1);
+	}
+}
+
 /* By convention, backchannel calls arrive via rdma_msg type
  * messages, and never populate the chunk lists. This makes
  * the RPC/RDMA header small and fixed in size, so it is
@@ -686,7 +736,7 @@ complete:
 	return ret;
 
 out_err:
-	svc_rdma_send_error(rdma_xprt, rmsgp, ret);
+	svc_rdma_send_error(rdma_xprt, &rmsgp->rm_xid, ret);
 	svc_rdma_put_context(ctxt, 0);
 	return 0;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index ce62b78e5bc9..0b646e8f23c7 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -710,46 +710,3 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	set_bit(XPT_CLOSE, &xprt->xpt_flags);
 	return -ENOTCONN;
 }
-
-void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
-			 int status)
-{
-	struct page *p;
-	struct svc_rdma_op_ctxt *ctxt;
-	enum rpcrdma_errcode err;
-	__be32 *va;
-	int length;
-	int ret;
-
-	ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
-	if (ret)
-		return;
-
-	p = alloc_page(GFP_KERNEL);
-	if (!p)
-		return;
-	va = page_address(p);
-
-	/* XDR encode an error reply */
-	err = ERR_CHUNK;
-	if (status == -EPROTONOSUPPORT)
-		err = ERR_VERS;
-	length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
-
-	/* Map transport header; no RPC message payload */
-	ctxt = svc_rdma_get_context(xprt);
-	ret = svc_rdma_map_reply_hdr(xprt, ctxt, &rmsgp->rm_xid, length);
-	if (ret) {
-		dprintk("svcrdma: Error %d mapping send for protocol error\n",
-			ret);
-		return;
-	}
-
-	ret = svc_rdma_post_send_wr(xprt, ctxt, 1, 0);
-	if (ret) {
-		dprintk("svcrdma: Error %d posting send for protocol error\n",
-			ret);
-		svc_rdma_unmap_dma(ctxt);
-		svc_rdma_put_context(ctxt, 1);
-	}
-}
-- 
cgit v1.2.3


From 4757d90b15d851b9986bfca745eacc176359c13d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:06:41 -0400
Subject: svcrdma: Report Write/Reply chunk overruns

Observed at Connectathon 2017.

If a client has underestimated the size of a Write or Reply chunk,
the Linux server writes as much payload data as it can, then it
recognizes there was a problem and closes the connection without
sending the transport header.

This creates a couple of problems:

<> The client never receives indication of the server-side failure,
   so it continues to retransmit the bad RPC. Forward progress on
   the transport is blocked.

<> The reply payload pages are not moved out of the svc_rqst, thus
   they can be released by the RPC server before the RDMA Writes
   have completed.

The new rdma_rw-ized helpers return a distinct error code when a
Write/Reply chunk overrun occurs, so it's now easy for the caller
(svc_rdma_sendto) to recognize this case.

Instead of dropping the connection, post an RDMA_ERROR message. The
client now sees an RDMA_ERROR and can properly terminate the RPC
transaction.

As part of the new logic, set up the same delayed release for these
payload pages as would have occurred in the normal case.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_sendto.c | 58 +++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 0b646e8f23c7..e514f6864a93 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -621,6 +621,48 @@ err:
 	return ret;
 }
 
+/* Given the client-provided Write and Reply chunks, the server was not
+ * able to form a complete reply. Return an RDMA_ERROR message so the
+ * client can retire this RPC transaction. As above, the Send completion
+ * routine releases payload pages that were part of a previous RDMA Write.
+ *
+ * Remote Invalidation is skipped for simplicity.
+ */
+static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
+				   __be32 *rdma_resp, struct svc_rqst *rqstp)
+{
+	struct svc_rdma_op_ctxt *ctxt;
+	__be32 *p;
+	int ret;
+
+	ctxt = svc_rdma_get_context(rdma);
+
+	/* Replace the original transport header with an
+	 * RDMA_ERROR response. XID etc are preserved.
+	 */
+	p = rdma_resp + 3;
+	*p++ = rdma_error;
+	*p   = err_chunk;
+
+	ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp, 20);
+	if (ret < 0)
+		goto err;
+
+	svc_rdma_save_io_pages(rqstp, ctxt);
+
+	ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, 0);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	pr_err("svcrdma: failed to post Send WR (%d)\n", ret);
+	svc_rdma_unmap_dma(ctxt);
+	svc_rdma_put_context(ctxt, 1);
+	return ret;
+}
+
 void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
 {
 }
@@ -683,13 +725,13 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 		/* XXX: Presume the client sent only one Write chunk */
 		ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr);
 		if (ret < 0)
-			goto err1;
+			goto err2;
 		svc_rdma_xdr_encode_write_list(rdma_resp, wr_lst, ret);
 	}
 	if (rp_ch) {
 		ret = svc_rdma_send_reply_chunk(rdma, rp_ch, wr_lst, xdr);
 		if (ret < 0)
-			goto err1;
+			goto err2;
 		svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret);
 	}
 
@@ -702,6 +744,18 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 		goto err0;
 	return 0;
 
+ err2:
+	if (ret != -E2BIG)
+		goto err1;
+
+	ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
+	if (ret)
+		goto err1;
+	ret = svc_rdma_send_error_msg(rdma, rdma_resp, rqstp);
+	if (ret < 0)
+		goto err0;
+	return 0;
+
  err1:
 	put_page(res_page);
  err0:
-- 
cgit v1.2.3


From f5821c76b2c9c2fb98b276c0bf6a101bfe9050a3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:06:49 -0400
Subject: svcrdma: Clean up RPC-over-RDMA backchannel reply processing

Replace C structure-based XDR decoding with pointer arithmetic.
Pointer arithmetic is considered more portable.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h            |  2 +-
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 18 ++++++++++++++----
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c    | 27 +++++++++++++++------------
 3 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index a770d200f607..44d642bbfce6 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -204,7 +204,7 @@ static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma,
 
 /* svc_rdma_backchannel.c */
 extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
-				    struct rpcrdma_msg *rmsgp,
+				    __be32 *rdma_resp,
 				    struct xdr_buf *rcvbuf);
 
 /* svc_rdma_marshal.c */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index bf185b79c98f..c676ed0efb5a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -12,7 +12,17 @@
 
 #undef SVCRDMA_BACKCHANNEL_DEBUG
 
-int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
+/**
+ * svc_rdma_handle_bc_reply - Process incoming backchannel reply
+ * @xprt: controlling backchannel transport
+ * @rdma_resp: pointer to incoming transport header
+ * @rcvbuf: XDR buffer into which to decode the reply
+ *
+ * Returns:
+ *	%0 if @rcvbuf is filled in, xprt_complete_rqst called,
+ *	%-EAGAIN if server should call ->recvfrom again.
+ */
+int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
 			     struct xdr_buf *rcvbuf)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
@@ -27,13 +37,13 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
 
 	p = (__be32 *)src->iov_base;
 	len = src->iov_len;
-	xid = rmsgp->rm_xid;
+	xid = *rdma_resp;
 
 #ifdef SVCRDMA_BACKCHANNEL_DEBUG
 	pr_info("%s: xid=%08x, length=%zu\n",
 		__func__, be32_to_cpu(xid), len);
 	pr_info("%s: RPC/RDMA: %*ph\n",
-		__func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp);
+		__func__, (int)RPCRDMA_HDRLEN_MIN, rdma_resp);
 	pr_info("%s:      RPC: %*ph\n",
 		__func__, (int)len, p);
 #endif
@@ -53,7 +63,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
 		goto out_unlock;
 	memcpy(dst->iov_base, p, len);
 
-	credits = be32_to_cpu(rmsgp->rm_credit);
+	credits = be32_to_cpup(rdma_resp + 2);
 	if (credits == 0)
 		credits = 1;	/* don't deadlock */
 	else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 7435cb666f42..27a99bf5b1a6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -613,28 +613,30 @@ static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
  * the RPC/RDMA header small and fixed in size, so it is
  * straightforward to check the RPC header's direction field.
  */
-static bool
-svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp)
+static bool svc_rdma_is_backchannel_reply(struct svc_xprt *xprt,
+					  __be32 *rdma_resp)
 {
-	__be32 *p = (__be32 *)rmsgp;
+	__be32 *p;
 
 	if (!xprt->xpt_bc_xprt)
 		return false;
 
-	if (rmsgp->rm_type != rdma_msg)
+	p = rdma_resp + 3;
+	if (*p++ != rdma_msg)
 		return false;
-	if (rmsgp->rm_body.rm_chunks[0] != xdr_zero)
+
+	if (*p++ != xdr_zero)
 		return false;
-	if (rmsgp->rm_body.rm_chunks[1] != xdr_zero)
+	if (*p++ != xdr_zero)
 		return false;
-	if (rmsgp->rm_body.rm_chunks[2] != xdr_zero)
+	if (*p++ != xdr_zero)
 		return false;
 
-	/* sanity */
-	if (p[7] != rmsgp->rm_xid)
+	/* XID sanity */
+	if (*p++ != *rdma_resp)
 		return false;
 	/* call direction */
-	if (p[8] == cpu_to_be32(RPC_CALL))
+	if (*p == cpu_to_be32(RPC_CALL))
 		return false;
 
 	return true;
@@ -700,8 +702,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		goto out_drop;
 	rqstp->rq_xprt_hlen = ret;
 
-	if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
-		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
+	if (svc_rdma_is_backchannel_reply(xprt, &rmsgp->rm_xid)) {
+		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt,
+					       &rmsgp->rm_xid,
 					       &rqstp->rq_arg);
 		svc_rdma_put_context(ctxt, 0);
 		if (ret)
-- 
cgit v1.2.3


From ded8d19641a605232ab48f5d27f542648beba3cc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:06:57 -0400
Subject: svcrdma: Reduce size of sge array in struct svc_rdma_op_ctxt

The sge array in struct svc_rdma_op_ctxt is no longer used for
sending RDMA Write WRs. It need only accommodate the construction of
Send and Receive WRs. The maximum inline size is the largest payload
it needs to handle now.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h | 9 +++++++--
 net/sunrpc/xprtrdma/svc_rdma.c  | 6 +++---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 44d642bbfce6..e84b77556784 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -48,6 +48,12 @@
 #include <rdma/rdma_cm.h>
 #define SVCRDMA_DEBUG
 
+/* Default and maximum inline threshold sizes */
+enum {
+	RPCRDMA_DEF_INLINE_THRESH = 4096,
+	RPCRDMA_MAX_INLINE_THRESH = 65536
+};
+
 /* RPC/RDMA parameters and stats */
 extern unsigned int svcrdma_ord;
 extern unsigned int svcrdma_max_requests;
@@ -86,7 +92,7 @@ struct svc_rdma_op_ctxt {
 	int count;
 	unsigned int mapped_sges;
 	struct ib_send_wr send_wr;
-	struct ib_sge sge[RPCSVC_MAXPAGES];
+	struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE];
 	struct page *pages[RPCSVC_MAXPAGES];
 };
 
@@ -186,7 +192,6 @@ struct svcxprt_rdma {
  * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ.  */
 #define RPCRDMA_ORD             (64/4)
 #define RPCRDMA_MAX_REQUESTS    32
-#define RPCRDMA_MAX_REQ_SIZE    4096
 
 /* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our
  * current NFSv4.1 implementation supports one backchannel slot.
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 912444174647..a4a8f6989ee7 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -58,9 +58,9 @@ unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
 unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
 static unsigned int min_max_requests = 4;
 static unsigned int max_max_requests = 16384;
-unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
-static unsigned int min_max_inline = 4096;
-static unsigned int max_max_inline = 65536;
+unsigned int svcrdma_max_req_size = RPCRDMA_DEF_INLINE_THRESH;
+static unsigned int min_max_inline = RPCRDMA_DEF_INLINE_THRESH;
+static unsigned int max_max_inline = RPCRDMA_MAX_INLINE_THRESH;
 
 atomic_t rdma_stat_recv;
 atomic_t rdma_stat_read;
-- 
cgit v1.2.3


From 68cc4636bbbca89b9fedcf46d8b6bee444fc5e4e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:07:05 -0400
Subject: svcrdma: Remove unused RDMA Write completion handler

Clean up. All RDMA Write completions are now handled by
svc_rdma_wc_write_ctx.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          |  1 -
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 18 ------------------
 2 files changed, 19 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index e84b77556784..f58c5349beb7 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -249,7 +249,6 @@ extern int svc_rdma_sendto(struct svc_rqst *);
 
 /* svc_rdma_transport.c */
 extern void svc_rdma_wc_send(struct ib_cq *, struct ib_wc *);
-extern void svc_rdma_wc_write(struct ib_cq *, struct ib_wc *);
 extern void svc_rdma_wc_reg(struct ib_cq *, struct ib_wc *);
 extern void svc_rdma_wc_read(struct ib_cq *, struct ib_wc *);
 extern void svc_rdma_wc_inv(struct ib_cq *, struct ib_wc *);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 237c377c1e06..1597ca8ba3c0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -473,24 +473,6 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 	svc_rdma_put_context(ctxt, 1);
 }
 
-/**
- * svc_rdma_wc_write - Invoked by RDMA provider for each polled Write WC
- * @cq:        completion queue
- * @wc:        completed WR
- *
- */
-void svc_rdma_wc_write(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct ib_cqe *cqe = wc->wr_cqe;
-	struct svc_rdma_op_ctxt *ctxt;
-
-	svc_rdma_send_wc_common_put(cq, wc, "write");
-
-	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
-	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_context(ctxt, 0);
-}
-
 /**
  * svc_rdma_wc_reg - Invoked by RDMA provider for each polled FASTREG WC
  * @cq:        completion queue
-- 
cgit v1.2.3


From 2cf32924c68a22783e6f630e1b5345a80aa1a376 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:07:13 -0400
Subject: svcrdma: Remove the req_map cache

req_maps are no longer used by the send path and can thus be removed.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          | 34 +------------
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    | 68 --------------------------
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 84 --------------------------------
 3 files changed, 1 insertion(+), 185 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index f58c5349beb7..479bb7f65233 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -96,23 +96,6 @@ struct svc_rdma_op_ctxt {
 	struct page *pages[RPCSVC_MAXPAGES];
 };
 
-/*
- * NFS_ requests are mapped on the client side by the chunk lists in
- * the RPCRDMA header. During the fetching of the RPC from the client
- * and the writing of the reply to the client, the memory in the
- * client and the memory in the server must be mapped as contiguous
- * vaddr/len for access by the hardware. These data strucures keep
- * these mappings.
- *
- * For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the
- * 'sge' in the svc_rdma_req_map maps the server side RPC reply and the
- * 'ch' field maps the read-list of the RPCRDMA header to the 'sge'
- * mapping of the reply.
- */
-struct svc_rdma_chunk_sge {
-	int start;		/* sge no for this chunk */
-	int count;		/* sge count for this chunk */
-};
 struct svc_rdma_fastreg_mr {
 	struct ib_mr *mr;
 	struct scatterlist *sg;
@@ -121,15 +104,7 @@ struct svc_rdma_fastreg_mr {
 	enum dma_data_direction direction;
 	struct list_head frmr_list;
 };
-struct svc_rdma_req_map {
-	struct list_head free;
-	unsigned long count;
-	union {
-		struct kvec sge[RPCSVC_MAXPAGES];
-		struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
-		unsigned long lkey[RPCSVC_MAXPAGES];
-	};
-};
+
 #define RDMACTXT_F_LAST_CTXT	2
 
 #define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
@@ -160,8 +135,6 @@ struct svcxprt_rdma {
 	int		     sc_ctxt_used;
 	spinlock_t	     sc_rw_ctxt_lock;
 	struct list_head     sc_rw_ctxts;
-	spinlock_t	     sc_map_lock;
-	struct list_head     sc_maps;
 
 	struct list_head     sc_rq_dto_q;
 	spinlock_t	     sc_rq_dto_lock;
@@ -237,8 +210,6 @@ extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
 				     struct xdr_buf *xdr);
 
 /* svc_rdma_sendto.c */
-extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
-			    struct svc_rdma_req_map *, bool);
 extern int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma,
 				  struct svc_rdma_op_ctxt *ctxt,
 				  __be32 *rdma_resp, unsigned int len);
@@ -259,9 +230,6 @@ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
 extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
-extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *);
-extern void svc_rdma_put_req_map(struct svcxprt_rdma *,
-				 struct svc_rdma_req_map *);
 extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
 extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
 			      struct svc_rdma_fastreg_mr *);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index e514f6864a93..1736337f3a55 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -240,74 +240,6 @@ static void svc_rdma_xdr_encode_reply_chunk(__be32 *rdma_resp, __be32 *rp_ch,
 	xdr_encode_write_chunk(p, rp_ch, consumed);
 }
 
-int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
-		     struct xdr_buf *xdr,
-		     struct svc_rdma_req_map *vec,
-		     bool write_chunk_present)
-{
-	int sge_no;
-	u32 sge_bytes;
-	u32 page_bytes;
-	u32 page_off;
-	int page_no;
-
-	if (xdr->len !=
-	    (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
-		pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
-		return -EIO;
-	}
-
-	/* Skip the first sge, this is for the RPCRDMA header */
-	sge_no = 1;
-
-	/* Head SGE */
-	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
-	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
-	sge_no++;
-
-	/* pages SGE */
-	page_no = 0;
-	page_bytes = xdr->page_len;
-	page_off = xdr->page_base;
-	while (page_bytes) {
-		vec->sge[sge_no].iov_base =
-			page_address(xdr->pages[page_no]) + page_off;
-		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
-		page_bytes -= sge_bytes;
-		vec->sge[sge_no].iov_len = sge_bytes;
-
-		sge_no++;
-		page_no++;
-		page_off = 0; /* reset for next time through loop */
-	}
-
-	/* Tail SGE */
-	if (xdr->tail[0].iov_len) {
-		unsigned char *base = xdr->tail[0].iov_base;
-		size_t len = xdr->tail[0].iov_len;
-		u32 xdr_pad = xdr_padsize(xdr->page_len);
-
-		if (write_chunk_present && xdr_pad) {
-			base += xdr_pad;
-			len -= xdr_pad;
-		}
-
-		if (len) {
-			vec->sge[sge_no].iov_base = base;
-			vec->sge[sge_no].iov_len = len;
-			sge_no++;
-		}
-	}
-
-	dprintk("svcrdma: %s: sge_no %d page_no %d "
-		"page_base %u page_len %u head_len %zu tail_len %zu\n",
-		__func__, sge_no, page_no, xdr->page_base, xdr->page_len,
-		xdr->head[0].iov_len, xdr->tail[0].iov_len);
-
-	vec->count = sge_no;
-	return 0;
-}
-
 /* Parse the RPC Call's transport header.
  */
 static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 1597ca8ba3c0..a9d9cb1ba4c6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -272,85 +272,6 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
 	}
 }
 
-static struct svc_rdma_req_map *alloc_req_map(gfp_t flags)
-{
-	struct svc_rdma_req_map *map;
-
-	map = kmalloc(sizeof(*map), flags);
-	if (map)
-		INIT_LIST_HEAD(&map->free);
-	return map;
-}
-
-static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt)
-{
-	unsigned int i;
-
-	/* One for each receive buffer on this connection. */
-	i = xprt->sc_max_requests;
-
-	while (i--) {
-		struct svc_rdma_req_map *map;
-
-		map = alloc_req_map(GFP_KERNEL);
-		if (!map) {
-			dprintk("svcrdma: No memory for request map\n");
-			return false;
-		}
-		list_add(&map->free, &xprt->sc_maps);
-	}
-	return true;
-}
-
-struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt)
-{
-	struct svc_rdma_req_map *map = NULL;
-
-	spin_lock(&xprt->sc_map_lock);
-	if (list_empty(&xprt->sc_maps))
-		goto out_empty;
-
-	map = list_first_entry(&xprt->sc_maps,
-			       struct svc_rdma_req_map, free);
-	list_del_init(&map->free);
-	spin_unlock(&xprt->sc_map_lock);
-
-out:
-	map->count = 0;
-	return map;
-
-out_empty:
-	spin_unlock(&xprt->sc_map_lock);
-
-	/* Pre-allocation amount was incorrect */
-	map = alloc_req_map(GFP_NOIO);
-	if (map)
-		goto out;
-
-	WARN_ONCE(1, "svcrdma: empty request map list?\n");
-	return NULL;
-}
-
-void svc_rdma_put_req_map(struct svcxprt_rdma *xprt,
-			  struct svc_rdma_req_map *map)
-{
-	spin_lock(&xprt->sc_map_lock);
-	list_add(&map->free, &xprt->sc_maps);
-	spin_unlock(&xprt->sc_map_lock);
-}
-
-static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
-{
-	while (!list_empty(&xprt->sc_maps)) {
-		struct svc_rdma_req_map *map;
-
-		map = list_first_entry(&xprt->sc_maps,
-				       struct svc_rdma_req_map, free);
-		list_del(&map->free);
-		kfree(map);
-	}
-}
-
 /* QP event handler */
 static void qp_event_handler(struct ib_event *event, void *context)
 {
@@ -544,7 +465,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
 	INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
-	INIT_LIST_HEAD(&cma_xprt->sc_maps);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
 
 	spin_lock_init(&cma_xprt->sc_lock);
@@ -552,7 +472,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	spin_lock_init(&cma_xprt->sc_frmr_q_lock);
 	spin_lock_init(&cma_xprt->sc_ctxt_lock);
 	spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
-	spin_lock_init(&cma_xprt->sc_map_lock);
 
 	/*
 	 * Note that this implies that the underlying transport support
@@ -1004,8 +923,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 
 	if (!svc_rdma_prealloc_ctxts(newxprt))
 		goto errout;
-	if (!svc_rdma_prealloc_maps(newxprt))
-		goto errout;
 
 	/*
 	 * Limit ORD based on client limit, local device limit, and
@@ -1237,7 +1154,6 @@ static void __svc_rdma_free(struct work_struct *work)
 	rdma_dealloc_frmr_q(rdma);
 	svc_rdma_destroy_rw_ctxts(rdma);
 	svc_rdma_destroy_ctxts(rdma);
-	svc_rdma_destroy_maps(rdma);
 
 	/* Destroy the QP if present (not a listener) */
 	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
-- 
cgit v1.2.3


From dadf3e435debb85dfcf28c157012047153a21a97 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Apr 2017 13:07:21 -0400
Subject: svcrdma: Clean out old XDR encoders

Clean up: These have been replaced and are no longer used.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h        |  4 --
 net/sunrpc/xprtrdma/svc_rdma_marshal.c | 70 ----------------------------------
 2 files changed, 74 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 479bb7f65233..f3787d800ba4 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -187,10 +187,6 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
 
 /* svc_rdma_marshal.c */
 extern int svc_rdma_xdr_decode_req(struct xdr_buf *);
-extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
-extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
-					    __be32, __be64, u32);
-extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp);
 
 /* svc_rdma_recvfrom.c */
 extern int svc_rdma_recvfrom(struct svc_rqst *);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index ae58a897eca0..bdcf7d85a3dc 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -166,73 +166,3 @@ out_inval:
 	dprintk("svcrdma: failed to parse transport header\n");
 	return -EINVAL;
 }
-
-/**
- * svc_rdma_xdr_get_reply_hdr_length - Get length of Reply transport header
- * @rdma_resp: buffer containing Reply transport header
- *
- * Returns length of transport header, in bytes.
- */
-unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp)
-{
-	unsigned int nsegs;
-	__be32 *p;
-
-	p = rdma_resp;
-
-	/* RPC-over-RDMA V1 replies never have a Read list. */
-	p += rpcrdma_fixed_maxsz + 1;
-
-	/* Skip Write list. */
-	while (*p++ != xdr_zero) {
-		nsegs = be32_to_cpup(p++);
-		p += nsegs * rpcrdma_segment_maxsz;
-	}
-
-	/* Skip Reply chunk. */
-	if (*p++ != xdr_zero) {
-		nsegs = be32_to_cpup(p++);
-		p += nsegs * rpcrdma_segment_maxsz;
-	}
-
-	return (unsigned long)p - (unsigned long)rdma_resp;
-}
-
-void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
-{
-	struct rpcrdma_write_array *ary;
-
-	/* no read-list */
-	rmsgp->rm_body.rm_chunks[0] = xdr_zero;
-
-	/* write-array discrim */
-	ary = (struct rpcrdma_write_array *)
-		&rmsgp->rm_body.rm_chunks[1];
-	ary->wc_discrim = xdr_one;
-	ary->wc_nchunks = cpu_to_be32(chunks);
-
-	/* write-list terminator */
-	ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
-
-	/* reply-array discriminator */
-	ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
-}
-
-void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
-				 int chunks)
-{
-	ary->wc_discrim = xdr_one;
-	ary->wc_nchunks = cpu_to_be32(chunks);
-}
-
-void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
-				     int chunk_no,
-				     __be32 rs_handle,
-				     __be64 rs_offset,
-				     u32 write_len)
-{
-	struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
-	seg->rs_handle = rs_handle;
-	seg->rs_offset = rs_offset;
-	seg->rs_length = cpu_to_be32(write_len);
-}
-- 
cgit v1.2.3


From e56efe9322c5bec9ad4f301c8102fcd630694b4c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 8 Apr 2017 18:09:59 +0100
Subject: lockd: remove redundant check on block

A null check followed by a return is being performed already, so block
is always non-null at the second check on block, hence we can remove
this redundant null-check (Detected by PVS-Studio).  Also re-work
comment to clean up a check-patch warning.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/svclock.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 5581e020644b..3507c80d1d4b 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -870,15 +870,15 @@ nlmsvc_grant_reply(struct nlm_cookie *cookie, __be32 status)
 	if (!(block = nlmsvc_find_block(cookie)))
 		return;
 
-	if (block) {
-		if (status == nlm_lck_denied_grace_period) {
-			/* Try again in a couple of seconds */
-			nlmsvc_insert_block(block, 10 * HZ);
-		} else {
-			/* Lock is now held by client, or has been rejected.
-			 * In both cases, the block should be removed. */
-			nlmsvc_unlink_block(block);
-		}
+	if (status == nlm_lck_denied_grace_period) {
+		/* Try again in a couple of seconds */
+		nlmsvc_insert_block(block, 10 * HZ);
+	} else {
+		/*
+		 * Lock is now held by client, or has been rejected.
+		 * In both cases, the block should be removed.
+		 */
+		nlmsvc_unlink_block(block);
 	}
 	nlmsvc_release_block(block);
 }
-- 
cgit v1.2.3


From 8bf8f2e8c786f37991bd27332c75edcc524d2232 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 19 Apr 2017 05:12:16 +1000
Subject: powerpc/64s: Revert setting of LPCR[LPES] on POWER9

The XIVE enablement patches included a change to set the LPES (Logical
Partitioning Environment Selector) bit (bit # 3) in LPCR (Logical Partitioning
Control Register) on POWER9 hosts. This bit sets external interrupts to guest
delivery mode, which uses SRR0/1. The host's EE interrupt handler is written to
expect HSRR0/1 (for earlier CPUs). This should be fine because XIVE is
configured not to deliver EEs to the host (Hypervisor Virtulization Interrupt is
used instead) so the EE handler should never be executed.

However a bug in interrupt controller code, hardware, or odd configuration of a
simulator could result in the host getting an EE incorrectly. Keeping the EE
delivery mode matching the host EE handler prevents strange crashes due to using
the wrong exception registers.

KVM will configure the LPCR to set LPES prior to running a guest so that EEs are
delivered to the guest using SRR0/1.

Fixes: 08a1e650cc ("powerpc: Fixup LPCR:PECE and HEIC setting on POWER9")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Massage change log to avoid referring to LPES0 which is now renamed LPES]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/cpu_setup_power.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 7013ae3d1675..1fce4ddd2e6c 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -107,7 +107,7 @@ _GLOBAL(__setup_cpu_power9)
 	or	r3, r3, r4
 	LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
 	andc	r3, r3, r4
-	li	r4,(LPCR_LPES0 >> LPCR_LPES_SH)
+	li	r4,0 /* LPES = 0 */
 	bl	__init_LPCR
 	bl	__init_HFSCR
 	bl	__init_tlb_power9
@@ -131,7 +131,7 @@ _GLOBAL(__restore_cpu_power9)
 	or	r3, r3, r4
 	LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
 	andc	r3, r3, r4
-	li	r4,(LPCR_LPES0 >> LPCR_LPES_SH)
+	li	r4,0 /* LPES = 0 */
 	bl	__init_LPCR
 	bl	__init_HFSCR
 	bl	__init_tlb_power9
-- 
cgit v1.2.3


From fdb980fb17c6a474614dd984b500418a2811d9e7 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 25 Apr 2017 22:36:38 +0300
Subject: dmaengine: virt-dma: Convert to use list_for_each_entry_safe()

Use list_for_each_entry_safe() instead of open coding variants.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/virt-dma.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index e47fc9b0944f..545e97279083 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -86,7 +86,7 @@ EXPORT_SYMBOL_GPL(vchan_find_desc);
 static void vchan_complete(unsigned long arg)
 {
 	struct virt_dma_chan *vc = (struct virt_dma_chan *)arg;
-	struct virt_dma_desc *vd;
+	struct virt_dma_desc *vd, *_vd;
 	struct dmaengine_desc_callback cb;
 	LIST_HEAD(head);
 
@@ -103,8 +103,7 @@ static void vchan_complete(unsigned long arg)
 
 	dmaengine_desc_callback_invoke(&cb, NULL);
 
-	while (!list_empty(&head)) {
-		vd = list_first_entry(&head, struct virt_dma_desc, node);
+	list_for_each_entry_safe(vd, _vd, &head, node) {
 		dmaengine_desc_get_callback(&vd->tx, &cb);
 
 		list_del(&vd->node);
@@ -119,9 +118,9 @@ static void vchan_complete(unsigned long arg)
 
 void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head)
 {
-	while (!list_empty(head)) {
-		struct virt_dma_desc *vd = list_first_entry(head,
-			struct virt_dma_desc, node);
+	struct virt_dma_desc *vd, *_vd;
+
+	list_for_each_entry_safe(vd, _vd, head, node) {
 		if (dmaengine_desc_test_reuse(&vd->tx)) {
 			list_move_tail(&vd->node, &vc->desc_allocated);
 		} else {
-- 
cgit v1.2.3


From bdf95923086fb359ccb44c815724c3ace1611c90 Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@cavium.com>
Date: Tue, 25 Apr 2017 15:27:52 +0530
Subject: iommu/arm-smmu: Return IOVA in iova_to_phys when SMMU is bypassed

For software initiated address translation, when domain type is
IOMMU_DOMAIN_IDENTITY i.e SMMU is bypassed, mimic HW behavior
i.e return the same IOVA as translated address.

This patch is an extension to Will Deacon's patchset
"Implement SMMU passthrough using the default domain".

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/arm-smmu-v3.c | 3 +++
 drivers/iommu/arm-smmu.c    | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 803352d78d43..6ef9c3ed4344 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1714,6 +1714,9 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
 
+	if (domain->type == IOMMU_DOMAIN_IDENTITY)
+		return iova;
+
 	if (!ops)
 		return 0;
 
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 77242afa1efc..a36e80abf084 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1459,6 +1459,9 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
 
+	if (domain->type == IOMMU_DOMAIN_IDENTITY)
+		return iova;
+
 	if (!ops)
 		return 0;
 
-- 
cgit v1.2.3


From 4a99f3c83dc493c8ea84693d78cd792839c8aa64 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 24 Apr 2017 22:26:40 +0300
Subject: ovl: do not set overlay.opaque on non-dir create

The optimization for opaque dir create was wrongly being applied
also to non-dir create.

Fixes: 97c684cc9110 ("ovl: create directories inside merged parent opaque")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Cc: <stable@vger.kernel.org> # v4.10
---
 fs/overlayfs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 6515796460df..bfabc65fdc74 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -210,7 +210,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
 	if (err)
 		goto out_dput;
 
-	if (ovl_type_merge(dentry->d_parent)) {
+	if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
 		/* Setting opaque here is just an optimization, allow to fail */
 		ovl_set_opaque(dentry, newdentry);
 	}
-- 
cgit v1.2.3


From 4dbd80fb9176f23c78cecd0a8285001cd2066425 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Date: Wed, 8 Mar 2017 10:25:51 +0800
Subject: btrfs: Fix metadata underflow caused by btrfs_reloc_clone_csum error

[BUG]
When btrfs_reloc_clone_csum() reports error, it can underflow metadata
and leads to kernel assertion on outstanding extents in
run_delalloc_nocow() and cow_file_range().

 BTRFS info (device vdb5): relocating block group 12582912 flags data
 BTRFS info (device vdb5): found 1 extents
 assertion failed: inode->outstanding_extents >= num_extents, file: fs/btrfs//extent-tree.c, line: 5858

Currently, due to another bug blocking ordered extents, the bug is only
reproducible under certain block group layout and using error injection.

a) Create one data block group with one 4K extent in it.
   To avoid the bug that hangs btrfs due to ordered extent which never
   finishes
b) Make btrfs_reloc_clone_csum() always fail
c) Relocate that block group

[CAUSE]
run_delalloc_nocow() and cow_file_range() handles error from
btrfs_reloc_clone_csum() wrongly:

(The ascii chart shows a more generic case of this bug other than the
bug mentioned above)

|<------------------ delalloc range --------------------------->|
| OE 1 | OE 2 | ... | OE n |
                    |<----------- cleanup range --------------->|
|<-----------  ----------->|
             \/
 btrfs_finish_ordered_io() range

So error handler, which calls extent_clear_unlock_delalloc() with
EXTENT_DELALLOC and EXTENT_DO_ACCOUNT bits, and btrfs_finish_ordered_io()
will both cover OE n, and free its metadata, causing metadata under flow.

[Fix]
The fix is to ensure after calling btrfs_add_ordered_extent(), we only
call error handler after increasing the iteration offset, so that
cleanup range won't cover any created ordered extent.

|<------------------ delalloc range --------------------------->|
| OE 1 | OE 2 | ... | OE n |
|<-----------  ----------->|<---------- cleanup range --------->|
             \/
 btrfs_finish_ordered_io() range

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
---
 fs/btrfs/inode.c | 51 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 55ed2c4829a8..844bb896f5ac 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -998,15 +998,24 @@ static noinline int cow_file_range(struct inode *inode,
 		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
 			ret = btrfs_reloc_clone_csums(inode, start,
 						      cur_alloc_size);
+			/*
+			 * Only drop cache here, and process as normal.
+			 *
+			 * We must not allow extent_clear_unlock_delalloc()
+			 * at out_unlock label to free meta of this ordered
+			 * extent, as its meta should be freed by
+			 * btrfs_finish_ordered_io().
+			 *
+			 * So we must continue until @start is increased to
+			 * skip current ordered extent.
+			 */
 			if (ret)
-				goto out_drop_extent_cache;
+				btrfs_drop_extent_cache(BTRFS_I(inode), start,
+						start + ram_size - 1, 0);
 		}
 
 		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 
-		if (disk_num_bytes < cur_alloc_size)
-			break;
-
 		/* we're not doing compressed IO, don't unlock the first
 		 * page (which the caller expects to stay locked), don't
 		 * clear any dirty bits and don't set any writeback bits
@@ -1022,10 +1031,21 @@ static noinline int cow_file_range(struct inode *inode,
 					     delalloc_end, locked_page,
 					     EXTENT_LOCKED | EXTENT_DELALLOC,
 					     op);
-		disk_num_bytes -= cur_alloc_size;
+		if (disk_num_bytes < cur_alloc_size)
+			disk_num_bytes = 0;
+		else
+			disk_num_bytes -= cur_alloc_size;
 		num_bytes -= cur_alloc_size;
 		alloc_hint = ins.objectid + ins.offset;
 		start += cur_alloc_size;
+
+		/*
+		 * btrfs_reloc_clone_csums() error, since start is increased
+		 * extent_clear_unlock_delalloc() at out_unlock label won't
+		 * free metadata of current ordered extent, we're OK to exit.
+		 */
+		if (ret)
+			goto out_unlock;
 	}
 out:
 	return ret;
@@ -1414,15 +1434,14 @@ out_check:
 		BUG_ON(ret); /* -ENOMEM */
 
 		if (root->root_key.objectid ==
-		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
+		    BTRFS_DATA_RELOC_TREE_OBJECTID)
+			/*
+			 * Error handled later, as we must prevent
+			 * extent_clear_unlock_delalloc() in error handler
+			 * from freeing metadata of created ordered extent.
+			 */
 			ret = btrfs_reloc_clone_csums(inode, cur_offset,
 						      num_bytes);
-			if (ret) {
-				if (!nolock && nocow)
-					btrfs_end_write_no_snapshoting(root);
-				goto error;
-			}
-		}
 
 		extent_clear_unlock_delalloc(inode, cur_offset,
 					     cur_offset + num_bytes - 1, end,
@@ -1434,6 +1453,14 @@ out_check:
 		if (!nolock && nocow)
 			btrfs_end_write_no_snapshoting(root);
 		cur_offset = extent_end;
+
+		/*
+		 * btrfs_reloc_clone_csums() error, now we're OK to call error
+		 * handler, as metadata for created ordered extent will only
+		 * be freed by btrfs_finish_ordered_io().
+		 */
+		if (ret)
+			goto error;
 		if (cur_offset > end)
 			break;
 	}
-- 
cgit v1.2.3


From 524272607e882d04e6d1a70d41fcbed819445ab9 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Date: Wed, 8 Mar 2017 10:25:52 +0800
Subject: btrfs: Handle delalloc error correctly to avoid ordered extent hang

[BUG]
If run_delalloc_range() returns error and there is already some ordered
extents created, btrfs will be hanged with the following backtrace:

Call Trace:
 __schedule+0x2d4/0xae0
 schedule+0x3d/0x90
 btrfs_start_ordered_extent+0x160/0x200 [btrfs]
 ? wake_atomic_t_function+0x60/0x60
 btrfs_run_ordered_extent_work+0x25/0x40 [btrfs]
 btrfs_scrubparity_helper+0x1c1/0x620 [btrfs]
 btrfs_flush_delalloc_helper+0xe/0x10 [btrfs]
 process_one_work+0x2af/0x720
 ? process_one_work+0x22b/0x720
 worker_thread+0x4b/0x4f0
 kthread+0x10f/0x150
 ? process_one_work+0x720/0x720
 ? kthread_create_on_node+0x40/0x40
 ret_from_fork+0x2e/0x40

[CAUSE]

|<------------------ delalloc range --------------------------->|
| OE 1 | OE 2 | ... | OE n |
|<>|                       |<---------- cleanup range --------->|
 ||
 \_=> First page handled by end_extent_writepage() in __extent_writepage()

The problem is caused by error handler of run_delalloc_range(), which
doesn't handle any created ordered extents, leaving them waiting on
btrfs_finish_ordered_io() to finish.

However after run_delalloc_range() returns error, __extent_writepage()
won't submit bio, so btrfs_writepage_end_io_hook() won't be triggered
except the first page, and btrfs_finish_ordered_io() won't be triggered
for created ordered extents either.

So OE 2~n will hang forever, and if OE 1 is larger than one page, it
will also hang.

[FIX]
Introduce btrfs_cleanup_ordered_extents() function to cleanup created
ordered extents and finish them manually.

The function is based on existing
btrfs_endio_direct_write_update_ordered() function, and modify it to
act just like btrfs_writepage_endio_hook() but handles specified range
other than one page.

After fix, delalloc error will be handled like:

|<------------------ delalloc range --------------------------->|
| OE 1 | OE 2 | ... | OE n |
|<>|<--------  ----------->|<------ old error handler --------->|
 ||          ||
 ||          \_=> Cleaned up by cleanup_ordered_extents()
 \_=> First page handled by end_extent_writepage() in __extent_writepage()

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/inode.c | 63 ++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 844bb896f5ac..b8b2a3cbdbe1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -115,6 +115,31 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
 				       u64 ram_bytes, int compress_type,
 				       int type);
 
+static void __endio_write_update_ordered(struct inode *inode,
+					 const u64 offset, const u64 bytes,
+					 const bool uptodate);
+
+/*
+ * Cleanup all submitted ordered extents in specified range to handle errors
+ * from the fill_dellaloc() callback.
+ *
+ * NOTE: caller must ensure that when an error happens, it can not call
+ * extent_clear_unlock_delalloc() to clear both the bits EXTENT_DO_ACCOUNTING
+ * and EXTENT_DELALLOC simultaneously, because that causes the reserved metadata
+ * to be released, which we want to happen only when finishing the ordered
+ * extent (btrfs_finish_ordered_io()). Also note that the caller of the
+ * fill_delalloc() callback already does proper cleanup for the first page of
+ * the range, that is, it invokes the callback writepage_end_io_hook() for the
+ * range of the first page.
+ */
+static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
+						 const u64 offset,
+						 const u64 bytes)
+{
+	return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
+					    bytes - PAGE_SIZE, false);
+}
+
 static int btrfs_dirty_inode(struct inode *inode);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
@@ -1536,6 +1561,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 		ret = cow_file_range_async(inode, locked_page, start, end,
 					   page_started, nr_written);
 	}
+	if (ret)
+		btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
 	return ret;
 }
 
@@ -8154,17 +8181,26 @@ static void btrfs_endio_direct_read(struct bio *bio)
 	bio_put(bio);
 }
 
-static void btrfs_endio_direct_write_update_ordered(struct inode *inode,
-						    const u64 offset,
-						    const u64 bytes,
-						    const int uptodate)
+static void __endio_write_update_ordered(struct inode *inode,
+					 const u64 offset, const u64 bytes,
+					 const bool uptodate)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_ordered_extent *ordered = NULL;
+	struct btrfs_workqueue *wq;
+	btrfs_work_func_t func;
 	u64 ordered_offset = offset;
 	u64 ordered_bytes = bytes;
 	int ret;
 
+	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+		wq = fs_info->endio_freespace_worker;
+		func = btrfs_freespace_write_helper;
+	} else {
+		wq = fs_info->endio_write_workers;
+		func = btrfs_endio_write_helper;
+	}
+
 again:
 	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
 						   &ordered_offset,
@@ -8173,9 +8209,8 @@ again:
 	if (!ret)
 		goto out_test;
 
-	btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
-			finish_ordered_fn, NULL, NULL);
-	btrfs_queue_work(fs_info->endio_write_workers, &ordered->work);
+	btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL);
+	btrfs_queue_work(wq, &ordered->work);
 out_test:
 	/*
 	 * our bio might span multiple ordered extents.  If we haven't
@@ -8193,10 +8228,8 @@ static void btrfs_endio_direct_write(struct bio *bio)
 	struct btrfs_dio_private *dip = bio->bi_private;
 	struct bio *dio_bio = dip->dio_bio;
 
-	btrfs_endio_direct_write_update_ordered(dip->inode,
-						dip->logical_offset,
-						dip->bytes,
-						!bio->bi_error);
+	__endio_write_update_ordered(dip->inode, dip->logical_offset,
+				     dip->bytes, !bio->bi_error);
 
 	kfree(dip);
 
@@ -8557,10 +8590,10 @@ free_ordered:
 		io_bio = NULL;
 	} else {
 		if (write)
-			btrfs_endio_direct_write_update_ordered(inode,
+			__endio_write_update_ordered(inode,
 						file_offset,
 						dio_bio->bi_iter.bi_size,
-						0);
+						false);
 		else
 			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
 			      file_offset + dio_bio->bi_iter.bi_size - 1);
@@ -8695,11 +8728,11 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 			 */
 			if (dio_data.unsubmitted_oe_range_start <
 			    dio_data.unsubmitted_oe_range_end)
-				btrfs_endio_direct_write_update_ordered(inode,
+				__endio_write_update_ordered(inode,
 					dio_data.unsubmitted_oe_range_start,
 					dio_data.unsubmitted_oe_range_end -
 					dio_data.unsubmitted_oe_range_start,
-					0);
+					false);
 		} else if (ret >= 0 && (size_t)ret < count)
 			btrfs_delalloc_release_space(inode, offset,
 						     count - (size_t)ret);
-- 
cgit v1.2.3


From a315e68f6e8b3006c29482dbfc4d928f098c449c Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 6 Mar 2017 23:04:20 +0000
Subject: Btrfs: fix invalid attempt to free reserved space on failure to cow
 range

When attempting to COW a file range (we are starting writeback and doing
COW), if we manage to reserve an extent for the range we will write into
but fail after reserving it and before creating the respective ordered
extent, we end up in an error path where we attempt to decrement the
data space's bytes_may_use counter after we already did it while
reserving the extent, leading to a warning/trace like the following:

[  847.621524] ------------[ cut here ]------------
[  847.625441] WARNING: CPU: 5 PID: 4905 at fs/btrfs/extent-tree.c:4316 btrfs_free_reserved_data_space_noquota+0x60/0x9f [btrfs]
[  847.633704] Modules linked in: btrfs crc32c_generic xor raid6_pq acpi_cpufreq i2c_piix4 ppdev psmouse tpm_tis serio_raw pcspkr parport_pc tpm_tis_core i2c_core sg
[  847.644616] CPU: 5 PID: 4905 Comm: xfs_io Not tainted 4.10.0-rc8-btrfs-next-37+ #2
[  847.648601] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
[  847.648601] Call Trace:
[  847.648601]  dump_stack+0x67/0x90
[  847.648601]  __warn+0xc2/0xdd
[  847.648601]  warn_slowpath_null+0x1d/0x1f
[  847.648601]  btrfs_free_reserved_data_space_noquota+0x60/0x9f [btrfs]
[  847.648601]  btrfs_clear_bit_hook+0x140/0x258 [btrfs]
[  847.648601]  clear_state_bit+0x87/0x128 [btrfs]
[  847.648601]  __clear_extent_bit+0x222/0x2b7 [btrfs]
[  847.648601]  clear_extent_bit+0x17/0x19 [btrfs]
[  847.648601]  extent_clear_unlock_delalloc+0x3b/0x6b [btrfs]
[  847.648601]  cow_file_range.isra.39+0x387/0x39a [btrfs]
[  847.648601]  run_delalloc_nocow+0x4d7/0x70e [btrfs]
[  847.648601]  ? arch_local_irq_save+0x9/0xc
[  847.648601]  run_delalloc_range+0xa7/0x2b5 [btrfs]
[  847.648601]  writepage_delalloc.isra.31+0xb9/0x15c [btrfs]
[  847.648601]  __extent_writepage+0x249/0x2e8 [btrfs]
[  847.648601]  extent_write_cache_pages.constprop.33+0x28b/0x36c [btrfs]
[  847.648601]  ? arch_local_irq_save+0x9/0xc
[  847.648601]  ? mark_lock+0x24/0x201
[  847.648601]  extent_writepages+0x4b/0x5c [btrfs]
[  847.648601]  ? btrfs_writepage_start_hook+0xed/0xed [btrfs]
[  847.648601]  btrfs_writepages+0x28/0x2a [btrfs]
[  847.648601]  do_writepages+0x23/0x2c
[  847.648601]  __filemap_fdatawrite_range+0x5a/0x61
[  847.648601]  filemap_fdatawrite_range+0x13/0x15
[  847.648601]  btrfs_fdatawrite_range+0x20/0x46 [btrfs]
[  847.648601]  start_ordered_ops+0x19/0x23 [btrfs]
[  847.648601]  btrfs_sync_file+0x136/0x42c [btrfs]
[  847.648601]  vfs_fsync_range+0x8c/0x9e
[  847.648601]  vfs_fsync+0x1c/0x1e
[  847.648601]  do_fsync+0x31/0x4a
[  847.648601]  SyS_fsync+0x10/0x14
[  847.648601]  entry_SYSCALL_64_fastpath+0x18/0xad
[  847.648601] RIP: 0033:0x7f5b05200800
[  847.648601] RSP: 002b:00007ffe204f71c8 EFLAGS: 00000246 ORIG_RAX: 000000000000004a
[  847.648601] RAX: ffffffffffffffda RBX: ffffffff8109637b RCX: 00007f5b05200800
[  847.648601] RDX: 00000000008bd0a0 RSI: 00000000008bd2e0 RDI: 0000000000000003
[  847.648601] RBP: ffffc90001d67f98 R08: 000000000000ffff R09: 000000000000001f
[  847.648601] R10: 00000000000001f6 R11: 0000000000000246 R12: 0000000000000046
[  847.648601] R13: ffffc90001d67f78 R14: 00007f5b054be740 R15: 00007f5b054be740
[  847.648601]  ? trace_hardirqs_off_caller+0x3f/0xaa
[  847.685787] ---[ end trace 2a4a3e15382508e8 ]---

So fix this by not attempting to decrement the data space info's
bytes_may_use counter if we already reserved the extent and an error
happened before creating the ordered extent. We are already correctly
freeing the reserved extent if an error happens, so there's no additional
measure needed.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
---
 fs/btrfs/extent_io.h |  4 +++-
 fs/btrfs/inode.c     | 59 +++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 3e4fad4a909d..48a30d0e71fb 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -14,7 +14,7 @@
 #define EXTENT_DEFRAG		(1U << 6)
 #define EXTENT_BOUNDARY		(1U << 9)
 #define EXTENT_NODATASUM	(1U << 10)
-#define EXTENT_DO_ACCOUNTING	(1U << 11)
+#define EXTENT_CLEAR_META_RESV	(1U << 11)
 #define EXTENT_FIRST_DELALLOC	(1U << 12)
 #define EXTENT_NEED_WAIT	(1U << 13)
 #define EXTENT_DAMAGED		(1U << 14)
@@ -22,6 +22,8 @@
 #define EXTENT_QGROUP_RESERVED	(1U << 16)
 #define EXTENT_CLEAR_DATA_RESV	(1U << 17)
 #define EXTENT_IOBITS		(EXTENT_LOCKED | EXTENT_WRITEBACK)
+#define EXTENT_DO_ACCOUNTING    (EXTENT_CLEAR_META_RESV | \
+				 EXTENT_CLEAR_DATA_RESV)
 #define EXTENT_CTLBITS		(EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
 
 /*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b8b2a3cbdbe1..bfe04afa6277 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -943,10 +943,13 @@ static noinline int cow_file_range(struct inode *inode,
 	u64 num_bytes;
 	unsigned long ram_size;
 	u64 disk_num_bytes;
-	u64 cur_alloc_size;
+	u64 cur_alloc_size = 0;
 	u64 blocksize = fs_info->sectorsize;
 	struct btrfs_key ins;
 	struct extent_map *em;
+	unsigned clear_bits;
+	unsigned long page_ops;
+	bool extent_reserved = false;
 	int ret = 0;
 
 	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
@@ -991,14 +994,14 @@ static noinline int cow_file_range(struct inode *inode,
 			start + num_bytes - 1, 0);
 
 	while (disk_num_bytes > 0) {
-		unsigned long op;
-
 		cur_alloc_size = disk_num_bytes;
 		ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
 					   fs_info->sectorsize, 0, alloc_hint,
 					   &ins, 1, 1);
 		if (ret < 0)
 			goto out_unlock;
+		cur_alloc_size = ins.offset;
+		extent_reserved = true;
 
 		ram_size = ins.offset;
 		em = create_io_em(inode, start, ins.offset, /* len */
@@ -1013,7 +1016,6 @@ static noinline int cow_file_range(struct inode *inode,
 			goto out_reserve;
 		free_extent_map(em);
 
-		cur_alloc_size = ins.offset;
 		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
 					       ram_size, cur_alloc_size, 0);
 		if (ret)
@@ -1048,14 +1050,14 @@ static noinline int cow_file_range(struct inode *inode,
 		 * Do set the Private2 bit so we know this page was properly
 		 * setup for writepage
 		 */
-		op = unlock ? PAGE_UNLOCK : 0;
-		op |= PAGE_SET_PRIVATE2;
+		page_ops = unlock ? PAGE_UNLOCK : 0;
+		page_ops |= PAGE_SET_PRIVATE2;
 
 		extent_clear_unlock_delalloc(inode, start,
 					     start + ram_size - 1,
 					     delalloc_end, locked_page,
 					     EXTENT_LOCKED | EXTENT_DELALLOC,
-					     op);
+					     page_ops);
 		if (disk_num_bytes < cur_alloc_size)
 			disk_num_bytes = 0;
 		else
@@ -1063,6 +1065,7 @@ static noinline int cow_file_range(struct inode *inode,
 		num_bytes -= cur_alloc_size;
 		alloc_hint = ins.objectid + ins.offset;
 		start += cur_alloc_size;
+		extent_reserved = false;
 
 		/*
 		 * btrfs_reloc_clone_csums() error, since start is increased
@@ -1081,12 +1084,35 @@ out_reserve:
 	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
 out_unlock:
+	clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DEFRAG |
+		EXTENT_CLEAR_META_RESV;
+	page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
+		PAGE_END_WRITEBACK;
+	/*
+	 * If we reserved an extent for our delalloc range (or a subrange) and
+	 * failed to create the respective ordered extent, then it means that
+	 * when we reserved the extent we decremented the extent's size from
+	 * the data space_info's bytes_may_use counter and incremented the
+	 * space_info's bytes_reserved counter by the same amount. We must make
+	 * sure extent_clear_unlock_delalloc() does not try to decrement again
+	 * the data space_info's bytes_may_use counter, therefore we do not pass
+	 * it the flag EXTENT_CLEAR_DATA_RESV.
+	 */
+	if (extent_reserved) {
+		extent_clear_unlock_delalloc(inode, start,
+					     start + cur_alloc_size,
+					     start + cur_alloc_size,
+					     locked_page,
+					     clear_bits,
+					     page_ops);
+		start += cur_alloc_size;
+		if (start >= end)
+			goto out;
+	}
 	extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
 				     locked_page,
-				     EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
-				     EXTENT_DELALLOC | EXTENT_DEFRAG,
-				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
-				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
+				     clear_bits | EXTENT_CLEAR_DATA_RESV,
+				     page_ops);
 	goto out;
 }
 
@@ -1776,7 +1802,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
 
 		if (*bits & EXTENT_FIRST_DELALLOC) {
 			*bits &= ~EXTENT_FIRST_DELALLOC;
-		} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
+		} else if (!(*bits & EXTENT_CLEAR_META_RESV)) {
 			spin_lock(&inode->lock);
 			inode->outstanding_extents -= num_extents;
 			spin_unlock(&inode->lock);
@@ -1787,7 +1813,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
 		 * don't need to call dellalloc_release_metadata if there is an
 		 * error.
 		 */
-		if (*bits & EXTENT_DO_ACCOUNTING &&
+		if (*bits & EXTENT_CLEAR_META_RESV &&
 		    root != fs_info->tree_root)
 			btrfs_delalloc_release_metadata(inode, len);
 
@@ -1795,10 +1821,9 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
 		if (btrfs_is_testing(fs_info))
 			return;
 
-		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
-		    && do_list && !(state->state & EXTENT_NORESERVE)
-		    && (*bits & (EXTENT_DO_ACCOUNTING |
-		    EXTENT_CLEAR_DATA_RESV)))
+		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
+		    do_list && !(state->state & EXTENT_NORESERVE) &&
+		    (*bits & EXTENT_CLEAR_DATA_RESV))
 			btrfs_free_reserved_data_space_noquota(
 					&inode->vfs_inode,
 					state->start, len);
-- 
cgit v1.2.3


From 1c81ba237bcecad9bc885a1ddcf02d725ea38482 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 8 Mar 2017 16:43:49 +0000
Subject: Btrfs: fix incorrect space accounting after failure to insert inline
 extent

When using compression, if we fail to insert an inline extent we
incorrectly end up attempting to free the reserved data space twice,
once through extent_clear_unlock_delalloc(), because we pass it the
flag EXTENT_DO_ACCOUNTING, and once through a direct call to
btrfs_free_reserved_data_space_noquota(). This results in a trace
like the following:

[  834.576240] ------------[ cut here ]------------
[  834.576825] WARNING: CPU: 2 PID: 486 at fs/btrfs/extent-tree.c:4316 btrfs_free_reserved_data_space_noquota+0x60/0x9f [btrfs]
[  834.579501] Modules linked in: btrfs crc32c_generic xor raid6_pq ppdev i2c_piix4 acpi_cpufreq psmouse tpm_tis parport_pc pcspkr serio_raw tpm_tis_core sg parport evdev i2c_core tpm button loop autofs4 ext4 crc16 jbd2 mbcache sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix virtio_pci libata virtio_ring virtio scsi_mod e1000 floppy [last unloaded: btrfs]
[  834.592116] CPU: 2 PID: 486 Comm: kworker/u32:4 Not tainted 4.10.0-rc8-btrfs-next-37+ #2
[  834.593316] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
[  834.595273] Workqueue: btrfs-delalloc btrfs_delalloc_helper [btrfs]
[  834.596103] Call Trace:
[  834.596103]  dump_stack+0x67/0x90
[  834.596103]  __warn+0xc2/0xdd
[  834.596103]  warn_slowpath_null+0x1d/0x1f
[  834.596103]  btrfs_free_reserved_data_space_noquota+0x60/0x9f [btrfs]
[  834.596103]  compress_file_range.constprop.42+0x2fa/0x3fc [btrfs]
[  834.596103]  ? submit_compressed_extents+0x3a7/0x3a7 [btrfs]
[  834.596103]  async_cow_start+0x32/0x4d [btrfs]
[  834.596103]  btrfs_scrubparity_helper+0x187/0x3e7 [btrfs]
[  834.596103]  btrfs_delalloc_helper+0xe/0x10 [btrfs]
[  834.596103]  process_one_work+0x273/0x4e4
[  834.596103]  worker_thread+0x1eb/0x2ca
[  834.596103]  ? rescuer_thread+0x2b6/0x2b6
[  834.596103]  kthread+0x100/0x108
[  834.596103]  ? __list_del_entry+0x22/0x22
[  834.596103]  ret_from_fork+0x2e/0x40
[  834.611656] ---[ end trace 719902fe6bdef08f ]---

So fix this by not calling directly btrfs_free_reserved_data_space_noquota()
if an error happened.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/inode.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bfe04afa6277..707fccc5279b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -590,8 +590,10 @@ cont:
 						     PAGE_SET_WRITEBACK |
 						     page_error_op |
 						     PAGE_END_WRITEBACK);
-			btrfs_free_reserved_data_space_noquota(inode, start,
-						end - start + 1);
+			if (ret == 0)
+				btrfs_free_reserved_data_space_noquota(inode,
+							       start,
+							       end - start + 1);
 			goto free_pages_out;
 		}
 	}
-- 
cgit v1.2.3


From be2d253cc98244765323a7c94cc1ac5cd5a17072 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 3 Apr 2017 15:57:17 +0100
Subject: Btrfs: fix extent map leak during fallocate error path

If the call to btrfs_qgroup_reserve_data() failed, we were leaking an
extent map structure. The failure can happen either due to an -ENOMEM
condition or, when quotas are enabled, due to -EDQUOT for example.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 48dfb8e4baf2..56304c400db5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2856,8 +2856,10 @@ static long btrfs_fallocate(struct file *file, int mode,
 			}
 			ret = btrfs_qgroup_reserve_data(inode, cur_offset,
 					last_byte - cur_offset);
-			if (ret < 0)
+			if (ret < 0) {
+				free_extent_map(em);
 				break;
+			}
 		} else {
 			/*
 			 * Do not need to reserve unwritten extent for this
-- 
cgit v1.2.3


From e1cbfd7bf6dabdac561c75d08357571f44040a45 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 4 Apr 2017 20:31:00 +0100
Subject: Btrfs: send, fix file hole not being preserved due to inline extent

Normally we don't have inline extents followed by regular extents, but
there's currently at least one harmless case where this happens. For
example, when the page size is 4Kb and compression is enabled:

  $ mkfs.btrfs -f /dev/sdb
  $ mount -o compress /dev/sdb /mnt
  $ xfs_io -f -c "pwrite -S 0xaa 0 4K" -c "fsync" /mnt/foobar
  $ xfs_io -c "pwrite -S 0xbb 8K 4K" -c "fsync" /mnt/foobar

In this case we get a compressed inline extent, representing 4Kb of
data, followed by a hole extent and then a regular data extent. The
inline extent was not expanded/converted to a regular extent exactly
because it represents 4Kb of data. This does not cause any apparent
problem (such as the issue solved by commit e1699d2d7bf6
("btrfs: add missing memset while reading compressed inline extents"))
except trigger an unexpected case in the incremental send code path
that makes us issue an operation to write a hole when it's not needed,
resulting in more writes at the receiver and wasting space at the
receiver.

So teach the incremental send code to deal with this particular case.

The issue can be currently triggered by running fstests btrfs/137 with
compression enabled (MOUNT_OPTIONS="-o compress" ./check btrfs/137).

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
---
 fs/btrfs/send.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index a60d5bfb8a49..5b40d617bb03 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5184,13 +5184,19 @@ static int is_extent_unchanged(struct send_ctx *sctx,
 	while (key.offset < ekey->offset + left_len) {
 		ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
 		right_type = btrfs_file_extent_type(eb, ei);
-		if (right_type != BTRFS_FILE_EXTENT_REG) {
+		if (right_type != BTRFS_FILE_EXTENT_REG &&
+		    right_type != BTRFS_FILE_EXTENT_INLINE) {
 			ret = 0;
 			goto out;
 		}
 
 		right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
-		right_len = btrfs_file_extent_num_bytes(eb, ei);
+		if (right_type == BTRFS_FILE_EXTENT_INLINE) {
+			right_len = btrfs_file_extent_inline_len(eb, slot, ei);
+			right_len = PAGE_ALIGN(right_len);
+		} else {
+			right_len = btrfs_file_extent_num_bytes(eb, ei);
+		}
 		right_offset = btrfs_file_extent_offset(eb, ei);
 		right_gen = btrfs_file_extent_generation(eb, ei);
 
@@ -5204,6 +5210,19 @@ static int is_extent_unchanged(struct send_ctx *sctx,
 			goto out;
 		}
 
+		/*
+		 * We just wanted to see if when we have an inline extent, what
+		 * follows it is a regular extent (wanted to check the above
+		 * condition for inline extents too). This should normally not
+		 * happen but it's possible for example when we have an inline
+		 * compressed extent representing data with a size matching
+		 * the page size (currently the same as sector size).
+		 */
+		if (right_type == BTRFS_FILE_EXTENT_INLINE) {
+			ret = 0;
+			goto out;
+		}
+
 		left_offset_fixed = left_offset;
 		if (key.offset < ekey->offset) {
 			/* Fix the right offset for 2a and 7. */
-- 
cgit v1.2.3


From a7e3b975a0f9296162b72ac6ab7fad9631a07630 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 3 Apr 2017 10:45:46 +0100
Subject: Btrfs: fix reported number of inode blocks

Currently when there are buffered writes that were not yet flushed and
they fall within allocated ranges of the file (that is, not in holes or
beyond eof assuming there are no prealloc extents beyond eof), btrfs
simply reports an incorrect number of used blocks through the stat(2)
system call (or any of its variants), regardless of mount options or
inode flags (compress, compress-force, nodatacow). This is because the
number of blocks used that is reported is based on the current number
of bytes in the vfs inode plus the number of dealloc bytes in the btrfs
inode. The later covers bytes that both fall within allocated regions
of the file and holes.

Example scenarios where the number of reported blocks is wrong while the
buffered writes are not flushed:

  $ mkfs.btrfs -f /dev/sdc
  $ mount /dev/sdc /mnt/sdc

  $ xfs_io -f -c "pwrite -S 0xaa 0 64K" /mnt/sdc/foo1
  wrote 65536/65536 bytes at offset 0
  64 KiB, 16 ops; 0.0000 sec (259.336 MiB/sec and 66390.0415 ops/sec)

  $ sync

  $ xfs_io -c "pwrite -S 0xbb 0 64K" /mnt/sdc/foo1
  wrote 65536/65536 bytes at offset 0
  64 KiB, 16 ops; 0.0000 sec (192.308 MiB/sec and 49230.7692 ops/sec)

  # The following should have reported 64K...
  $ du -h /mnt/sdc/foo1
  128K	/mnt/sdc/foo1

  $ sync

  # After flushing the buffered write, it now reports the correct value.
  $ du -h /mnt/sdc/foo1
  64K	/mnt/sdc/foo1

  $ xfs_io -f -c "falloc -k 0 128K" -c "pwrite -S 0xaa 0 64K" /mnt/sdc/foo2
  wrote 65536/65536 bytes at offset 0
  64 KiB, 16 ops; 0.0000 sec (520.833 MiB/sec and 133333.3333 ops/sec)

  $ sync

  $ xfs_io -c "pwrite -S 0xbb 64K 64K" /mnt/sdc/foo2
  wrote 65536/65536 bytes at offset 65536
  64 KiB, 16 ops; 0.0000 sec (260.417 MiB/sec and 66666.6667 ops/sec)

  # The following should have reported 128K...
  $ du -h /mnt/sdc/foo2
  192K	/mnt/sdc/foo2

  $ sync

  # After flushing the buffered write, it now reports the correct value.
  $ du -h /mnt/sdc/foo2
  128K	/mnt/sdc/foo2

So the number of used file blocks is simply incorrect, unlike in other
filesystems such as ext4 and xfs for example, but only while the buffered
writes are not flushed.

Fix this by tracking the number of delalloc bytes that fall within holes
and beyond eof of a file, and use instead this new counter when reporting
the number of used blocks for an inode.

Another different problem that exists is that the delalloc bytes counter
is reset when writeback starts (by clearing the EXTENT_DEALLOC flag from
the respective range in the inode's iotree) and the vfs inode's bytes
counter is only incremented when writeback finishes (through
insert_reserved_file_extent()). Therefore while writeback is ongoing we
simply report a wrong number of blocks used by an inode if the write
operation covers a range previously unallocated. While this change does
not fix this problem, it does minimizes it a lot by shortening that time
window, as the new dealloc bytes counter (new_delalloc_bytes) is only
decremented when writeback finishes right before updating the vfs inode's
bytes counter. Fully fixing this second problem is not trivial and will
be addressed later by a different patch.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/btrfs_inode.h |  7 ++++++
 fs/btrfs/extent_io.h   |  1 +
 fs/btrfs/file.c        | 62 ++++++++++++++++++++++++++++++++++++++++++----
 fs/btrfs/inode.c       | 67 ++++++++++++++++++++++++++++++++++++++++----------
 4 files changed, 119 insertions(+), 18 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 0c6baaba0651..b8622e4d1744 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -124,6 +124,13 @@ struct btrfs_inode {
 	 */
 	u64 delalloc_bytes;
 
+	/*
+	 * Total number of bytes pending delalloc that fall within a file
+	 * range that is either a hole or beyond EOF (and no prealloc extent
+	 * exists in the range). This is always <= delalloc_bytes.
+	 */
+	u64 new_delalloc_bytes;
+
 	/*
 	 * total number of bytes pending defrag, used by stat to check whether
 	 * it needs COW.
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 48a30d0e71fb..d5ff51b973a4 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -21,6 +21,7 @@
 #define EXTENT_NORESERVE	(1U << 15)
 #define EXTENT_QGROUP_RESERVED	(1U << 16)
 #define EXTENT_CLEAR_DATA_RESV	(1U << 17)
+#define EXTENT_DELALLOC_NEW	(1U << 18)
 #define EXTENT_IOBITS		(EXTENT_LOCKED | EXTENT_WRITEBACK)
 #define EXTENT_DO_ACCOUNTING    (EXTENT_CLEAR_META_RESV | \
 				 EXTENT_CLEAR_DATA_RESV)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 56304c400db5..f7d022bc7998 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1404,6 +1404,47 @@ fail:
 
 }
 
+static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
+					 const u64 start,
+					 const u64 len,
+					 struct extent_state **cached_state)
+{
+	u64 search_start = start;
+	const u64 end = start + len - 1;
+
+	while (search_start < end) {
+		const u64 search_len = end - search_start + 1;
+		struct extent_map *em;
+		u64 em_len;
+		int ret = 0;
+
+		em = btrfs_get_extent(inode, NULL, 0, search_start,
+				      search_len, 0);
+		if (IS_ERR(em))
+			return PTR_ERR(em);
+
+		if (em->block_start != EXTENT_MAP_HOLE)
+			goto next;
+
+		em_len = em->len;
+		if (em->start < search_start)
+			em_len -= search_start - em->start;
+		if (em_len > search_len)
+			em_len = search_len;
+
+		ret = set_extent_bit(&inode->io_tree, search_start,
+				     search_start + em_len - 1,
+				     EXTENT_DELALLOC_NEW,
+				     NULL, cached_state, GFP_NOFS);
+next:
+		search_start = extent_map_end(em);
+		free_extent_map(em);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
 /*
  * This function locks the extent and properly waits for data=ordered extents
  * to finish before allowing the pages to be modified if need.
@@ -1432,8 +1473,11 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
 		+ round_up(pos + write_bytes - start_pos,
 			   fs_info->sectorsize) - 1;
 
-	if (start_pos < inode->vfs_inode.i_size) {
+	if (start_pos < inode->vfs_inode.i_size ||
+	    (inode->flags & BTRFS_INODE_PREALLOC)) {
 		struct btrfs_ordered_extent *ordered;
+		unsigned int clear_bits;
+
 		lock_extent_bits(&inode->io_tree, start_pos, last_pos,
 				cached_state);
 		ordered = btrfs_lookup_ordered_range(inode, start_pos,
@@ -1454,11 +1498,19 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
 		}
 		if (ordered)
 			btrfs_put_ordered_extent(ordered);
-
+		ret = btrfs_find_new_delalloc_bytes(inode, start_pos,
+						    last_pos - start_pos + 1,
+						    cached_state);
+		clear_bits = EXTENT_DIRTY | EXTENT_DELALLOC |
+			EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG;
+		if (ret)
+			clear_bits |= EXTENT_DELALLOC_NEW | EXTENT_LOCKED;
 		clear_extent_bit(&inode->io_tree, start_pos,
-				  last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
-				  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
-				  0, 0, cached_state, GFP_NOFS);
+				 last_pos, clear_bits,
+				 (clear_bits & EXTENT_LOCKED) ? 1 : 0,
+				 0, cached_state, GFP_NOFS);
+		if (ret)
+			return ret;
 		*lockstart = start_pos;
 		*lockend = last_pos;
 		ret = 1;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 707fccc5279b..58e5db8ebd5b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -572,7 +572,7 @@ cont:
 		}
 		if (ret <= 0) {
 			unsigned long clear_flags = EXTENT_DELALLOC |
-				EXTENT_DEFRAG;
+				EXTENT_DELALLOC_NEW | EXTENT_DEFRAG;
 			unsigned long page_error_op;
 
 			clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
@@ -879,6 +879,7 @@ out_free:
 				     async_extent->start +
 				     async_extent->ram_size - 1,
 				     NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
+				     EXTENT_DELALLOC_NEW |
 				     EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
 				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
 				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
@@ -974,6 +975,7 @@ static noinline int cow_file_range(struct inode *inode,
 			extent_clear_unlock_delalloc(inode, start, end,
 				     delalloc_end, NULL,
 				     EXTENT_LOCKED | EXTENT_DELALLOC |
+				     EXTENT_DELALLOC_NEW |
 				     EXTENT_DEFRAG, PAGE_UNLOCK |
 				     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
 				     PAGE_END_WRITEBACK);
@@ -1086,8 +1088,8 @@ out_reserve:
 	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
 out_unlock:
-	clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DEFRAG |
-		EXTENT_CLEAR_META_RESV;
+	clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+		EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
 	page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
 		PAGE_END_WRITEBACK;
 	/*
@@ -1775,6 +1777,14 @@ static void btrfs_set_bit_hook(struct inode *inode,
 			btrfs_add_delalloc_inodes(root, inode);
 		spin_unlock(&BTRFS_I(inode)->lock);
 	}
+
+	if (!(state->state & EXTENT_DELALLOC_NEW) &&
+	    (*bits & EXTENT_DELALLOC_NEW)) {
+		spin_lock(&BTRFS_I(inode)->lock);
+		BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
+			state->start;
+		spin_unlock(&BTRFS_I(inode)->lock);
+	}
 }
 
 /*
@@ -1840,6 +1850,14 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
 			btrfs_del_delalloc_inode(root, inode);
 		spin_unlock(&inode->lock);
 	}
+
+	if ((state->state & EXTENT_DELALLOC_NEW) &&
+	    (*bits & EXTENT_DELALLOC_NEW)) {
+		spin_lock(&inode->lock);
+		ASSERT(inode->new_delalloc_bytes >= len);
+		inode->new_delalloc_bytes -= len;
+		spin_unlock(&inode->lock);
+	}
 }
 
 /*
@@ -2872,6 +2890,13 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 	u64 logical_len = ordered_extent->len;
 	bool nolock;
 	bool truncated = false;
+	bool range_locked = false;
+	bool clear_new_delalloc_bytes = false;
+
+	if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
+	    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
+	    !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
+		clear_new_delalloc_bytes = true;
 
 	nolock = btrfs_is_free_space_inode(BTRFS_I(inode));
 
@@ -2920,6 +2945,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 		goto out;
 	}
 
+	range_locked = true;
 	lock_extent_bits(io_tree, ordered_extent->file_offset,
 			 ordered_extent->file_offset + ordered_extent->len - 1,
 			 &cached_state);
@@ -2945,7 +2971,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
 		trans = NULL;
-		goto out_unlock;
+		goto out;
 	}
 
 	trans->block_rsv = &fs_info->delalloc_block_rsv;
@@ -2977,7 +3003,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 			   trans->transid);
 	if (ret < 0) {
 		btrfs_abort_transaction(trans, ret);
-		goto out_unlock;
+		goto out;
 	}
 
 	add_pending_csums(trans, inode, &ordered_extent->list);
@@ -2986,14 +3012,26 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 	ret = btrfs_update_inode_fallback(trans, root, inode);
 	if (ret) { /* -ENOMEM or corruption */
 		btrfs_abort_transaction(trans, ret);
-		goto out_unlock;
+		goto out;
 	}
 	ret = 0;
-out_unlock:
-	unlock_extent_cached(io_tree, ordered_extent->file_offset,
-			     ordered_extent->file_offset +
-			     ordered_extent->len - 1, &cached_state, GFP_NOFS);
 out:
+	if (range_locked || clear_new_delalloc_bytes) {
+		unsigned int clear_bits = 0;
+
+		if (range_locked)
+			clear_bits |= EXTENT_LOCKED;
+		if (clear_new_delalloc_bytes)
+			clear_bits |= EXTENT_DELALLOC_NEW;
+		clear_extent_bit(&BTRFS_I(inode)->io_tree,
+				 ordered_extent->file_offset,
+				 ordered_extent->file_offset +
+				 ordered_extent->len - 1,
+				 clear_bits,
+				 (clear_bits & EXTENT_LOCKED) ? 1 : 0,
+				 0, &cached_state, GFP_NOFS);
+	}
+
 	if (root != fs_info->tree_root)
 		btrfs_delalloc_release_metadata(BTRFS_I(inode),
 				ordered_extent->len);
@@ -8906,6 +8944,7 @@ again:
 		if (!inode_evicting)
 			clear_extent_bit(tree, start, end,
 					 EXTENT_DIRTY | EXTENT_DELALLOC |
+					 EXTENT_DELALLOC_NEW |
 					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
 					 EXTENT_DEFRAG, 1, 0, &cached_state,
 					 GFP_NOFS);
@@ -8963,8 +9002,8 @@ again:
 	if (!inode_evicting) {
 		clear_extent_bit(tree, page_start, page_end,
 				 EXTENT_LOCKED | EXTENT_DIRTY |
-				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
-				 EXTENT_DEFRAG, 1, 1,
+				 EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
 				 &cached_state, GFP_NOFS);
 
 		__btrfs_releasepage(page, GFP_NOFS);
@@ -9335,6 +9374,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	ei->last_sub_trans = 0;
 	ei->logged_trans = 0;
 	ei->delalloc_bytes = 0;
+	ei->new_delalloc_bytes = 0;
 	ei->defrag_bytes = 0;
 	ei->disk_i_size = 0;
 	ei->flags = 0;
@@ -9400,6 +9440,7 @@ void btrfs_destroy_inode(struct inode *inode)
 	WARN_ON(BTRFS_I(inode)->outstanding_extents);
 	WARN_ON(BTRFS_I(inode)->reserved_extents);
 	WARN_ON(BTRFS_I(inode)->delalloc_bytes);
+	WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
 	WARN_ON(BTRFS_I(inode)->csum_bytes);
 	WARN_ON(BTRFS_I(inode)->defrag_bytes);
 
@@ -9523,7 +9564,7 @@ static int btrfs_getattr(struct vfsmount *mnt,
 	stat->dev = BTRFS_I(inode)->root->anon_dev;
 
 	spin_lock(&BTRFS_I(inode)->lock);
-	delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
+	delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
 	spin_unlock(&BTRFS_I(inode)->lock);
 	stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
 			ALIGN(delalloc_bytes, blocksize)) >> 9;
-- 
cgit v1.2.3


From 54551d85ad48b5b5f5735b9b76c147096828b626 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 26 Apr 2017 12:21:49 -0400
Subject: NFS: Add a few more fatal I/O errors to nfs_error_is_fatal()

EACCES, EDQUOT, EFBIG and ESTALE are all fatal errors as far as NFS
I/O is concerned. They need to be reported back to the application.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7b38fedb7e03..31b26cf1b476 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -756,9 +756,13 @@ static inline bool nfs_error_is_fatal(int err)
 {
 	switch (err) {
 	case -ERESTARTSYS:
+	case -EACCES:
+	case -EDQUOT:
+	case -EFBIG:
 	case -EIO:
 	case -ENOSPC:
 	case -EROFS:
+	case -ESTALE:
 	case -E2BIG:
 		return true;
 	default:
-- 
cgit v1.2.3


From c373fff7bd252ec36e8a895c58a584088f1d38bc Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 26 Apr 2017 12:26:22 -0400
Subject: NFSv4: Don't special case "launder"

If the client receives a fatal server error from nfs_pageio_add_request(),
then we should always truncate the page on which the error occurred.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c          |  2 +-
 fs/nfs/write.c         | 27 +++++++++++----------------
 include/linux/nfs_fs.h | 14 +-------------
 3 files changed, 13 insertions(+), 30 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index bebed885b6e4..5713eb32a45e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -482,7 +482,7 @@ static int nfs_launder_page(struct page *page)
 		inode->i_ino, (long long)page_offset(page));
 
 	nfs_fscache_wait_on_page_write(nfsi, page);
-	return nfs_wb_launder_page(inode, page);
+	return nfs_wb_page(inode, page);
 }
 
 static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2e654940478f..59e21cc0a266 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -586,8 +586,7 @@ nfs_error_is_fatal_on_server(int err)
  * May return an error if the user signalled nfs_wait_on_request().
  */
 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
-				struct page *page, bool nonblock,
-				bool launder)
+				struct page *page, bool nonblock)
 {
 	struct nfs_page *req;
 	int ret = 0;
@@ -610,13 +609,11 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 	if (!nfs_pageio_add_request(pgio, req)) {
 		ret = pgio->pg_error;
 		/*
-		 * Remove the problematic req upon fatal errors
-		 * in launder case, while other dirty pages can
-		 * still be around until they get flushed.
+		 * Remove the problematic req upon fatal errors on the server
 		 */
 		if (nfs_error_is_fatal(ret)) {
 			nfs_context_set_write_error(req->wb_context, ret);
-			if (launder)
+			if (nfs_error_is_fatal_on_server(ret))
 				goto out_launder;
 		}
 		nfs_redirty_request(req);
@@ -632,13 +629,12 @@ out_launder:
 }
 
 static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
-			    struct nfs_pageio_descriptor *pgio, bool launder)
+			    struct nfs_pageio_descriptor *pgio)
 {
 	int ret;
 
 	nfs_pageio_cond_complete(pgio, page_index(page));
-	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE,
-				   launder);
+	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
 	if (ret == -EAGAIN) {
 		redirty_page_for_writepage(wbc, page);
 		ret = 0;
@@ -650,8 +646,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
  * Write an mmapped page to the server.
  */
 static int nfs_writepage_locked(struct page *page,
-				struct writeback_control *wbc,
-				bool launder)
+				struct writeback_control *wbc)
 {
 	struct nfs_pageio_descriptor pgio;
 	struct inode *inode = page_file_mapping(page)->host;
@@ -660,7 +655,7 @@ static int nfs_writepage_locked(struct page *page,
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
 	nfs_pageio_init_write(&pgio, inode, 0,
 				false, &nfs_async_write_completion_ops);
-	err = nfs_do_writepage(page, wbc, &pgio, launder);
+	err = nfs_do_writepage(page, wbc, &pgio);
 	nfs_pageio_complete(&pgio);
 	if (err < 0)
 		return err;
@@ -673,7 +668,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	int ret;
 
-	ret = nfs_writepage_locked(page, wbc, false);
+	ret = nfs_writepage_locked(page, wbc);
 	unlock_page(page);
 	return ret;
 }
@@ -682,7 +677,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
 {
 	int ret;
 
-	ret = nfs_do_writepage(page, wbc, data, false);
+	ret = nfs_do_writepage(page, wbc, data);
 	unlock_page(page);
 	return ret;
 }
@@ -2013,7 +2008,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 /*
  * Write back all requests on one page - we do this before reading it.
  */
-int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
+int nfs_wb_page(struct inode *inode, struct page *page)
 {
 	loff_t range_start = page_file_offset(page);
 	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
@@ -2030,7 +2025,7 @@ int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
 	for (;;) {
 		wait_on_page_writeback(page);
 		if (clear_page_dirty_for_io(page)) {
-			ret = nfs_writepage_locked(page, &wbc, launder);
+			ret = nfs_writepage_locked(page, &wbc);
 			if (ret < 0)
 				goto out_error;
 			continue;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 9aa044e76820..bb0eb2c9acca 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -500,24 +500,12 @@ extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
  */
 extern int nfs_sync_inode(struct inode *inode);
 extern int nfs_wb_all(struct inode *inode);
-extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder);
+extern int nfs_wb_page(struct inode *inode, struct page *page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 extern int  nfs_commit_inode(struct inode *, int);
 extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail);
 extern void nfs_commit_free(struct nfs_commit_data *data);
 
-static inline int
-nfs_wb_launder_page(struct inode *inode, struct page *page)
-{
-	return nfs_wb_single_page(inode, page, true);
-}
-
-static inline int
-nfs_wb_page(struct inode *inode, struct page *page)
-{
-	return nfs_wb_single_page(inode, page, false);
-}
-
 static inline int
 nfs_have_writebacks(struct inode *inode)
 {
-- 
cgit v1.2.3


From c7e88067c1ae89e7bcbed070fb2c4e30bc39b51f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 18 Apr 2017 16:01:46 -0700
Subject: srcu: Exact tracking of srcu_data structures containing callbacks

The current Tree SRCU implementation schedules a workqueue for every
srcu_data covered by a given leaf srcu_node structure having callbacks,
even if only one of those srcu_data structures actually contains
callbacks.  This is clearly inefficient for workloads that don't feature
callbacks everywhere all the time.  This commit therefore adds an array
of masks that are used by the leaf srcu_node structures to track exactly
which srcu_data structures contain callbacks.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Mike Galbraith <efault@gmx.de>
---
 include/linux/srcutree.h |  4 ++++
 kernel/rcu/srcutree.c    | 29 +++++++++++++++++++++++------
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 0400e211aa44..94515ff226fb 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -47,6 +47,8 @@ struct srcu_data {
 	struct delayed_work work;		/* Context for CB invoking. */
 	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
 	struct srcu_node *mynode;		/* Leaf srcu_node. */
+	unsigned long grpmask;			/* Mask for leaf srcu_node */
+						/*  ->srcu_data_have_cbs[]. */
 	int cpu;
 	struct srcu_struct *sp;
 };
@@ -59,6 +61,8 @@ struct srcu_node {
 	unsigned long srcu_have_cbs[4];		/* GP seq for children */
 						/*  having CBs, but only */
 						/*  is > ->srcu_gq_seq. */
+	unsigned long srcu_data_have_cbs[4];	/* Which srcu_data structs */
+						/*  have CBs for given GP? */
 	struct srcu_node *srcu_parent;		/* Next up in tree. */
 	int grplo;				/* Least CPU for node. */
 	int grphi;				/* Biggest CPU for node. */
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 9ecf0acc18eb..1c2c1004b3b1 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -66,8 +66,12 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 	/* Each pass through this loop initializes one srcu_node structure. */
 	rcu_for_each_node_breadth_first(sp, snp) {
 		spin_lock_init(&snp->lock);
-		for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++)
+		WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
+			     ARRAY_SIZE(snp->srcu_data_have_cbs));
+		for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
 			snp->srcu_have_cbs[i] = 0;
+			snp->srcu_data_have_cbs[i] = 0;
+		}
 		snp->grplo = -1;
 		snp->grphi = -1;
 		if (snp == &sp->node[0]) {
@@ -107,6 +111,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 		sdp->cpu = cpu;
 		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
 		sdp->sp = sp;
+		sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
 		if (is_static)
 			continue;
 
@@ -434,16 +439,21 @@ static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
 
 /*
  * Schedule callback invocation for all srcu_data structures associated
- * with the specified srcu_node structure, if possible, on the corresponding
- * CPUs.
+ * with the specified srcu_node structure that have callbacks for the
+ * just-completed grace period, the one corresponding to idx.  If possible,
+ * schedule this invocation on the corresponding CPUs.
  */
-static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp)
+static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp,
+				  unsigned long mask)
 {
 	int cpu;
 
-	for (cpu = snp->grplo; cpu <= snp->grphi; cpu++)
+	for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
+		if (!(mask & (1 << (cpu - snp->grplo))))
+			continue;
 		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu),
 				      atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL);
+	}
 }
 
 /*
@@ -461,6 +471,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	unsigned long gpseq;
 	int idx;
 	int idxnext;
+	unsigned long mask;
 	struct srcu_node *snp;
 
 	/* Prevent more than one additional grace period. */
@@ -486,10 +497,12 @@ static void srcu_gp_end(struct srcu_struct *sp)
 			cbs = snp->srcu_have_cbs[idx] == gpseq;
 		snp->srcu_have_cbs[idx] = gpseq;
 		rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
+		mask = snp->srcu_data_have_cbs[idx];
+		snp->srcu_data_have_cbs[idx] = 0;
 		spin_unlock_irq(&snp->lock);
 		if (cbs) {
 			smp_mb(); /* GP end before CB invocation. */
-			srcu_schedule_cbs_snp(sp, snp);
+			srcu_schedule_cbs_snp(sp, snp, mask);
 		}
 	}
 
@@ -536,6 +549,8 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp,
 		spin_lock_irqsave(&snp->lock, flags);
 		if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
 			snp_seq = snp->srcu_have_cbs[idx];
+			if (snp == sdp->mynode && snp_seq == s)
+				snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
 			spin_unlock_irqrestore(&snp->lock, flags);
 			if (snp == sdp->mynode && snp_seq != s) {
 				smp_mb(); /* CBs after GP! */
@@ -544,6 +559,8 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp,
 			return;
 		}
 		snp->srcu_have_cbs[idx] = s;
+		if (snp == sdp->mynode)
+			snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
 		spin_unlock_irqrestore(&snp->lock, flags);
 	}
 
-- 
cgit v1.2.3


From 7f6733c3c648ddd6cf459c1b80ad388a95452955 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 18 Apr 2017 17:17:35 -0700
Subject: srcu: Make rcutorture writer stalls print SRCU GP state

In the past, SRCU was simple enough that there was little point in
making the rcutorture writer stall messages print the SRCU grace-period
number state.  With the advent of Tree SRCU, this has changed.  This
commit therefore makes Classic, Tiny, and Tree SRCU report this state
to rcutorture as needed.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Mike Galbraith <efault@gmx.de>
---
 include/linux/srcuclassic.h | 14 ++++++++++++++
 include/linux/srcutiny.h    | 12 ++++++++++++
 include/linux/srcutree.h    |  4 ++++
 kernel/rcu/rcutorture.c     |  8 +++++---
 kernel/rcu/srcutree.c       | 13 +++++++++++++
 kernel/rcu/tree.c           | 12 ++++--------
 6 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h
index 41cf99930f34..5753f7322262 100644
--- a/include/linux/srcuclassic.h
+++ b/include/linux/srcuclassic.h
@@ -98,4 +98,18 @@ void synchronize_srcu_expedited(struct srcu_struct *sp);
 void srcu_barrier(struct srcu_struct *sp);
 unsigned long srcu_batches_completed(struct srcu_struct *sp);
 
+static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
+					   struct srcu_struct *sp, int *flags,
+					   unsigned long *gpnum,
+					   unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = sp->completed;
+	*gpnum = *completed;
+	if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check0.head)
+		(*gpnum)++;
+}
+
 #endif
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 4f284e4f4d8c..42311ee0334f 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -78,4 +78,16 @@ static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
 	return 0;
 }
 
+static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
+					   struct srcu_struct *sp, int *flags,
+					   unsigned long *gpnum,
+					   unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = sp->srcu_gp_seq;
+	*gpnum = *completed;
+}
+
 #endif
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 94515ff226fb..3865717df124 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -140,4 +140,8 @@ void synchronize_srcu_expedited(struct srcu_struct *sp);
 void srcu_barrier(struct srcu_struct *sp);
 unsigned long srcu_batches_completed(struct srcu_struct *sp);
 
+void srcutorture_get_gp_data(enum rcutorture_type test_type,
+			     struct srcu_struct *sp, int *flags,
+			     unsigned long *gpnum, unsigned long *completed);
+
 #endif
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index e9d4527cdd43..ae6e574d4cf5 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1360,12 +1360,14 @@ rcu_torture_stats_print(void)
 		cur_ops->stats();
 	if (rtcv_snap == rcu_torture_current_version &&
 	    rcu_torture_current != NULL) {
-		int __maybe_unused flags;
-		unsigned long __maybe_unused gpnum;
-		unsigned long __maybe_unused completed;
+		int __maybe_unused flags = 0;
+		unsigned long __maybe_unused gpnum = 0;
+		unsigned long __maybe_unused completed = 0;
 
 		rcutorture_get_gp_data(cur_ops->ttype,
 				       &flags, &gpnum, &completed);
+		srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
+					&flags, &gpnum, &completed);
 		wtp = READ_ONCE(writer_task);
 		pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx\n",
 			 rcu_torture_writer_state_getname(),
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 1c2c1004b3b1..72b6cce5f591 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -1011,3 +1011,16 @@ void process_srcu(struct work_struct *work)
 	srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL);
 }
 EXPORT_SYMBOL_GPL(process_srcu);
+
+void srcutorture_get_gp_data(enum rcutorture_type test_type,
+					   struct srcu_struct *sp, int *flags,
+					   unsigned long *gpnum,
+					   unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = rcu_seq_ctr(sp->srcu_gp_seq);
+	*gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed);
+}
+EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 23aa02587d0f..91fff49d5869 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -704,15 +704,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
 	default:
 		break;
 	}
-	if (rsp != NULL) {
-		*flags = READ_ONCE(rsp->gp_flags);
-		*gpnum = READ_ONCE(rsp->gpnum);
-		*completed = READ_ONCE(rsp->completed);
+	if (rsp == NULL)
 		return;
-	}
-	*flags = 0;
-	*gpnum = 0;
-	*completed = 0;
+	*flags = READ_ONCE(rsp->gp_flags);
+	*gpnum = READ_ONCE(rsp->gpnum);
+	*completed = READ_ONCE(rsp->completed);
 }
 EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
 
-- 
cgit v1.2.3


From 7de96c3e759e49b1d176722ed26c14c057bac34f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 26 Apr 2017 15:27:52 -0300
Subject: perf tools: Move HAS_BOOL define to where perl headers are used

This is a perl specific hack, so move it from util.h to where perl
headers are used.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-4igctbinuom2sr6g4b03jqht@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/scripting-engines/trace-event-perl.c | 4 +++-
 tools/perf/util/util.h                               | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 2b12bdb3ce33..7b79c413486b 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -28,7 +28,9 @@
 #include <linux/bitmap.h>
 #include <linux/time64.h>
 
-#include "../util.h"
+#include <stdbool.h>
+/* perl needs the following define, right after including stdbool.h */
+#define HAS_BOOL
 #include <EXTERN.h>
 #include <perl.h>
 
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 6855c454e5bc..622c2d251ad3 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -5,7 +5,6 @@
 #define _BSD_SOURCE 1
 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
 #define _DEFAULT_SOURCE 1
-#define HAS_BOOL
 
 #include <fcntl.h>
 #include <stdbool.h>
-- 
cgit v1.2.3


From 5068b52f732157385d9dccd205bc4043da3a3cce Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 26 Apr 2017 15:31:57 -0300
Subject: perf ui gtk: Move gtk .so name to the only place where it is used

No need to pollute util.h with this.

Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/n/tip-kec0chbdtgrd71o3oi2kz2zt@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/setup.c  | 3 +++
 tools/perf/util/util.h | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c
index 5ea0b40c4fc2..caf1ce6f5152 100644
--- a/tools/perf/ui/setup.c
+++ b/tools/perf/ui/setup.c
@@ -10,7 +10,10 @@ pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
 void *perf_gtk_handle;
 int use_browser = -1;
 
+#define PERF_GTK_DSO "libperf-gtk.so"
+
 #ifdef HAVE_GTK2_SUPPORT
+
 static int setup_gtk_browser(void)
 {
 	int (*perf_ui_init)(void);
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 622c2d251ad3..dabdc810ffdc 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -22,8 +22,6 @@
 #endif
 #endif
 
-#define PERF_GTK_DSO  "libperf-gtk.so"
-
 /* General helper functions */
 void usage(const char *err) NORETURN;
 void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
-- 
cgit v1.2.3


From 3caeafce5392a8eba7b36d0d097d403cacc66e2d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 26 Apr 2017 15:40:31 -0300
Subject: perf units: Move parse_tag_value() to units.[ch]

Its basically to do units handling, so move to a more appropriately
named object.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-90ob9vfepui24l8l2makhd9u@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/units.c | 29 +++++++++++++++++++++++++++++
 tools/perf/util/units.h |  7 +++++++
 tools/perf/util/util.c  | 27 ---------------------------
 tools/perf/util/util.h  |  7 -------
 4 files changed, 36 insertions(+), 34 deletions(-)

diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
index f6a2a3d117d5..4767ec2c5ef6 100644
--- a/tools/perf/util/units.c
+++ b/tools/perf/util/units.c
@@ -1,8 +1,37 @@
 #include "units.h"
 #include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
 #include <linux/kernel.h>
 #include <linux/time64.h>
 
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
+{
+	struct parse_tag *i = tags;
+
+	while (i->tag) {
+		char *s = strchr(str, i->tag);
+
+		if (s) {
+			unsigned long int value;
+			char *endptr;
+
+			value = strtoul(str, &endptr, 10);
+			if (s != endptr)
+				break;
+
+			if (value > ULONG_MAX / i->mult)
+				break;
+			value *= i->mult;
+			return value;
+		}
+		i++;
+	}
+
+	return (unsigned long) -1;
+}
+
 unsigned long convert_unit(unsigned long value, char *unit)
 {
 	*unit = ' ';
diff --git a/tools/perf/util/units.h b/tools/perf/util/units.h
index 3ed7774afaa9..f02c87317150 100644
--- a/tools/perf/util/units.h
+++ b/tools/perf/util/units.h
@@ -4,6 +4,13 @@
 #include <stddef.h>
 #include <linux/types.h>
 
+struct parse_tag {
+	char tag;
+	int  mult;
+};
+
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
+
 unsigned long convert_unit(unsigned long value, char *unit);
 int unit_number__scnprintf(char *buf, size_t size, u64 n);
 
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index b460f0db84d1..28c9f335006c 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -334,33 +334,6 @@ int hex2u64(const char *ptr, u64 *long_val)
 	return p - ptr;
 }
 
-unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
-{
-	struct parse_tag *i = tags;
-
-	while (i->tag) {
-		char *s;
-
-		s = strchr(str, i->tag);
-		if (s) {
-			unsigned long int value;
-			char *endptr;
-
-			value = strtoul(str, &endptr, 10);
-			if (s != endptr)
-				break;
-
-			if (value > ULONG_MAX / i->mult)
-				break;
-			value *= i->mult;
-			return value;
-		}
-		i++;
-	}
-
-	return (unsigned long) -1;
-}
-
 int perf_event_paranoid(void)
 {
 	int value;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index dabdc810ffdc..d620719775a8 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -59,13 +59,6 @@ int hex2u64(const char *ptr, u64 *val);
 extern unsigned int page_size;
 extern int cacheline_size;
 
-struct parse_tag {
-	char tag;
-	int mult;
-};
-
-unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
-
 bool find_process(const char *name);
 
 int fetch_kernel_version(unsigned int *puint,
-- 
cgit v1.2.3


From 96395cbbc7e94cbbe4f76cf64cf122fabc19123d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 26 Apr 2017 15:49:21 -0300
Subject: tools lib string: Adopt prefixcmp() from perf and subcmd

Both had copies originating from git.git, move those to
tools/lib/string.c, getting both tools/lib/subcmd/ and tools/perf/ to
use it.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-uidwtticro1qhttzd2rkrkg1@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/linux/string.h     | 2 ++
 tools/lib/string.c               | 9 +++++++++
 tools/lib/subcmd/help.c          | 1 +
 tools/lib/subcmd/parse-options.c | 1 +
 tools/lib/subcmd/subcmd-util.h   | 9 ---------
 tools/perf/util/strbuf.c         | 9 ---------
 tools/perf/util/util.h           | 2 --
 7 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
index f436d2420a18..d62b56cf8c12 100644
--- a/tools/include/linux/string.h
+++ b/tools/include/linux/string.h
@@ -18,4 +18,6 @@ extern size_t strlcpy(char *dest, const char *src, size_t size);
 
 char *str_error_r(int errnum, char *buf, size_t buflen);
 
+int prefixcmp(const char *str, const char *prefix);
+
 #endif /* _LINUX_STRING_H_ */
diff --git a/tools/lib/string.c b/tools/lib/string.c
index bd239bc1d557..8e678af1c6ee 100644
--- a/tools/lib/string.c
+++ b/tools/lib/string.c
@@ -87,3 +87,12 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size)
 	}
 	return ret;
 }
+
+int prefixcmp(const char *str, const char *prefix)
+{
+	for (; ; str++, prefix++)
+		if (!*prefix)
+			return 0;
+		else if (*str != *prefix)
+			return (unsigned char)*prefix - (unsigned char)*str;
+}
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index e228c3cb3716..ba970a73d053 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -1,6 +1,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <linux/string.h>
 #include <termios.h>
 #include <sys/ioctl.h>
 #include <sys/types.h>
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index 6bc24025d054..359bfa77f39c 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -1,4 +1,5 @@
 #include <linux/compiler.h>
+#include <linux/string.h>
 #include <linux/types.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h
index fc2e45d8aaf1..8fa5f036eff0 100644
--- a/tools/lib/subcmd/subcmd-util.h
+++ b/tools/lib/subcmd/subcmd-util.h
@@ -79,13 +79,4 @@ static inline void astrcat(char **out, const char *add)
 	free(tmp);
 }
 
-static inline int prefixcmp(const char *str, const char *prefix)
-{
-	for (; ; str++, prefix++)
-		if (!*prefix)
-			return 0;
-		else if (*str != *prefix)
-			return (unsigned char)*prefix - (unsigned char)*str;
-}
-
 #endif /* __SUBCMD_UTIL_H */
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index e91b5e86f027..aafe908b82b5 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -3,15 +3,6 @@
 #include <linux/kernel.h>
 #include <errno.h>
 
-int prefixcmp(const char *str, const char *prefix)
-{
-	for (; ; str++, prefix++)
-		if (!*prefix)
-			return 0;
-		else if (*str != *prefix)
-			return (unsigned char)*prefix - (unsigned char)*str;
-}
-
 /*
  * Used as the default ->buf value, so that people can always assume
  * buf is non NULL and ->buf is NUL terminated even for a freshly
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index d620719775a8..5dfb9bb6482d 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -30,8 +30,6 @@ void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
 
 void set_warning_routine(void (*routine)(const char *err, va_list params));
 
-int prefixcmp(const char *str, const char *prefix);
-
 static inline void *zalloc(size_t size)
 {
 	return calloc(1, size);
-- 
cgit v1.2.3


From bfd20f1cc85010d2f2d77e544da05cd8c149ba9b Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 26 Apr 2017 09:18:35 -0700
Subject: x86, iommu/vt-d: Add an option to disable Intel IOMMU force on

IOMMU harms performance signficantly when we run very fast networking
workloads. It's 40GB networking doing XDP test. Software overhead is
almost unaware, but it's the IOTLB miss (based on our analysis) which
kills the performance. We observed the same performance issue even with
software passthrough (identity mapping), only the hardware passthrough
survives. The pps with iommu (with software passthrough) is only about
~30% of that without it. This is a limitation in hardware based on our
observation, so we'd like to disable the IOMMU force on, but we do want
to use TBOOT and we can sacrifice the DMA security bought by IOMMU. I
must admit I know nothing about TBOOT, but TBOOT guys (cc-ed) think not
eabling IOMMU is totally ok.

So introduce a new boot option to disable the force on. It's kind of
silly we need to run into intel_iommu_init even without force on, but we
need to disable TBOOT PMR registers. For system without the boot option,
nothing is changed.

Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 Documentation/admin-guide/kernel-parameters.txt |  9 +++++++++
 arch/x86/kernel/tboot.c                         |  3 +++
 drivers/iommu/intel-iommu.c                     | 18 ++++++++++++++++++
 include/linux/dma_remapping.h                   |  1 +
 4 files changed, 31 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2ba45caabada..17135bfade6a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1578,6 +1578,15 @@
 			extended tables themselves, and also PASID support. With
 			this option set, extended tables will not be used even
 			on hardware which claims to support them.
+		tboot_noforce [Default Off]
+			Do not force the Intel IOMMU enabled under tboot.
+			By default, tboot will force Intel IOMMU on, which
+			could harm performance of some high-throughput
+			devices like 40GBit network cards, even if identity
+			mapping is enabled.
+			Note that using this option lowers the security
+			provided by tboot because it makes the system
+			vulnerable to DMA attacks.
 
 	intel_idle.max_cstate=	[KNL,HW,ACPI,X86]
 			0	disables intel_idle and fall back on acpi_idle.
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index b868fa1b812b..edbdfe6ab60a 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -510,6 +510,9 @@ int tboot_force_iommu(void)
 	if (!tboot_enabled())
 		return 0;
 
+	if (!intel_iommu_tboot_noforce)
+		return 1;
+
 	if (no_iommu || swiotlb || dmar_disabled)
 		pr_warning("Forcing Intel-IOMMU to enabled\n");
 
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 5f08ba13972b..b0ced1c13713 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -183,6 +183,7 @@ static int rwbf_quirk;
  * (used when kernel is launched w/ TXT)
  */
 static int force_on = 0;
+int intel_iommu_tboot_noforce;
 
 /*
  * 0: Present
@@ -607,6 +608,10 @@ static int __init intel_iommu_setup(char *str)
 				"Intel-IOMMU: enable pre-production PASID support\n");
 			intel_iommu_pasid28 = 1;
 			iommu_identity_mapping |= IDENTMAP_GFX;
+		} else if (!strncmp(str, "tboot_noforce", 13)) {
+			printk(KERN_INFO
+				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
+			intel_iommu_tboot_noforce = 1;
 		}
 
 		str += strcspn(str, ",");
@@ -4850,6 +4855,19 @@ int __init intel_iommu_init(void)
 	}
 
 	if (no_iommu || dmar_disabled) {
+		/*
+		 * We exit the function here to ensure IOMMU's remapping and
+		 * mempool aren't setup, which means that the IOMMU's PMRs
+		 * won't be disabled via the call to init_dmars(). So disable
+		 * it explicitly here. The PMRs were setup by tboot prior to
+		 * calling SENTER, but the kernel is expected to reset/tear
+		 * down the PMRs.
+		 */
+		if (intel_iommu_tboot_noforce) {
+			for_each_iommu(iommu, drhd)
+				iommu_disable_protect_mem_regions(iommu);
+		}
+
 		/*
 		 * Make sure the IOMMUs are switched off, even when we
 		 * boot into a kexec kernel and the previous kernel left
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 187c10299722..90884072fa73 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -39,6 +39,7 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu);
 extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
 extern int dmar_disabled;
 extern int intel_iommu_enabled;
+extern int intel_iommu_tboot_noforce;
 #else
 static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
 {
-- 
cgit v1.2.3


From b7ecf663c75eed1e764f57281f9508c49c18516e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 21 Apr 2017 12:47:40 +0200
Subject: ACPI / bus: Introduce a list of ids for "always present" devices

Several Bay / Cherry Trail devices (all of which ship with Windows 10) hide
the LPSS PWM controller in ACPI, typically the _STA method looks like this:

    Method (_STA, 0, NotSerialized)  // _STA: Status
    {
        If (OSID == One)
        {
            Return (Zero)
        }

        Return (0x0F)
    }

Where OSID is some dark magic seen in all Cherry Trail ACPI tables making
the machine behave differently depending on which OS it *thinks* it is
booting, this gets set in a number of ways which we cannot control, on
some newer machines it simple hardcoded to "One" aka win10.

This causes the PWM controller to get hidden, which means Linux cannot
control the backlight level on cht based tablets / laptops.

Since loading the driver for this does no harm (the only in kernel user
of it is the i915 driver, which will only uses it when it needs it), this
commit makes acpi_bus_get_status() always set status to ACPI_STA_DEFAULT
for the LPSS PWM device, fixing the lack of backlight control.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
[ rjw: Rename the new file to utils.c ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/Makefile    |  1 +
 drivers/acpi/bus.c       |  5 +++
 drivers/acpi/x86/utils.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/acpi/acpi_bus.h  |  9 +++++
 4 files changed, 100 insertions(+)
 create mode 100644 drivers/acpi/x86/utils.c

diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index d94f92f88ca1..2a81d278bcf3 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -50,6 +50,7 @@ acpi-$(CONFIG_ACPI_REDUCED_HARDWARE_ONLY) += evged.o
 acpi-y				+= sysfs.o
 acpi-y				+= property.o
 acpi-$(CONFIG_X86)		+= acpi_cmos_rtc.o
+acpi-$(CONFIG_X86)		+= x86/utils.o
 acpi-$(CONFIG_DEBUG_FS)		+= debugfs.o
 acpi-$(CONFIG_ACPI_NUMA)	+= numa.o
 acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 34fbe027e73a..784bda663d16 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -114,6 +114,11 @@ int acpi_bus_get_status(struct acpi_device *device)
 	acpi_status status;
 	unsigned long long sta;
 
+	if (acpi_device_always_present(device)) {
+		acpi_set_device_status(device, ACPI_STA_DEFAULT);
+		return 0;
+	}
+
 	status = acpi_bus_get_status_handle(device->handle, &sta);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
new file mode 100644
index 000000000000..c8e90ef4485f
--- /dev/null
+++ b/drivers/acpi/x86/utils.c
@@ -0,0 +1,85 @@
+/*
+ * X86 ACPI Utility Functions
+ *
+ * Copyright (C) 2017 Hans de Goede <hdegoede@redhat.com>
+ *
+ * Based on various non upstream patches to support the CHT Whiskey Cove PMIC:
+ * Copyright (C) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include "../internal.h"
+
+/*
+ * Some ACPI devices are hidden (status == 0x0) in recent BIOS-es because
+ * some recent Windows drivers bind to one device but poke at multiple
+ * devices at the same time, so the others get hidden.
+ * We work around this by always reporting ACPI_STA_DEFAULT for these
+ * devices. Note this MUST only be done for devices where this is safe.
+ *
+ * This forcing of devices to be present is limited to specific CPU (SoC)
+ * models both to avoid potentially causing trouble on other models and
+ * because some HIDs are re-used on different SoCs for completely
+ * different devices.
+ */
+struct always_present_id {
+	struct acpi_device_id hid[2];
+	struct x86_cpu_id cpu_ids[2];
+	const char *uid;
+};
+
+#define ICPU(model)	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
+
+#define ENTRY(hid, uid, cpu_models) {					\
+	{ { hid, }, {} },						\
+	{ cpu_models, {} },						\
+	uid,								\
+}
+
+static const struct always_present_id always_present_ids[] = {
+	/*
+	 * Bay / Cherry Trail PWM directly poked by GPU driver in win10,
+	 * but Linux uses a separate PWM driver, harmless if not used.
+	 */
+	ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT1)),
+	ENTRY("80862288", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT)),
+};
+
+bool acpi_device_always_present(struct acpi_device *adev)
+{
+	u32 *status = (u32 *)&adev->status;
+	u32 old_status = *status;
+	bool ret = false;
+	unsigned int i;
+
+	/* acpi_match_device_ids checks status, so set it to default */
+	*status = ACPI_STA_DEFAULT;
+	for (i = 0; i < ARRAY_SIZE(always_present_ids); i++) {
+		if (acpi_match_device_ids(adev, always_present_ids[i].hid))
+			continue;
+
+		if (!adev->pnp.unique_id ||
+		    strcmp(adev->pnp.unique_id, always_present_ids[i].uid))
+			continue;
+
+		if (!x86_match_cpu(always_present_ids[i].cpu_ids))
+			continue;
+
+		if (old_status != ACPI_STA_DEFAULT) /* Log only once */
+			dev_info(&adev->dev,
+				 "Device [%s] is in always present list\n",
+				 adev->pnp.bus_id);
+
+		ret = true;
+		break;
+	}
+	*status = old_status;
+
+	return ret;
+}
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index ef0ae8aaa567..b86b90489140 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -586,6 +586,15 @@ struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
 int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
 int acpi_disable_wakeup_device_power(struct acpi_device *dev);
 
+#ifdef CONFIG_X86
+bool acpi_device_always_present(struct acpi_device *adev);
+#else
+static inline bool acpi_device_always_present(struct acpi_device *adev)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_PM
 acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev,
 				 void (*work_func)(struct work_struct *work));
-- 
cgit v1.2.3


From 753a448c27a0238dc1db58cd6954e8ef511a97e1 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 21 Apr 2017 12:47:41 +0200
Subject: ACPI / bus: Add INT0002 to list of always-present devices

The INT0002 device is necessary to clear wakeup interrupt sources
on Cherry Trail devices, without it we get nobody cared IRQ msgs
and some systems don't properly resume at all without it.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/utils.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index c8e90ef4485f..bd86b809c848 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -49,6 +49,11 @@ static const struct always_present_id always_present_ids[] = {
 	 */
 	ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT1)),
 	ENTRY("80862288", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT)),
+	/*
+	 * The INT0002 device is necessary to clear wakeup interrupt sources
+	 * on Cherry Trail devices, without it we get nobody cared IRQ msgs.
+	 */
+	ENTRY("INT0002", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT)),
 };
 
 bool acpi_device_always_present(struct acpi_device *adev)
-- 
cgit v1.2.3


From 6e14cf361a0cf3bbe0026557427a7eb2f64d3157 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <hanjun.guo@linaro.org>
Date: Sat, 22 Apr 2017 11:23:43 +0800
Subject: ACPI / APD: Add clock frequency for Hisilicon Hip07/08 I2C controller

I2C clock frequency of Designware ip for Hisilicon Hip07 is 200M,
but 250M for Hip08, use two ACPI IDs to differentiate them.

Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_apd.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index 26696b693e63..8f57648f318b 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -106,6 +106,16 @@ static const struct apd_device_desc vulcan_spi_desc = {
 	.setup = acpi_apd_setup,
 	.fixed_clk_rate = 133000000,
 };
+
+static const struct apd_device_desc hip07_i2c_desc = {
+	.setup = acpi_apd_setup,
+	.fixed_clk_rate = 200000000,
+};
+
+static const struct apd_device_desc hip08_i2c_desc = {
+	.setup = acpi_apd_setup,
+	.fixed_clk_rate = 250000000,
+};
 #endif
 
 #else
@@ -169,6 +179,8 @@ static const struct acpi_device_id acpi_apd_device_ids[] = {
 #ifdef CONFIG_ARM64
 	{ "APMC0D0F", APD_ADDR(xgene_i2c_desc) },
 	{ "BRCM900D", APD_ADDR(vulcan_spi_desc) },
+	{ "HISI0A21", APD_ADDR(hip07_i2c_desc) },
+	{ "HISI0A22", APD_ADDR(hip08_i2c_desc) },
 #endif
 	{ }
 };
-- 
cgit v1.2.3


From 58dd8abfad028633d36e2ba81109bd62f6e6e215 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <hanjun.guo@linaro.org>
Date: Sat, 22 Apr 2017 11:23:44 +0800
Subject: i2c: designware: Add ACPI HID for Hisilicon Hip07/08 I2C controller

Add ACPI HID HISI02A1 and HISI02A2 for Hisilicon Hip07/08,
which have different clock frequency.

Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/i2c/busses/i2c-designware-platdrv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 79c4b4ea0539..90c7c101b7fd 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -129,6 +129,8 @@ static const struct acpi_device_id dw_i2c_acpi_match[] = {
 	{ "AMDI0010", ACCESS_INTR_MASK },
 	{ "AMDI0510", 0 },
 	{ "APMC0D0F", 0 },
+	{ "HISI02A1", 0 },
+	{ "HISI02A2", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(acpi, dw_i2c_acpi_match);
-- 
cgit v1.2.3


From da28e1955d7fc228870f437693cfae8ce486ad9f Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 26 Apr 2017 16:17:43 +0800
Subject: ACPICA: Disassembler: Enhance resource descriptor detection

ACPICA commit ba5020b2dbe1538e4ccd7ac2dfd8843a690c007f

This change enhances the detection of resource descriptors
within a buffer object. For the end_tag opcode, the second byte
is defined to be either a checksum or zero. All known ASL compilers
insert a zero for this byte. The disassembler now ensures this
byte is zero before deciding that a buffer should be disassembled
to a resource descriptor. This helps eliminate incorrect decisions
when attempting to disassemble a buffer to a resource descriptor.

Link: https://github.com/acpica/acpica/commit/ba5020b2
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/utresrc.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/acpi/acpica/utresrc.c b/drivers/acpi/acpica/utresrc.c
index ff096d9755b9..e0587c85bafd 100644
--- a/drivers/acpi/acpica/utresrc.c
+++ b/drivers/acpi/acpica/utresrc.c
@@ -474,6 +474,15 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state,
 				return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
 			}
 
+			/*
+			 * The end_tag opcode must be followed by a zero byte.
+			 * Although this byte is technically defined to be a checksum,
+			 * in practice, all ASL compilers set this byte to zero.
+			 */
+			if (*(aml + 1) != 0) {
+				return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
+			}
+
 			/* Return the pointer to the end_tag if requested */
 
 			if (!user_function) {
-- 
cgit v1.2.3


From a20286249e4120c7997353ff8980248302c653b5 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 26 Apr 2017 16:17:50 +0800
Subject: ACPICA: Update some function headers, no funtional change

ACPICA commit 57c1b2d3e2f9ff7f465b0f08bfb38294101fe0b3

utxferror, update function headers.

Link: https://github.com/acpica/acpica/commit/57c1b2d3
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/utxferror.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/acpi/acpica/utxferror.c b/drivers/acpi/acpica/utxferror.c
index a16bd9eac653..950a1e500bfa 100644
--- a/drivers/acpi/acpica/utxferror.c
+++ b/drivers/acpi/acpica/utxferror.c
@@ -91,7 +91,7 @@ ACPI_EXPORT_SYMBOL(acpi_error)
  *
  * PARAMETERS:  module_name         - Caller's module name (for error output)
  *              line_number         - Caller's line number (for error output)
- *              status              - Status to be formatted
+ *              status              - Status value to be decoded/formatted
  *              format              - Printf format string + additional args
  *
  * RETURN:      None
@@ -132,8 +132,8 @@ ACPI_EXPORT_SYMBOL(acpi_exception)
  *
  * FUNCTION:    acpi_warning
  *
- * PARAMETERS:  module_name         - Caller's module name (for error output)
- *              line_number         - Caller's line number (for error output)
+ * PARAMETERS:  module_name         - Caller's module name (for warning output)
+ *              line_number         - Caller's line number (for warning output)
  *              format              - Printf format string + additional args
  *
  * RETURN:      None
@@ -163,17 +163,13 @@ ACPI_EXPORT_SYMBOL(acpi_warning)
  *
  * FUNCTION:    acpi_info
  *
- * PARAMETERS:  module_name         - Caller's module name (for error output)
- *              line_number         - Caller's line number (for error output)
- *              format              - Printf format string + additional args
+ * PARAMETERS:  format              - Printf format string + additional args
  *
  * RETURN:      None
  *
  * DESCRIPTION: Print generic "ACPI:" information message. There is no
  *              module/line/version info in order to keep the message simple.
  *
- * TBD: module_name and line_number args are not needed, should be removed.
- *
  ******************************************************************************/
 void ACPI_INTERNAL_VAR_XFACE acpi_info(const char *format, ...)
 {
@@ -229,8 +225,8 @@ ACPI_EXPORT_SYMBOL(acpi_bios_error)
  *
  * FUNCTION:    acpi_bios_warning
  *
- * PARAMETERS:  module_name         - Caller's module name (for error output)
- *              line_number         - Caller's line number (for error output)
+ * PARAMETERS:  module_name         - Caller's module name (for warning output)
+ *              line_number         - Caller's line number (for warning output)
  *              format              - Printf format string + additional args
  *
  * RETURN:      None
-- 
cgit v1.2.3


From 1b8f77aa0deca22a995a4bdd65914aa24bda0386 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 26 Apr 2017 16:17:56 +0800
Subject: ACPICA: Fix a module for excessive debug output

ACPICA commit 5ecc479f62a57ab1e9d25ec3b0b84682fdf8a543

hwvalid.c - no trace needed for validate I/O function.

Link: https://github.com/acpica/acpica/commit/5ecc479f
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/hwvalid.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c
index 531620abed80..0b102e071802 100644
--- a/drivers/acpi/acpica/hwvalid.c
+++ b/drivers/acpi/acpica/hwvalid.c
@@ -102,7 +102,7 @@ static const struct acpi_port_info acpi_protected_ports[] = {
 	{"PCI", 0x0CF8, 0x0CFF, ACPI_OSI_WIN_XP}
 };
 
-#define ACPI_PORT_INFO_ENTRIES  ACPI_ARRAY_LENGTH (acpi_protected_ports)
+#define ACPI_PORT_INFO_ENTRIES      ACPI_ARRAY_LENGTH (acpi_protected_ports)
 
 /******************************************************************************
  *
@@ -128,14 +128,14 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 	acpi_io_address last_address;
 	const struct acpi_port_info *port_info;
 
-	ACPI_FUNCTION_TRACE(hw_validate_io_request);
+	ACPI_FUNCTION_NAME(hw_validate_io_request);
 
 	/* Supported widths are 8/16/32 */
 
 	if ((bit_width != 8) && (bit_width != 16) && (bit_width != 32)) {
 		ACPI_ERROR((AE_INFO,
 			    "Bad BitWidth parameter: %8.8X", bit_width));
-		return (AE_BAD_PARAMETER);
+		return_ACPI_STATUS(AE_BAD_PARAMETER);
 	}
 
 	port_info = acpi_protected_ports;
@@ -167,7 +167,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 	for (i = 0; i < ACPI_PORT_INFO_ENTRIES; i++, port_info++) {
 		/*
 		 * Check if the requested address range will write to a reserved
-		 * port. Four cases to consider:
+		 * port. There are four cases to consider:
 		 *
 		 * 1) Address range is contained completely in the port address range
 		 * 2) Address range overlaps port range at the port range start
@@ -206,7 +206,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
  * FUNCTION:    acpi_hw_read_port
  *
  * PARAMETERS:  Address             Address of I/O port/register to read
- *              Value               Where value is placed
+ *              Value               Where value (data) is returned
  *              Width               Number of bits
  *
  * RETURN:      Status and value read from port
@@ -244,7 +244,7 @@ acpi_status acpi_hw_read_port(acpi_io_address address, u32 *value, u32 width)
 	/*
 	 * There has been a protection violation within the request. Fall
 	 * back to byte granularity port I/O and ignore the failing bytes.
-	 * This provides Windows compatibility.
+	 * This provides compatibility with other ACPI implementations.
 	 */
 	for (i = 0, *value = 0; i < width; i += 8) {
 
@@ -307,7 +307,7 @@ acpi_status acpi_hw_write_port(acpi_io_address address, u32 value, u32 width)
 	/*
 	 * There has been a protection violation within the request. Fall
 	 * back to byte granularity port I/O and ignore the failing bytes.
-	 * This provides Windows compatibility.
+	 * This provides compatibility with other ACPI implementations.
 	 */
 	for (i = 0; i < width; i += 8) {
 
-- 
cgit v1.2.3


From 2e337c72a3c24f0cf9bf639dc93de0f887d56dba Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 26 Apr 2017 16:18:02 +0800
Subject: ACPICA: Fix several incorrect invocations of ACPICA return macro

ACPICA commit 521bedc49b42e59116de1b54dcd95d30d36cac90

Not needed since there is no function tracing for the
validation function in hwvalid.c

Link: https://github.com/acpica/acpica/commit/521bedc4
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/hwvalid.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c
index 0b102e071802..3094cec4eab4 100644
--- a/drivers/acpi/acpica/hwvalid.c
+++ b/drivers/acpi/acpica/hwvalid.c
@@ -135,7 +135,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 	if ((bit_width != 8) && (bit_width != 16) && (bit_width != 32)) {
 		ACPI_ERROR((AE_INFO,
 			    "Bad BitWidth parameter: %8.8X", bit_width));
-		return_ACPI_STATUS(AE_BAD_PARAMETER);
+		return (AE_BAD_PARAMETER);
 	}
 
 	port_info = acpi_protected_ports;
@@ -153,13 +153,13 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 		ACPI_ERROR((AE_INFO,
 			    "Illegal I/O port address/length above 64K: %8.8X%8.8X/0x%X",
 			    ACPI_FORMAT_UINT64(address), byte_width));
-		return_ACPI_STATUS(AE_LIMIT);
+		return (AE_LIMIT);
 	}
 
 	/* Exit if requested address is not within the protected port table */
 
 	if (address > acpi_protected_ports[ACPI_PORT_INFO_ENTRIES - 1].end) {
-		return_ACPI_STATUS(AE_OK);
+		return (AE_OK);
 	}
 
 	/* Check request against the list of protected I/O ports */
@@ -198,7 +198,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 		}
 	}
 
-	return_ACPI_STATUS(AE_OK);
+	return (AE_OK);
 }
 
 /******************************************************************************
-- 
cgit v1.2.3


From 3b2d69114fefa474fca542e51119036dceb4aa6f Mon Sep 17 00:00:00 2001
From: Seunghun Han <kkamagui@gmail.com>
Date: Wed, 26 Apr 2017 16:18:08 +0800
Subject: ACPICA: Namespace: fix operand cache leak

ACPICA commit a23325b2e583556eae88ed3f764e457786bf4df6

I found some ACPI operand cache leaks in ACPI early abort cases.

Boot log of ACPI operand cache leak is as follows:
>[    0.174332] ACPI: Added _OSI(Module Device)
>[    0.175504] ACPI: Added _OSI(Processor Device)
>[    0.176010] ACPI: Added _OSI(3.0 _SCP Extensions)
>[    0.177032] ACPI: Added _OSI(Processor Aggregator Device)
>[    0.178284] ACPI: SCI (IRQ16705) allocation failed
>[    0.179352] ACPI Exception: AE_NOT_ACQUIRED, Unable to install
System Control Interrupt handler (20160930/evevent-131)
>[    0.180008] ACPI: Unable to start the ACPI Interpreter
>[    0.181125] ACPI Error: Could not remove SCI handler
(20160930/evmisc-281)
>[    0.184068] kmem_cache_destroy Acpi-Operand: Slab cache still has
objects
>[    0.185358] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.10.0-rc3 #2
>[    0.186820] Hardware name: innotek gmb_h virtual_box/virtual_box, BIOS
virtual_box 12/01/2006
>[    0.188000] Call Trace:
>[    0.188000]  ? dump_stack+0x5c/0x7d
>[    0.188000]  ? kmem_cache_destroy+0x224/0x230
>[    0.188000]  ? acpi_sleep_proc_init+0x22/0x22
>[    0.188000]  ? acpi_os_delete_cache+0xa/0xd
>[    0.188000]  ? acpi_ut_delete_caches+0x3f/0x7b
>[    0.188000]  ? acpi_terminate+0x5/0xf
>[    0.188000]  ? acpi_init+0x288/0x32e
>[    0.188000]  ? __class_create+0x4c/0x80
>[    0.188000]  ? video_setup+0x7a/0x7a
>[    0.188000]  ? do_one_initcall+0x4e/0x1b0
>[    0.188000]  ? kernel_init_freeable+0x194/0x21a
>[    0.188000]  ? rest_init+0x80/0x80
>[    0.188000]  ? kernel_init+0xa/0x100
>[    0.188000]  ? ret_from_fork+0x25/0x30

When early abort is occurred due to invalid ACPI information, Linux kernel
terminates ACPI by calling acpi_terminate() function. The function calls
acpi_ns_terminate() function to delete namespace data and ACPI operand cache
(acpi_gbl_module_code_list).

But the deletion code in acpi_ns_terminate() function is wrapped in
ACPI_EXEC_APP definition, therefore the code is only executed when the
definition exists. If the define doesn't exist, ACPI operand cache
(acpi_gbl_module_code_list) is leaked, and stack dump is shown in kernel log.

This causes a security threat because the old kernel (<= 4.9) shows memory
locations of kernel functions in stack dump, therefore kernel ASLR can be
neutralized.

To fix ACPI operand leak for enhancing security, I made a patch which
removes the ACPI_EXEC_APP define in acpi_ns_terminate() function for
executing the deletion code unconditionally.

Link: https://github.com/acpica/acpica/commit/a23325b2
Signed-off-by: Seunghun Han <kkamagui@gmail.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/nsutils.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index 661676714f7b..b5a291488e3f 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -594,25 +594,20 @@ struct acpi_namespace_node *acpi_ns_validate_handle(acpi_handle handle)
 void acpi_ns_terminate(void)
 {
 	acpi_status status;
+	union acpi_operand_object *prev;
+	union acpi_operand_object *next;
 
 	ACPI_FUNCTION_TRACE(ns_terminate);
 
-#ifdef ACPI_EXEC_APP
-	{
-		union acpi_operand_object *prev;
-		union acpi_operand_object *next;
+	/* Delete any module-level code blocks */
 
-		/* Delete any module-level code blocks */
-
-		next = acpi_gbl_module_code_list;
-		while (next) {
-			prev = next;
-			next = next->method.mutex;
-			prev->method.mutex = NULL;	/* Clear the Mutex (cheated) field */
-			acpi_ut_remove_reference(prev);
-		}
+	next = acpi_gbl_module_code_list;
+	while (next) {
+		prev = next;
+		next = next->method.mutex;
+		prev->method.mutex = NULL;	/* Clear the Mutex (cheated) field */
+		acpi_ut_remove_reference(prev);
 	}
-#endif
 
 	/*
 	 * Free the entire namespace -- all nodes and all objects
-- 
cgit v1.2.3


From ed7f8bc91a8df96662b21accc9a5abe2c292014a Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 26 Apr 2017 16:18:16 +0800
Subject: ACPICA: Update for automatic repair code for objects returned by
 evaluate_object

ACPICA commit 6b58810b9aad7358fbf1a0f4057fefa8d29838d3

This change fixes two instances where the repair code made an incorrect
assumption about how reference counts are assigned to package objects.
Resolves issues where a warning was issued about a "large reference
count" -- which usually indicates an attempt to delete an object
that has previously been poisoned and released into the object cache.

Link: https://github.com/acpica/acpica/commit/6b58810b
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/nsrepair.c  | 16 +++-------------
 drivers/acpi/acpica/nsrepair2.c |  6 +-----
 2 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c
index 38316266521e..418ef2ac82ab 100644
--- a/drivers/acpi/acpica/nsrepair.c
+++ b/drivers/acpi/acpica/nsrepair.c
@@ -290,22 +290,12 @@ object_repaired:
 	/* Object was successfully repaired */
 
 	if (package_index != ACPI_NOT_PACKAGE_ELEMENT) {
-		/*
-		 * The original object is a package element. We need to
-		 * decrement the reference count of the original object,
-		 * for removing it from the package.
-		 *
-		 * However, if the original object was just wrapped with a
-		 * package object as part of the repair, we don't need to
-		 * change the reference count.
-		 */
+
+		/* Update reference count of new object */
+
 		if (!(info->return_flags & ACPI_OBJECT_WRAPPED)) {
 			new_object->common.reference_count =
 			    return_object->common.reference_count;
-
-			if (return_object->common.reference_count > 1) {
-				return_object->common.reference_count--;
-			}
 		}
 
 		ACPI_DEBUG_PRINT((ACPI_DB_REPAIR,
diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index 352265498e90..06037e044694 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -403,16 +403,12 @@ acpi_ns_repair_CID(struct acpi_evaluate_info *info,
 			return (status);
 		}
 
-		/* Take care with reference counts */
-
 		if (original_element != *element_ptr) {
 
-			/* Element was replaced */
+			/* Update reference count of new object */
 
 			(*element_ptr)->common.reference_count =
 			    original_ref_count;
-
-			acpi_ut_remove_reference(original_element);
 		}
 
 		element_ptr++;
-- 
cgit v1.2.3


From 1f67ef69941e252d5a3558426338a84acce73d89 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 26 Apr 2017 16:18:25 +0800
Subject: ACPICA: debugger: fix memory leak on Pathname

ACPICA commit 1db14dc88f308119634d77ab9dcb6586b9fe4777

On the error return path when acpi_get_object_info fails the allocated
pathname is not free'd leading to a memory leak.  Free pathname
to fix this.

Link: https://github.com/acpica/acpica/commit/1db14dc8
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/dbmethod.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/acpica/dbmethod.c b/drivers/acpi/acpica/dbmethod.c
index 15c8237b8a80..df62c9245efc 100644
--- a/drivers/acpi/acpica/dbmethod.c
+++ b/drivers/acpi/acpica/dbmethod.c
@@ -422,6 +422,7 @@ acpi_db_walk_for_execute(acpi_handle obj_handle,
 
 	status = acpi_get_object_info(obj_handle, &obj_info);
 	if (ACPI_FAILURE(status)) {
+		ACPI_FREE(pathname);
 		return (status);
 	}
 
-- 
cgit v1.2.3


From 069f9bf454a61d6fafa415dc2a8713938196d650 Mon Sep 17 00:00:00 2001
From: Lv Zheng <lv.zheng@intel.com>
Date: Wed, 26 Apr 2017 16:18:33 +0800
Subject: ACPICA: Debugger: Add interpreter blocking mark for single-step mode

ACPICA commit 91af5d18cd40b35f9d5568fb95fc403ff12474e5

When the single-step mode is used, evaluation is actually split by the
single-step command prompts, so this patch correctly marks the evaluation
segment with interpreter lock release/acquire.
This in return fixes an issue that in the single-step command prompt,
commands requiring to hold the namespace lock (ex. namespace) cannot be
executed. ACPICA BZ 1362, fixed by Lv Zheng.

Link: https://github.com/acpica/acpica/commit/91af5d18
Link: https://bugs.acpica.org/show_bug.cgi?id=1362
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/dbxface.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/acpi/acpica/dbxface.c b/drivers/acpi/acpica/dbxface.c
index 205b8e0eded5..9b91cea63f46 100644
--- a/drivers/acpi/acpica/dbxface.c
+++ b/drivers/acpi/acpica/dbxface.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "amlcode.h"
 #include "acdebug.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_CA_DEBUGGER
 ACPI_MODULE_NAME("dbxface")
@@ -368,7 +369,9 @@ acpi_db_single_step(struct acpi_walk_state *walk_state,
 		walk_state->method_breakpoint = 1;	/* Must be non-zero! */
 	}
 
+	acpi_ex_exit_interpreter();
 	status = acpi_db_start_command(walk_state, op);
+	acpi_ex_enter_interpreter();
 
 	/* User commands complete, continue execution of the interrupted method */
 
-- 
cgit v1.2.3


From 9ff5a21a50301ef16aeb2e3937867de7eb0af030 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 26 Apr 2017 16:18:40 +0800
Subject: ACPICA: Cleanup AML opcode definitions, no functional change

ACPICA commit ec969d38fef3be95358e65f0dd071b5f2c045b6b

This change is a cleanup and further standardization of the AML
opcode defines in amlcode.h

Improves the readability and maintainability of the source code.

Link: https://github.com/acpica/acpica/commit/ec969d38
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/amlcode.h   | 97 ++++++++++++++++++-----------------------
 drivers/acpi/acpica/dbxface.c   |  2 +-
 drivers/acpi/acpica/dscontrol.c |  2 +-
 drivers/acpi/acpica/dsmthdat.c  |  3 +-
 drivers/acpi/acpica/dsobject.c  | 15 ++++---
 drivers/acpi/acpica/dsopcode.c  |  4 +-
 drivers/acpi/acpica/dsutils.c   |  8 ++--
 drivers/acpi/acpica/dswexec.c   |  2 +-
 drivers/acpi/acpica/dswload2.c  |  2 +-
 drivers/acpi/acpica/exmisc.c    | 16 +++----
 drivers/acpi/acpica/exnames.c   |  4 +-
 drivers/acpi/acpica/exoparg1.c  | 17 ++++----
 drivers/acpi/acpica/exoparg2.c  |  4 +-
 drivers/acpi/acpica/exoparg6.c  | 16 +++----
 drivers/acpi/acpica/exresolv.c  |  3 +-
 drivers/acpi/acpica/exstore.c   |  5 ++-
 drivers/acpi/acpica/exstoren.c  |  2 +-
 drivers/acpi/acpica/nsaccess.c  |  2 +-
 drivers/acpi/acpica/nsutils.c   |  6 +--
 drivers/acpi/acpica/psargs.c    |  6 +--
 drivers/acpi/acpica/psloop.c    |  2 +-
 drivers/acpi/acpica/psopcode.c  | 10 ++---
 drivers/acpi/acpica/psparse.c   |  6 +--
 drivers/acpi/acpica/pstree.c    |  2 +-
 24 files changed, 114 insertions(+), 122 deletions(-)

diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h
index b536fd471292..7e2b369487ae 100644
--- a/drivers/acpi/acpica/amlcode.h
+++ b/drivers/acpi/acpica/amlcode.h
@@ -48,11 +48,8 @@
 
 /* primary opcodes */
 
-#define AML_NULL_CHAR               (u16) 0x00
-
 #define AML_ZERO_OP                 (u16) 0x00
 #define AML_ONE_OP                  (u16) 0x01
-#define AML_UNASSIGNED              (u16) 0x02
 #define AML_ALIAS_OP                (u16) 0x06
 #define AML_NAME_OP                 (u16) 0x08
 #define AML_BYTE_OP                 (u16) 0x0a
@@ -63,17 +60,15 @@
 #define AML_SCOPE_OP                (u16) 0x10
 #define AML_BUFFER_OP               (u16) 0x11
 #define AML_PACKAGE_OP              (u16) 0x12
-#define AML_VAR_PACKAGE_OP          (u16) 0x13	/* ACPI 2.0 */
+#define AML_VARIABLE_PACKAGE_OP     (u16) 0x13	/* ACPI 2.0 */
 #define AML_METHOD_OP               (u16) 0x14
 #define AML_EXTERNAL_OP             (u16) 0x15	/* ACPI 6.0 */
 #define AML_DUAL_NAME_PREFIX        (u16) 0x2e
-#define AML_MULTI_NAME_PREFIX_OP    (u16) 0x2f
-#define AML_NAME_CHAR_SUBSEQ        (u16) 0x30
-#define AML_NAME_CHAR_FIRST         (u16) 0x41
-#define AML_EXTENDED_OP_PREFIX      (u16) 0x5b
+#define AML_MULTI_NAME_PREFIX       (u16) 0x2f
+#define AML_EXTENDED_PREFIX         (u16) 0x5b
 #define AML_ROOT_PREFIX             (u16) 0x5c
 #define AML_PARENT_PREFIX           (u16) 0x5e
-#define AML_LOCAL_OP                (u16) 0x60
+#define AML_FIRST_LOCAL_OP          (u16) 0x60	/* Used for Local op # calculations */
 #define AML_LOCAL0                  (u16) 0x60
 #define AML_LOCAL1                  (u16) 0x61
 #define AML_LOCAL2                  (u16) 0x62
@@ -82,7 +77,7 @@
 #define AML_LOCAL5                  (u16) 0x65
 #define AML_LOCAL6                  (u16) 0x66
 #define AML_LOCAL7                  (u16) 0x67
-#define AML_ARG_OP                  (u16) 0x68
+#define AML_FIRST_ARG_OP            (u16) 0x68	/* Used for Arg op # calculations */
 #define AML_ARG0                    (u16) 0x68
 #define AML_ARG1                    (u16) 0x69
 #define AML_ARG2                    (u16) 0x6a
@@ -93,7 +88,7 @@
 #define AML_STORE_OP                (u16) 0x70
 #define AML_REF_OF_OP               (u16) 0x71
 #define AML_ADD_OP                  (u16) 0x72
-#define AML_CONCAT_OP               (u16) 0x73
+#define AML_CONCATENATE_OP          (u16) 0x73
 #define AML_SUBTRACT_OP             (u16) 0x74
 #define AML_INCREMENT_OP            (u16) 0x75
 #define AML_DECREMENT_OP            (u16) 0x76
@@ -110,7 +105,7 @@
 #define AML_FIND_SET_LEFT_BIT_OP    (u16) 0x81
 #define AML_FIND_SET_RIGHT_BIT_OP   (u16) 0x82
 #define AML_DEREF_OF_OP             (u16) 0x83
-#define AML_CONCAT_RES_OP           (u16) 0x84	/* ACPI 2.0 */
+#define AML_CONCATENATE_TEMPLATE_OP (u16) 0x84	/* ACPI 2.0 */
 #define AML_MOD_OP                  (u16) 0x85	/* ACPI 2.0 */
 #define AML_NOTIFY_OP               (u16) 0x86
 #define AML_SIZE_OF_OP              (u16) 0x87
@@ -122,18 +117,18 @@
 #define AML_CREATE_BIT_FIELD_OP     (u16) 0x8d
 #define AML_OBJECT_TYPE_OP          (u16) 0x8e
 #define AML_CREATE_QWORD_FIELD_OP   (u16) 0x8f	/* ACPI 2.0 */
-#define AML_LAND_OP                 (u16) 0x90
-#define AML_LOR_OP                  (u16) 0x91
-#define AML_LNOT_OP                 (u16) 0x92
-#define AML_LEQUAL_OP               (u16) 0x93
-#define AML_LGREATER_OP             (u16) 0x94
-#define AML_LLESS_OP                (u16) 0x95
+#define AML_LOGICAL_AND_OP          (u16) 0x90
+#define AML_LOGICAL_OR_OP           (u16) 0x91
+#define AML_LOGICAL_NOT_OP          (u16) 0x92
+#define AML_LOGICAL_EQUAL_OP        (u16) 0x93
+#define AML_LOGICAL_GREATER_OP      (u16) 0x94
+#define AML_LOGICAL_LESS_OP         (u16) 0x95
 #define AML_TO_BUFFER_OP            (u16) 0x96	/* ACPI 2.0 */
-#define AML_TO_DECSTRING_OP         (u16) 0x97	/* ACPI 2.0 */
-#define AML_TO_HEXSTRING_OP         (u16) 0x98	/* ACPI 2.0 */
+#define AML_TO_DECIMAL_STRING_OP    (u16) 0x97	/* ACPI 2.0 */
+#define AML_TO_HEX_STRING_OP        (u16) 0x98	/* ACPI 2.0 */
 #define AML_TO_INTEGER_OP           (u16) 0x99	/* ACPI 2.0 */
 #define AML_TO_STRING_OP            (u16) 0x9c	/* ACPI 2.0 */
-#define AML_COPY_OP                 (u16) 0x9d	/* ACPI 2.0 */
+#define AML_COPY_OBJECT_OP          (u16) 0x9d	/* ACPI 2.0 */
 #define AML_MID_OP                  (u16) 0x9e	/* ACPI 2.0 */
 #define AML_CONTINUE_OP             (u16) 0x9f	/* ACPI 2.0 */
 #define AML_IF_OP                   (u16) 0xa0
@@ -142,18 +137,26 @@
 #define AML_NOOP_OP                 (u16) 0xa3
 #define AML_RETURN_OP               (u16) 0xa4
 #define AML_BREAK_OP                (u16) 0xa5
-#define AML_BREAK_POINT_OP          (u16) 0xcc
+#define AML_BREAKPOINT_OP           (u16) 0xcc
 #define AML_ONES_OP                 (u16) 0xff
 
-/* prefixed opcodes */
+/*
+ * Combination opcodes (actually two one-byte opcodes)
+ * Used by the disassembler and iASL compiler
+ */
+#define AML_LOGICAL_GREATER_EQUAL_OP (u16) 0x9295	/* LNot (LLess) */
+#define AML_LOGICAL_LESS_EQUAL_OP    (u16) 0x9294	/* LNot (LGreater) */
+#define AML_LOGICAL_NOT_EQUAL_OP     (u16) 0x9293	/* LNot (LEqual) */
+
+/* Prefixed (2-byte) opcodes (with AML_EXTENDED_PREFIX) */
 
-#define AML_EXTENDED_OPCODE         (u16) 0x5b00	/* prefix for 2-byte opcodes */
+#define AML_EXTENDED_OPCODE         (u16) 0x5b00	/* Prefix for 2-byte opcodes */
 
 #define AML_MUTEX_OP                (u16) 0x5b01
 #define AML_EVENT_OP                (u16) 0x5b02
-#define AML_SHIFT_RIGHT_BIT_OP      (u16) 0x5b10
-#define AML_SHIFT_LEFT_BIT_OP       (u16) 0x5b11
-#define AML_COND_REF_OF_OP          (u16) 0x5b12
+#define AML_SHIFT_RIGHT_BIT_OP      (u16) 0x5b10	/* Obsolete, not in ACPI spec */
+#define AML_SHIFT_LEFT_BIT_OP       (u16) 0x5b11	/* Obsolete, not in ACPI spec */
+#define AML_CONDITIONAL_REF_OF_OP   (u16) 0x5b12
 #define AML_CREATE_FIELD_OP         (u16) 0x5b13
 #define AML_LOAD_TABLE_OP           (u16) 0x5b1f	/* ACPI 2.0 */
 #define AML_LOAD_OP                 (u16) 0x5b20
@@ -175,20 +178,12 @@
 #define AML_FIELD_OP                (u16) 0x5b81
 #define AML_DEVICE_OP               (u16) 0x5b82
 #define AML_PROCESSOR_OP            (u16) 0x5b83
-#define AML_POWER_RES_OP            (u16) 0x5b84
+#define AML_POWER_RESOURCE_OP       (u16) 0x5b84
 #define AML_THERMAL_ZONE_OP         (u16) 0x5b85
 #define AML_INDEX_FIELD_OP          (u16) 0x5b86
 #define AML_BANK_FIELD_OP           (u16) 0x5b87
 #define AML_DATA_REGION_OP          (u16) 0x5b88	/* ACPI 2.0 */
 
-/*
- * Combination opcodes (actually two one-byte opcodes)
- * Used by the disassembler and iASL compiler
- */
-#define AML_LGREATEREQUAL_OP        (u16) 0x9295
-#define AML_LLESSEQUAL_OP           (u16) 0x9294
-#define AML_LNOTEQUAL_OP            (u16) 0x9293
-
 /*
  * Opcodes for "Field" operators
  */
@@ -308,24 +303,19 @@
 #define ARGI_INVALID_OPCODE         0xFFFFFFFF
 
 /*
- * hash offsets
- */
-#define AML_EXTOP_HASH_OFFSET       22
-#define AML_LNOT_HASH_OFFSET        19
-
-/*
- * opcode groups and types
+ * Some of the flags and types below are of the form:
+ *
+ * AML_FLAGS_EXEC_#A_#T,#R, or
+ * AML_TYPE_EXEC_#A_#T,#R where:
+ *
+ *      #A is the number of required arguments
+ *      #T is the number of target operands
+ *      #R indicates whether there is a return value
  */
-#define OPGRP_NAMED                 0x01
-#define OPGRP_FIELD                 0x02
-#define OPGRP_BYTELIST              0x04
 
 /*
- * Opcode information
+ * Opcode information flags
  */
-
-/* Opcode flags */
-
 #define AML_LOGICAL                 0x0001
 #define AML_LOGICAL_NUMERIC         0x0002
 #define AML_MATH                    0x0004
@@ -342,7 +332,7 @@
 #define AML_CONSTANT                0x2000
 #define AML_NO_OPERAND_RESOLVE      0x4000
 
-/* Convenient flag groupings */
+/* Convenient flag groupings of the flags above */
 
 #define AML_FLAGS_EXEC_0A_0T_1R                                     AML_HAS_RETVAL
 #define AML_FLAGS_EXEC_1A_0T_0R     AML_HAS_ARGS	/* Monadic1  */
@@ -359,7 +349,7 @@
 
 /*
  * The opcode Type is used in a dispatch table, do not change
- * without updating the table.
+ * or add anything new without updating the table.
  */
 #define AML_TYPE_EXEC_0A_0T_1R      0x00
 #define AML_TYPE_EXEC_1A_0T_0R      0x01	/* Monadic1  */
@@ -385,7 +375,7 @@
 
 #define AML_TYPE_METHOD_CALL        0x10
 
-/* Misc */
+/* Miscellaneous types */
 
 #define AML_TYPE_CREATE_FIELD       0x11
 #define AML_TYPE_CREATE_OBJECT      0x12
@@ -395,7 +385,6 @@
 #define AML_TYPE_NAMED_SIMPLE       0x16
 #define AML_TYPE_NAMED_COMPLEX      0x17
 #define AML_TYPE_RETURN             0x18
-
 #define AML_TYPE_UNDEFINED          0x19
 #define AML_TYPE_BOGUS              0x1A
 
diff --git a/drivers/acpi/acpica/dbxface.c b/drivers/acpi/acpica/dbxface.c
index 9b91cea63f46..8f665d94b8b5 100644
--- a/drivers/acpi/acpica/dbxface.c
+++ b/drivers/acpi/acpica/dbxface.c
@@ -126,7 +126,7 @@ error_exit:
  *
  * RETURN:      Status
  *
- * DESCRIPTION: Called for AML_BREAK_POINT_OP
+ * DESCRIPTION: Called for AML_BREAKPOINT_OP
  *
  ******************************************************************************/
 
diff --git a/drivers/acpi/acpica/dscontrol.c b/drivers/acpi/acpica/dscontrol.c
index d31b49feaa79..f470e81b0499 100644
--- a/drivers/acpi/acpica/dscontrol.c
+++ b/drivers/acpi/acpica/dscontrol.c
@@ -347,7 +347,7 @@ acpi_ds_exec_end_control_op(struct acpi_walk_state *walk_state,
 
 		break;
 
-	case AML_BREAK_POINT_OP:
+	case AML_BREAKPOINT_OP:
 
 		acpi_db_signal_break_point(walk_state);
 
diff --git a/drivers/acpi/acpica/dsmthdat.c b/drivers/acpi/acpica/dsmthdat.c
index adcc72cd53a7..27a7de95f7b0 100644
--- a/drivers/acpi/acpica/dsmthdat.c
+++ b/drivers/acpi/acpica/dsmthdat.c
@@ -672,7 +672,8 @@ acpi_ds_store_object_to_local(u8 type,
  *
  * FUNCTION:    acpi_ds_method_data_get_type
  *
- * PARAMETERS:  opcode              - Either AML_LOCAL_OP or AML_ARG_OP
+ * PARAMETERS:  opcode              - Either AML_FIRST LOCAL_OP or
+ *                                    AML_FIRST_ARG_OP
  *              index               - Which Local or Arg whose type to get
  *              walk_state          - Current walk state object
  *
diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c
index 8deaa16493a0..7df3152ed856 100644
--- a/drivers/acpi/acpica/dsobject.c
+++ b/drivers/acpi/acpica/dsobject.c
@@ -114,7 +114,7 @@ acpi_ds_build_internal_object(struct acpi_walk_state *walk_state,
 				    ((op->common.parent->common.aml_opcode ==
 				      AML_PACKAGE_OP)
 				     || (op->common.parent->common.aml_opcode ==
-					 AML_VAR_PACKAGE_OP))) {
+					 AML_VARIABLE_PACKAGE_OP))) {
 					/*
 					 * We didn't find the target and we are populating elements
 					 * of a package - ignore if slack enabled. Some ASL code
@@ -144,7 +144,7 @@ acpi_ds_build_internal_object(struct acpi_walk_state *walk_state,
 
 		if ((op->common.parent->common.aml_opcode == AML_PACKAGE_OP) ||
 		    (op->common.parent->common.aml_opcode ==
-		     AML_VAR_PACKAGE_OP)) {
+		     AML_VARIABLE_PACKAGE_OP)) {
 			/*
 			 * Attempt to resolve the node to a value before we insert it into
 			 * the package. If this is a reference to a common data type,
@@ -398,7 +398,7 @@ acpi_ds_build_internal_package_obj(struct acpi_walk_state *walk_state,
 
 	parent = op->common.parent;
 	while ((parent->common.aml_opcode == AML_PACKAGE_OP) ||
-	       (parent->common.aml_opcode == AML_VAR_PACKAGE_OP)) {
+	       (parent->common.aml_opcode == AML_VARIABLE_PACKAGE_OP)) {
 		parent = parent->common.parent;
 	}
 
@@ -769,10 +769,10 @@ acpi_ds_init_object_from_op(struct acpi_walk_state *walk_state,
 		switch (op_info->type) {
 		case AML_TYPE_LOCAL_VARIABLE:
 
-			/* Local ID (0-7) is (AML opcode - base AML_LOCAL_OP) */
+			/* Local ID (0-7) is (AML opcode - base AML_FIRST_LOCAL_OP) */
 
 			obj_desc->reference.value =
-			    ((u32)opcode) - AML_LOCAL_OP;
+			    ((u32)opcode) - AML_FIRST_LOCAL_OP;
 			obj_desc->reference.class = ACPI_REFCLASS_LOCAL;
 
 #ifndef ACPI_NO_METHOD_EXECUTION
@@ -790,9 +790,10 @@ acpi_ds_init_object_from_op(struct acpi_walk_state *walk_state,
 
 		case AML_TYPE_METHOD_ARGUMENT:
 
-			/* Arg ID (0-6) is (AML opcode - base AML_ARG_OP) */
+			/* Arg ID (0-6) is (AML opcode - base AML_FIRST_ARG_OP) */
 
-			obj_desc->reference.value = ((u32)opcode) - AML_ARG_OP;
+			obj_desc->reference.value =
+			    ((u32)opcode) - AML_FIRST_ARG_OP;
 			obj_desc->reference.class = ACPI_REFCLASS_ARG;
 
 #ifndef ACPI_NO_METHOD_EXECUTION
diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c
index 148523205d41..9a8f8a992b3e 100644
--- a/drivers/acpi/acpica/dsopcode.c
+++ b/drivers/acpi/acpica/dsopcode.c
@@ -639,7 +639,7 @@ acpi_ds_eval_data_object_operands(struct acpi_walk_state *walk_state,
 		break;
 
 	case AML_PACKAGE_OP:
-	case AML_VAR_PACKAGE_OP:
+	case AML_VARIABLE_PACKAGE_OP:
 
 		status =
 		    acpi_ds_build_internal_package_obj(walk_state, op, length,
@@ -660,7 +660,7 @@ acpi_ds_eval_data_object_operands(struct acpi_walk_state *walk_state,
 		if ((!op->common.parent) ||
 		    ((op->common.parent->common.aml_opcode != AML_PACKAGE_OP) &&
 		     (op->common.parent->common.aml_opcode !=
-		      AML_VAR_PACKAGE_OP)
+		      AML_VARIABLE_PACKAGE_OP)
 		     && (op->common.parent->common.aml_opcode !=
 			 AML_NAME_OP))) {
 			walk_state->result_obj = obj_desc;
diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c
index 049fbab4e5a6..406edec20de7 100644
--- a/drivers/acpi/acpica/dsutils.c
+++ b/drivers/acpi/acpica/dsutils.c
@@ -275,9 +275,9 @@ acpi_ds_is_result_used(union acpi_parse_object * op,
 		if ((op->common.parent->common.aml_opcode == AML_REGION_OP) ||
 		    (op->common.parent->common.aml_opcode == AML_DATA_REGION_OP)
 		    || (op->common.parent->common.aml_opcode == AML_PACKAGE_OP)
-		    || (op->common.parent->common.aml_opcode ==
-			AML_VAR_PACKAGE_OP)
 		    || (op->common.parent->common.aml_opcode == AML_BUFFER_OP)
+		    || (op->common.parent->common.aml_opcode ==
+			AML_VARIABLE_PACKAGE_OP)
 		    || (op->common.parent->common.aml_opcode ==
 			AML_INT_EVAL_SUBTREE_OP)
 		    || (op->common.parent->common.aml_opcode ==
@@ -551,7 +551,7 @@ acpi_ds_create_operand(struct acpi_walk_state *walk_state,
 			 */
 			if (status == AE_NOT_FOUND) {
 				if (parent_op->common.aml_opcode ==
-				    AML_COND_REF_OF_OP) {
+				    AML_CONDITIONAL_REF_OF_OP) {
 					/*
 					 * For the Conditional Reference op, it's OK if
 					 * the name is not found;  We just need a way to
@@ -806,7 +806,7 @@ acpi_status acpi_ds_evaluate_name_path(struct acpi_walk_state *walk_state)
 	}
 
 	if ((op->common.parent->common.aml_opcode == AML_PACKAGE_OP) ||
-	    (op->common.parent->common.aml_opcode == AML_VAR_PACKAGE_OP) ||
+	    (op->common.parent->common.aml_opcode == AML_VARIABLE_PACKAGE_OP) ||
 	    (op->common.parent->common.aml_opcode == AML_REF_OF_OP)) {
 
 		/* TBD: Should we specify this feature as a bit of op_info->Flags of these opcodes? */
diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c
index 78f8e6a4f72f..a2ff8ad70d58 100644
--- a/drivers/acpi/acpica/dswexec.c
+++ b/drivers/acpi/acpica/dswexec.c
@@ -497,7 +497,7 @@ acpi_status acpi_ds_exec_end_op(struct acpi_walk_state *walk_state)
 			if ((op->asl.parent) &&
 			    ((op->asl.parent->asl.aml_opcode == AML_PACKAGE_OP)
 			     || (op->asl.parent->asl.aml_opcode ==
-				 AML_VAR_PACKAGE_OP))) {
+				 AML_VARIABLE_PACKAGE_OP))) {
 				ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH,
 						  "Method Reference in a Package, Op=%p\n",
 						  op));
diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 44d4553dfbdd..8d510c7e20c8 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c
@@ -528,7 +528,7 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state)
 			status = acpi_ex_create_processor(walk_state);
 			break;
 
-		case AML_POWER_RES_OP:
+		case AML_POWER_RESOURCE_OP:
 
 			status = acpi_ex_create_power_resource(walk_state);
 			break;
diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c
index 1a6f59079ea5..f222a80ca38e 100644
--- a/drivers/acpi/acpica/exmisc.c
+++ b/drivers/acpi/acpica/exmisc.c
@@ -249,14 +249,14 @@ acpi_ex_do_logical_numeric_op(u16 opcode,
 	ACPI_FUNCTION_TRACE(ex_do_logical_numeric_op);
 
 	switch (opcode) {
-	case AML_LAND_OP:	/* LAnd (Integer0, Integer1) */
+	case AML_LOGICAL_AND_OP:	/* LAnd (Integer0, Integer1) */
 
 		if (integer0 && integer1) {
 			local_result = TRUE;
 		}
 		break;
 
-	case AML_LOR_OP:	/* LOr (Integer0, Integer1) */
+	case AML_LOGICAL_OR_OP:	/* LOr (Integer0, Integer1) */
 
 		if (integer0 || integer1) {
 			local_result = TRUE;
@@ -365,21 +365,21 @@ acpi_ex_do_logical_op(u16 opcode,
 		integer1 = local_operand1->integer.value;
 
 		switch (opcode) {
-		case AML_LEQUAL_OP:	/* LEqual (Operand0, Operand1) */
+		case AML_LOGICAL_EQUAL_OP:	/* LEqual (Operand0, Operand1) */
 
 			if (integer0 == integer1) {
 				local_result = TRUE;
 			}
 			break;
 
-		case AML_LGREATER_OP:	/* LGreater (Operand0, Operand1) */
+		case AML_LOGICAL_GREATER_OP:	/* LGreater (Operand0, Operand1) */
 
 			if (integer0 > integer1) {
 				local_result = TRUE;
 			}
 			break;
 
-		case AML_LLESS_OP:	/* LLess (Operand0, Operand1) */
+		case AML_LOGICAL_LESS_OP:	/* LLess (Operand0, Operand1) */
 
 			if (integer0 < integer1) {
 				local_result = TRUE;
@@ -408,7 +408,7 @@ acpi_ex_do_logical_op(u16 opcode,
 				 (length0 > length1) ? length1 : length0);
 
 		switch (opcode) {
-		case AML_LEQUAL_OP:	/* LEqual (Operand0, Operand1) */
+		case AML_LOGICAL_EQUAL_OP:	/* LEqual (Operand0, Operand1) */
 
 			/* Length and all bytes must be equal */
 
@@ -420,7 +420,7 @@ acpi_ex_do_logical_op(u16 opcode,
 			}
 			break;
 
-		case AML_LGREATER_OP:	/* LGreater (Operand0, Operand1) */
+		case AML_LOGICAL_GREATER_OP:	/* LGreater (Operand0, Operand1) */
 
 			if (compare > 0) {
 				local_result = TRUE;
@@ -437,7 +437,7 @@ acpi_ex_do_logical_op(u16 opcode,
 			}
 			break;
 
-		case AML_LLESS_OP:	/* LLess (Operand0, Operand1) */
+		case AML_LOGICAL_LESS_OP:	/* LLess (Operand0, Operand1) */
 
 			if (compare > 0) {
 				goto cleanup;	/* FALSE */
diff --git a/drivers/acpi/acpica/exnames.c b/drivers/acpi/acpica/exnames.c
index ee7b62a86661..caa5ed1f65ec 100644
--- a/drivers/acpi/acpica/exnames.c
+++ b/drivers/acpi/acpica/exnames.c
@@ -122,7 +122,7 @@ static char *acpi_ex_allocate_name_string(u32 prefix_count, u32 num_name_segs)
 
 		/* Set up multi prefixes   */
 
-		*temp_ptr++ = AML_MULTI_NAME_PREFIX_OP;
+		*temp_ptr++ = AML_MULTI_NAME_PREFIX;
 		*temp_ptr++ = (char)num_name_segs;
 	} else if (2 == num_name_segs) {
 
@@ -342,7 +342,7 @@ acpi_ex_get_name_string(acpi_object_type data_type,
 			}
 			break;
 
-		case AML_MULTI_NAME_PREFIX_OP:
+		case AML_MULTI_NAME_PREFIX:
 
 			ACPI_DEBUG_PRINT((ACPI_DB_LOAD,
 					  "MultiNamePrefix at %p\n",
diff --git a/drivers/acpi/acpica/exoparg1.c b/drivers/acpi/acpica/exoparg1.c
index af73fcde7e5c..e327349675cd 100644
--- a/drivers/acpi/acpica/exoparg1.c
+++ b/drivers/acpi/acpica/exoparg1.c
@@ -274,7 +274,7 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
 	case AML_FIND_SET_RIGHT_BIT_OP:
 	case AML_FROM_BCD_OP:
 	case AML_TO_BCD_OP:
-	case AML_COND_REF_OF_OP:
+	case AML_CONDITIONAL_REF_OF_OP:
 
 		/* Create a return object of type Integer for these opcodes */
 
@@ -405,7 +405,7 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
 			}
 			break;
 
-		case AML_COND_REF_OF_OP:	/* cond_ref_of (source_object, Result) */
+		case AML_CONDITIONAL_REF_OF_OP:	/* cond_ref_of (source_object, Result) */
 			/*
 			 * This op is a little strange because the internal return value is
 			 * different than the return value stored in the result descriptor
@@ -475,14 +475,14 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
 		/*
 		 * ACPI 2.0 Opcodes
 		 */
-	case AML_COPY_OP:	/* Copy (Source, Target) */
+	case AML_COPY_OBJECT_OP:	/* copy_object (Source, Target) */
 
 		status =
 		    acpi_ut_copy_iobject_to_iobject(operand[0], &return_desc,
 						    walk_state);
 		break;
 
-	case AML_TO_DECSTRING_OP:	/* to_decimal_string (Data, Result) */
+	case AML_TO_DECIMAL_STRING_OP:	/* to_decimal_string (Data, Result) */
 
 		status =
 		    acpi_ex_convert_to_string(operand[0], &return_desc,
@@ -495,7 +495,7 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
 		}
 		break;
 
-	case AML_TO_HEXSTRING_OP:	/* to_hex_string (Data, Result) */
+	case AML_TO_HEX_STRING_OP:	/* to_hex_string (Data, Result) */
 
 		status =
 		    acpi_ex_convert_to_string(operand[0], &return_desc,
@@ -603,7 +603,7 @@ acpi_status acpi_ex_opcode_1A_0T_1R(struct acpi_walk_state *walk_state)
 	/* Examine the AML opcode */
 
 	switch (walk_state->opcode) {
-	case AML_LNOT_OP:	/* LNot (Operand) */
+	case AML_LOGICAL_NOT_OP:	/* LNot (Operand) */
 
 		return_desc = acpi_ut_create_integer_object((u64) 0);
 		if (!return_desc) {
@@ -652,9 +652,8 @@ acpi_status acpi_ex_opcode_1A_0T_1R(struct acpi_walk_state *walk_state)
 		 * NOTE:  We use LNOT_OP here in order to force resolution of the
 		 * reference operand to an actual integer.
 		 */
-		status =
-		    acpi_ex_resolve_operands(AML_LNOT_OP, &temp_desc,
-					     walk_state);
+		status = acpi_ex_resolve_operands(AML_LOGICAL_NOT_OP,
+						  &temp_desc, walk_state);
 		if (ACPI_FAILURE(status)) {
 			ACPI_EXCEPTION((AE_INFO, status,
 					"While resolving operands for [%s]",
diff --git a/drivers/acpi/acpica/exoparg2.c b/drivers/acpi/acpica/exoparg2.c
index 44ecba50c0da..eecb3bff7fd7 100644
--- a/drivers/acpi/acpica/exoparg2.c
+++ b/drivers/acpi/acpica/exoparg2.c
@@ -298,7 +298,7 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state)
 					NULL, &return_desc->integer.value);
 		break;
 
-	case AML_CONCAT_OP:	/* Concatenate (Data1, Data2, Result) */
+	case AML_CONCATENATE_OP:	/* Concatenate (Data1, Data2, Result) */
 
 		status =
 		    acpi_ex_do_concatenate(operand[0], operand[1], &return_desc,
@@ -343,7 +343,7 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state)
 		       operand[0]->buffer.pointer, length);
 		break;
 
-	case AML_CONCAT_RES_OP:
+	case AML_CONCATENATE_TEMPLATE_OP:
 
 		/* concatenate_res_template (Buffer, Buffer, Result) (ACPI 2.0) */
 
diff --git a/drivers/acpi/acpica/exoparg6.c b/drivers/acpi/acpica/exoparg6.c
index 31e4df97cbe1..688032b58a21 100644
--- a/drivers/acpi/acpica/exoparg6.c
+++ b/drivers/acpi/acpica/exoparg6.c
@@ -124,8 +124,8 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:     (M == P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LEQUAL_OP, match_obj, package_obj,
-					  &logical_result);
+		    acpi_ex_do_logical_op(AML_LOGICAL_EQUAL_OP, match_obj,
+					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
 		}
@@ -137,8 +137,8 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:                  (M >= P[i]) (M not_less than P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LLESS_OP, match_obj, package_obj,
-					  &logical_result);
+		    acpi_ex_do_logical_op(AML_LOGICAL_LESS_OP, match_obj,
+					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
 		}
@@ -151,7 +151,7 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:         (M > P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LGREATER_OP, match_obj,
+		    acpi_ex_do_logical_op(AML_LOGICAL_GREATER_OP, match_obj,
 					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
@@ -164,7 +164,7 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:                     (M <= P[i]) (M not_greater than P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LGREATER_OP, match_obj,
+		    acpi_ex_do_logical_op(AML_LOGICAL_GREATER_OP, match_obj,
 					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
@@ -178,8 +178,8 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:            (M < P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LLESS_OP, match_obj, package_obj,
-					  &logical_result);
+		    acpi_ex_do_logical_op(AML_LOGICAL_LESS_OP, match_obj,
+					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
 		}
diff --git a/drivers/acpi/acpica/exresolv.c b/drivers/acpi/acpica/exresolv.c
index 7fecefc2e1b4..aa8c6fd74cc3 100644
--- a/drivers/acpi/acpica/exresolv.c
+++ b/drivers/acpi/acpica/exresolv.c
@@ -196,7 +196,8 @@ acpi_ex_resolve_object_to_value(union acpi_operand_object **stack_ptr,
 
 				if ((walk_state->opcode ==
 				     AML_INT_METHODCALL_OP)
-				    || (walk_state->opcode == AML_COPY_OP)) {
+				    || (walk_state->opcode ==
+					AML_COPY_OBJECT_OP)) {
 					break;
 				}
 
diff --git a/drivers/acpi/acpica/exstore.c b/drivers/acpi/acpica/exstore.c
index a2f8001aeb86..bdd43cde8f36 100644
--- a/drivers/acpi/acpica/exstore.c
+++ b/drivers/acpi/acpica/exstore.c
@@ -416,7 +416,7 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
 
 	/* Only limited target types possible for everything except copy_object */
 
-	if (walk_state->opcode != AML_COPY_OP) {
+	if (walk_state->opcode != AML_COPY_OBJECT_OP) {
 		/*
 		 * Only copy_object allows all object types to be overwritten. For
 		 * target_ref(s), there are restrictions on the object types that
@@ -499,7 +499,8 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
 	case ACPI_TYPE_STRING:
 	case ACPI_TYPE_BUFFER:
 
-		if ((walk_state->opcode == AML_COPY_OP) || !implicit_conversion) {
+		if ((walk_state->opcode == AML_COPY_OBJECT_OP) ||
+		    !implicit_conversion) {
 			/*
 			 * However, copy_object and Stores to arg_x do not perform
 			 * an implicit conversion, as per the ACPI specification.
diff --git a/drivers/acpi/acpica/exstoren.c b/drivers/acpi/acpica/exstoren.c
index 85db4716a043..56f59cf5da29 100644
--- a/drivers/acpi/acpica/exstoren.c
+++ b/drivers/acpi/acpica/exstoren.c
@@ -107,7 +107,7 @@ acpi_ex_resolve_object(union acpi_operand_object **source_desc_ptr,
 
 		/* For copy_object, no further validation necessary */
 
-		if (walk_state->opcode == AML_COPY_OP) {
+		if (walk_state->opcode == AML_COPY_OBJECT_OP) {
 			break;
 		}
 
diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c
index 498bb8f70e6b..fb265b5737de 100644
--- a/drivers/acpi/acpica/nsaccess.c
+++ b/drivers/acpi/acpica/nsaccess.c
@@ -485,7 +485,7 @@ acpi_ns_lookup(union acpi_generic_state *scope_info,
 					  flags));
 			break;
 
-		case AML_MULTI_NAME_PREFIX_OP:
+		case AML_MULTI_NAME_PREFIX:
 
 			/* More than one name_seg, search rules do not apply */
 
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index b5a291488e3f..2fe87d0dd9d5 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -252,7 +252,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info)
 			internal_name[1] = AML_DUAL_NAME_PREFIX;
 			result = &internal_name[2];
 		} else {
-			internal_name[1] = AML_MULTI_NAME_PREFIX_OP;
+			internal_name[1] = AML_MULTI_NAME_PREFIX;
 			internal_name[2] = (char)num_segments;
 			result = &internal_name[3];
 		}
@@ -274,7 +274,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info)
 			internal_name[i] = AML_DUAL_NAME_PREFIX;
 			result = &internal_name[(acpi_size)i + 1];
 		} else {
-			internal_name[i] = AML_MULTI_NAME_PREFIX_OP;
+			internal_name[i] = AML_MULTI_NAME_PREFIX;
 			internal_name[(acpi_size)i + 1] = (char)num_segments;
 			result = &internal_name[(acpi_size)i + 2];
 		}
@@ -450,7 +450,7 @@ acpi_ns_externalize_name(u32 internal_name_length,
 	 */
 	if (prefix_length < internal_name_length) {
 		switch (internal_name[prefix_length]) {
-		case AML_MULTI_NAME_PREFIX_OP:
+		case AML_MULTI_NAME_PREFIX:
 
 			/* <count> 4-byte names */
 
diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c
index 05b62ad44c3e..cda1e151dbc7 100644
--- a/drivers/acpi/acpica/psargs.c
+++ b/drivers/acpi/acpica/psargs.c
@@ -186,7 +186,7 @@ char *acpi_ps_get_next_namestring(struct acpi_parse_state *parser_state)
 		end += 1 + (2 * ACPI_NAME_SIZE);
 		break;
 
-	case AML_MULTI_NAME_PREFIX_OP:
+	case AML_MULTI_NAME_PREFIX:
 
 		/* Multiple name segments, 4 chars each, count in next byte */
 
@@ -339,7 +339,7 @@ acpi_ps_get_next_namepath(struct acpi_walk_state *walk_state,
 		/* 2) not_found during a cond_ref_of(x) is ok by definition */
 
 		else if (walk_state->op->common.aml_opcode ==
-			 AML_COND_REF_OF_OP) {
+			 AML_CONDITIONAL_REF_OF_OP) {
 			status = AE_OK;
 		}
 
@@ -352,7 +352,7 @@ acpi_ps_get_next_namepath(struct acpi_walk_state *walk_state,
 			 ((arg->common.parent->common.aml_opcode ==
 			   AML_PACKAGE_OP)
 			  || (arg->common.parent->common.aml_opcode ==
-			      AML_VAR_PACKAGE_OP))) {
+			      AML_VARIABLE_PACKAGE_OP))) {
 			status = AE_OK;
 		}
 	}
diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index 14d689606d2f..b7da881b66da 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -254,7 +254,7 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 
 		case AML_BUFFER_OP:
 		case AML_PACKAGE_OP:
-		case AML_VAR_PACKAGE_OP:
+		case AML_VARIABLE_PACKAGE_OP:
 
 			if ((op->common.parent) &&
 			    (op->common.parent->common.aml_opcode ==
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index 451b672915f1..b8f0617fd421 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -69,7 +69,7 @@ ACPI_MODULE_NAME("psopcode")
 	AML_DEVICE_OP
 	AML_THERMAL_ZONE_OP
 	AML_METHOD_OP
-	AML_POWER_RES_OP
+	AML_POWER_RESOURCE_OP
 	AML_PROCESSOR_OP
 	AML_FIELD_OP
 	AML_INDEX_FIELD_OP
@@ -95,7 +95,7 @@ ACPI_MODULE_NAME("psopcode")
 	AML_DEVICE_OP
 	AML_THERMAL_ZONE_OP
 	AML_METHOD_OP
-	AML_POWER_RES_OP
+	AML_POWER_RESOURCE_OP
 	AML_PROCESSOR_OP
 	AML_FIELD_OP
 	AML_INDEX_FIELD_OP
@@ -113,7 +113,7 @@ ACPI_MODULE_NAME("psopcode")
 	AML_DEVICE_OP
 	AML_THERMAL_ZONE_OP
 	AML_METHOD_OP
-	AML_POWER_RES_OP
+	AML_POWER_RESOURCE_OP
 	AML_PROCESSOR_OP
 	AML_NAME_OP
 	AML_ALIAS_OP
@@ -136,7 +136,7 @@ ACPI_MODULE_NAME("psopcode")
 	AML_DEVICE_OP
 	AML_THERMAL_ZONE_OP
 	AML_METHOD_OP
-	AML_POWER_RES_OP
+	AML_POWER_RESOURCE_OP
 	AML_PROCESSOR_OP
 	AML_NAME_OP
 	AML_ALIAS_OP
@@ -149,7 +149,7 @@ ACPI_MODULE_NAME("psopcode")
   must be deferred until needed
 
 	AML_METHOD_OP
-	AML_VAR_PACKAGE_OP
+	AML_VARIABLE_PACKAGE_OP
 	AML_CREATE_FIELD_OP
 	AML_CREATE_BIT_FIELD_OP
 	AML_CREATE_BYTE_FIELD_OP
diff --git a/drivers/acpi/acpica/psparse.c b/drivers/acpi/acpica/psparse.c
index a813bbbd5a8b..8116a670de39 100644
--- a/drivers/acpi/acpica/psparse.c
+++ b/drivers/acpi/acpica/psparse.c
@@ -105,7 +105,7 @@ u16 acpi_ps_peek_opcode(struct acpi_parse_state * parser_state)
 	aml = parser_state->aml;
 	opcode = (u16) ACPI_GET8(aml);
 
-	if (opcode == AML_EXTENDED_OP_PREFIX) {
+	if (opcode == AML_EXTENDED_PREFIX) {
 
 		/* Extended opcode, get the second opcode byte */
 
@@ -210,7 +210,7 @@ acpi_ps_complete_this_op(struct acpi_walk_state *walk_state,
 			    || (op->common.parent->common.aml_opcode ==
 				AML_BANK_FIELD_OP)
 			    || (op->common.parent->common.aml_opcode ==
-				AML_VAR_PACKAGE_OP)) {
+				AML_VARIABLE_PACKAGE_OP)) {
 				replacement_op =
 				    acpi_ps_alloc_op(AML_INT_RETURN_VALUE_OP,
 						     op->common.aml);
@@ -225,7 +225,7 @@ acpi_ps_complete_this_op(struct acpi_walk_state *walk_state,
 				if ((op->common.aml_opcode == AML_BUFFER_OP)
 				    || (op->common.aml_opcode == AML_PACKAGE_OP)
 				    || (op->common.aml_opcode ==
-					AML_VAR_PACKAGE_OP)) {
+					AML_VARIABLE_PACKAGE_OP)) {
 					replacement_op =
 					    acpi_ps_alloc_op(op->common.
 							     aml_opcode,
diff --git a/drivers/acpi/acpica/pstree.c b/drivers/acpi/acpica/pstree.c
index 9677fff8fd47..a11d475b79cd 100644
--- a/drivers/acpi/acpica/pstree.c
+++ b/drivers/acpi/acpica/pstree.c
@@ -296,7 +296,7 @@ union acpi_parse_object *acpi_ps_get_child(union acpi_parse_object *op)
 		child = acpi_ps_get_arg(op, 1);
 		break;
 
-	case AML_POWER_RES_OP:
+	case AML_POWER_RESOURCE_OP:
 	case AML_INDEX_FIELD_OP:
 
 		child = acpi_ps_get_arg(op, 2);
-- 
cgit v1.2.3


From bb1e23e66e6237ff7a1824b37366540a89149c33 Mon Sep 17 00:00:00 2001
From: Lv Zheng <lv.zheng@intel.com>
Date: Wed, 26 Apr 2017 16:18:49 +0800
Subject: ACPICA: iasl: Fix IORT SMMU GSI disassembling

ACPICA commit 637b88de24a78c20478728d9d66632b06fcaa5bf

If the IORT template is compiled and then iort.aml binary disassembled to
iort.dsl, SMMUv1 node lists incorrect offset for SMMU_Nsg_cfg_irpt Interrupt:
[0ECh 0236   8]       SMMU_Nsg_irpt Interrupt : 0000000000000000
[0ECh 0236   8]    SMMU_Nsg_cfg_irpt Interrupt : 0000000000000000
This is because iasl hasn't implemented SMMU GSI decoding yet.

This patch fixes this issue by preparing structures for decoding IORT SMMU
GSI. ACPICA BZ 1340, reported by Alexei Fedorov, fixed by Lv Zheng.

Link: https://github.com/acpica/acpica/commit/637b88de
Link: https://bugs.acpica.org/show_bug.cgi?id=1340
Reported-by: Alexei Fedorov <Alexei.Fedorov@arm.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 7aee9fb3bd1f..83666cb3ec2c 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -783,6 +783,15 @@ struct acpi_iort_smmu {
 #define ACPI_IORT_SMMU_DVM_SUPPORTED    (1)
 #define ACPI_IORT_SMMU_COHERENT_WALK    (1<<1)
 
+/* Global interrupt format */
+
+struct acpi_iort_smmu_gsi {
+	u32 nsg_irpt;
+	u32 nsg_irpt_flags;
+	u32 nsg_cfg_irpt;
+	u32 nsg_cfg_irpt_flags;
+};
+
 struct acpi_iort_smmu_v3 {
 	u64 base_address;	/* SMMUv3 base address */
 	u32 flags;
-- 
cgit v1.2.3


From bee900db6d4d9822d8ce6b44f77d23a3c0aa3131 Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@linux.intel.com>
Date: Wed, 26 Apr 2017 16:18:55 +0800
Subject: ACPICA: Disassembler: Do not unconditionally remove temporary names

ACPICA commit c46f496df41e53a368f877f88b70bdfc9bd6fdbe

Change the Switch disassembly code to check if the conversion can be
done before removing temporary (_T_x) names. Prevents invalid
disassembly of AML created by older compilers (circa 2005).

Link: https://github.com/acpica/acpica/commit/c46f496d
Link: https://bugs.acpica.org/show_bug.cgi?id=1358
Link: https://bugs.acpica.org/show_bug.cgi?id=1360
Reported-by: racerrehabman@gmail.com
Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/aclocal.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 8fd495e8fdce..ca984261acbb 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -784,9 +784,10 @@ union acpi_parse_value {
 #define ACPI_DASM_LNOT_SUFFIX           0x09	/* End  of a Lnot_equal (etc.) pair of opcodes */
 #define ACPI_DASM_HID_STRING            0x0A	/* String is a _HID or _CID */
 #define ACPI_DASM_IGNORE_SINGLE         0x0B	/* Ignore the opcode but not it's children */
-#define ACPI_DASM_SWITCH_PREDICATE      0x0C	/* Object is a predicate for a Switch or Case block */
-#define ACPI_DASM_CASE                  0x0D	/* If/Else is a Case in a Switch/Case block */
-#define ACPI_DASM_DEFAULT               0x0E	/* Else is a Default in a Switch/Case block */
+#define ACPI_DASM_SWITCH                0x0C	/* While is a Switch */
+#define ACPI_DASM_SWITCH_PREDICATE      0x0D	/* Object is a predicate for a Switch or Case block */
+#define ACPI_DASM_CASE                  0x0E	/* If/Else is a Case in a Switch/Case block */
+#define ACPI_DASM_DEFAULT               0x0F	/* Else is a Default in a Switch/Case block */
 
 /*
  * Generic operation (for example:  If, While, Store)
-- 
cgit v1.2.3


From c8e8ab1e4c1e0879f21e39504bf7fc01c0b5865f Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 26 Apr 2017 16:19:10 +0800
Subject: ACPICA: Local cache support: Allow small cache objects

ACPICA commit 9c54b8bbd483421ef2fef5225c00f1655b4a491c

Remove apparently arbitrary restriction on the size of the cache
objects to 16 (in acpi_os_create_cache). Now, the input object
size must be simply non-zero.

Link: https://github.com/acpica/acpica/commit/9c54b8bb
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/utcache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c
index 11c7f72f2d56..531493306dee 100644
--- a/drivers/acpi/acpica/utcache.c
+++ b/drivers/acpi/acpica/utcache.c
@@ -71,7 +71,7 @@ acpi_os_create_cache(char *cache_name,
 
 	ACPI_FUNCTION_ENTRY();
 
-	if (!cache_name || !return_cache || (object_size < 16)) {
+	if (!cache_name || !return_cache || !object_size) {
 		return (AE_BAD_PARAMETER);
 	}
 
-- 
cgit v1.2.3


From 1e9a038b7fe9a8c10ef1238f4e695d5fbe0dd594 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 24 Apr 2017 16:02:09 -0700
Subject: srcu: Expedited grace periods with reduced memory contention

Commit f60d231a87c5 ("srcu: Crude control of expedited grace periods")
introduced a per-srcu_struct atomic counter to track outstanding
requests for grace periods.  This works, but represents a memory-contention
bottleneck.  This commit therefore uses the srcu_node combining tree
to remove this bottleneck.

This commit adds new ->srcu_gp_seq_needed_exp fields to the
srcu_data, srcu_node, and srcu_struct structures, which track the
farthest-in-the-future grace period that must be expedited, which in
turn requires that all nearer-term grace periods also be expedited.
Requests for expediting start with the srcu_data structure, run up
through the srcu_node tree, and end at the srcu_struct structure.
Note that it may be necessary to expedite a grace period that just
now started, and this is handled by a new srcu_funnel_exp_start()
function, which is invoked when the grace period itself is already
in its way, but when that grace period was not marked as expedited.

A new srcu_get_delay() function returns zero if there is at least one
expedited SRCU grace period in flight, or SRCU_INTERVAL otherwise.
This function is used to calculate delays:  Normal grace periods
are allowed to extend in order to cover more requests with a given
grace-period computation, which decreases per-request overhead.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Mike Galbraith <efault@gmx.de>
---
 include/linux/srcutree.h |   4 +-
 kernel/rcu/srcutree.c    | 135 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 98 insertions(+), 41 deletions(-)

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 3865717df124..86df48d3e97b 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -43,6 +43,7 @@ struct srcu_data {
 	spinlock_t lock ____cacheline_internodealigned_in_smp;
 	struct rcu_segcblist srcu_cblist;	/* List of callbacks.*/
 	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
 	bool srcu_cblist_invoking;		/* Invoking these CBs? */
 	struct delayed_work work;		/* Context for CB invoking. */
 	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
@@ -63,6 +64,7 @@ struct srcu_node {
 						/*  is > ->srcu_gq_seq. */
 	unsigned long srcu_data_have_cbs[4];	/* Which srcu_data structs */
 						/*  have CBs for given GP? */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
 	struct srcu_node *srcu_parent;		/* Next up in tree. */
 	int grplo;				/* Least CPU for node. */
 	int grphi;				/* Biggest CPU for node. */
@@ -81,7 +83,7 @@ struct srcu_struct {
 	unsigned int srcu_idx;			/* Current rdr array element. */
 	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
 	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
-	atomic_t srcu_exp_cnt;			/* # ongoing expedited GPs. */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
 	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
 	unsigned long srcu_barrier_seq;		/* srcu_barrier seq #. */
 	struct mutex srcu_barrier_mutex;	/* Serialize barrier ops. */
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 72b6cce5f591..4b98e6f45166 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -72,6 +72,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 			snp->srcu_have_cbs[i] = 0;
 			snp->srcu_data_have_cbs[i] = 0;
 		}
+		snp->srcu_gp_seq_needed_exp = 0;
 		snp->grplo = -1;
 		snp->grphi = -1;
 		if (snp == &sp->node[0]) {
@@ -102,6 +103,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 		rcu_segcblist_init(&sdp->srcu_cblist);
 		sdp->srcu_cblist_invoking = false;
 		sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
+		sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq;
 		sdp->mynode = &snp_first[cpu / levelspread[level]];
 		for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
 			if (snp->grplo < 0)
@@ -135,7 +137,6 @@ static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
 	mutex_init(&sp->srcu_gp_mutex);
 	sp->srcu_idx = 0;
 	sp->srcu_gp_seq = 0;
-	atomic_set(&sp->srcu_exp_cnt, 0);
 	sp->srcu_barrier_seq = 0;
 	mutex_init(&sp->srcu_barrier_mutex);
 	atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
@@ -143,6 +144,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
 	if (!is_static)
 		sp->sda = alloc_percpu(struct srcu_data);
 	init_srcu_struct_nodes(sp, is_static);
+	sp->srcu_gp_seq_needed_exp = 0;
 	smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */
 	return sp->sda ? 0 : -ENOMEM;
 }
@@ -307,6 +309,18 @@ static bool srcu_readers_active(struct srcu_struct *sp)
 
 #define SRCU_INTERVAL		1
 
+/*
+ * Return grace-period delay, zero if there are expedited grace
+ * periods pending, SRCU_INTERVAL otherwise.
+ */
+static unsigned long srcu_get_delay(struct srcu_struct *sp)
+{
+	if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq),
+			 READ_ONCE(sp->srcu_gp_seq_needed_exp)))
+		return 0;
+	return SRCU_INTERVAL;
+}
+
 /**
  * cleanup_srcu_struct - deconstruct a sleep-RCU structure
  * @sp: structure to clean up.
@@ -318,7 +332,8 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
 {
 	int cpu;
 
-	WARN_ON_ONCE(atomic_read(&sp->srcu_exp_cnt));
+	if (WARN_ON(!srcu_get_delay(sp)))
+		return; /* Leakage unless caller handles error. */
 	if (WARN_ON(srcu_readers_active(sp)))
 		return; /* Leakage unless caller handles error. */
 	flush_delayed_work(&sp->work);
@@ -444,15 +459,14 @@ static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
  * schedule this invocation on the corresponding CPUs.
  */
 static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp,
-				  unsigned long mask)
+				  unsigned long mask, unsigned long delay)
 {
 	int cpu;
 
 	for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
 		if (!(mask & (1 << (cpu - snp->grplo))))
 			continue;
-		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu),
-				      atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL);
+		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay);
 	}
 }
 
@@ -467,6 +481,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp,
  */
 static void srcu_gp_end(struct srcu_struct *sp)
 {
+	unsigned long cbdelay;
 	bool cbs;
 	unsigned long gpseq;
 	int idx;
@@ -481,8 +496,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	spin_lock_irq(&sp->gp_lock);
 	idx = rcu_seq_state(sp->srcu_gp_seq);
 	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
+	cbdelay = srcu_get_delay(sp);
 	rcu_seq_end(&sp->srcu_gp_seq);
 	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
+	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
+		sp->srcu_gp_seq_needed_exp = gpseq;
 	spin_unlock_irq(&sp->gp_lock);
 	mutex_unlock(&sp->srcu_gp_mutex);
 	/* A new grace period can start at this point.  But only one. */
@@ -497,12 +515,14 @@ static void srcu_gp_end(struct srcu_struct *sp)
 			cbs = snp->srcu_have_cbs[idx] == gpseq;
 		snp->srcu_have_cbs[idx] = gpseq;
 		rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
+		if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
+			snp->srcu_gp_seq_needed_exp = gpseq;
 		mask = snp->srcu_data_have_cbs[idx];
 		snp->srcu_data_have_cbs[idx] = 0;
 		spin_unlock_irq(&snp->lock);
 		if (cbs) {
 			smp_mb(); /* GP end before CB invocation. */
-			srcu_schedule_cbs_snp(sp, snp, mask);
+			srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
 		}
 	}
 
@@ -517,15 +537,43 @@ static void srcu_gp_end(struct srcu_struct *sp)
 		srcu_gp_start(sp);
 		spin_unlock_irq(&sp->gp_lock);
 		/* Throttle expedited grace periods: Should be rare! */
-		srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) &&
-				    rcu_seq_ctr(gpseq) & 0xf
-				    ? 0
-				    : SRCU_INTERVAL);
+		srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
+				    ? 0 : SRCU_INTERVAL);
 	} else {
 		spin_unlock_irq(&sp->gp_lock);
 	}
 }
 
+/*
+ * Funnel-locking scheme to scalably mediate many concurrent expedited
+ * grace-period requests.  This function is invoked for the first known
+ * expedited request for a grace period that has already been requested,
+ * but without expediting.  To start a completely new grace period,
+ * whether expedited or not, use srcu_funnel_gp_start() instead.
+ */
+static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
+				  unsigned long s)
+{
+	unsigned long flags;
+
+	for (; snp != NULL; snp = snp->srcu_parent) {
+		if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
+		    ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
+			return;
+		spin_lock_irqsave(&snp->lock, flags);
+		if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
+			spin_unlock_irqrestore(&snp->lock, flags);
+			return;
+		}
+		WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
+		spin_unlock_irqrestore(&snp->lock, flags);
+	}
+	spin_lock_irqsave(&sp->gp_lock, flags);
+	if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
+		sp->srcu_gp_seq_needed_exp = s;
+	spin_unlock_irqrestore(&sp->gp_lock, flags);
+}
+
 /*
  * Funnel-locking scheme to scalably mediate many concurrent grace-period
  * requests.  The winner has to do the work of actually starting grace
@@ -533,9 +581,8 @@ static void srcu_gp_end(struct srcu_struct *sp)
  * number is recorded on at least their leaf srcu_node structure, or they
  * must take steps to invoke their own callbacks.
  */
-static void srcu_funnel_gp_start(struct srcu_struct *sp,
-				 struct srcu_data *sdp,
-				 unsigned long s)
+static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
+				 unsigned long s, bool do_norm)
 {
 	unsigned long flags;
 	int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
@@ -554,13 +601,20 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp,
 			spin_unlock_irqrestore(&snp->lock, flags);
 			if (snp == sdp->mynode && snp_seq != s) {
 				smp_mb(); /* CBs after GP! */
-				srcu_schedule_cbs_sdp(sdp, 0);
+				srcu_schedule_cbs_sdp(sdp, do_norm
+							   ? SRCU_INTERVAL
+							   : 0);
+				return;
 			}
+			if (!do_norm)
+				srcu_funnel_exp_start(sp, snp, s);
 			return;
 		}
 		snp->srcu_have_cbs[idx] = s;
 		if (snp == sdp->mynode)
 			snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
+		if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
+			snp->srcu_gp_seq_needed_exp = s;
 		spin_unlock_irqrestore(&snp->lock, flags);
 	}
 
@@ -573,6 +627,8 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp,
 		 */
 		smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/
 	}
+	if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
+		sp->srcu_gp_seq_needed_exp = s;
 
 	/* If grace period not already done and none in progress, start it. */
 	if (!rcu_seq_done(&sp->srcu_gp_seq, s) &&
@@ -580,9 +636,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp,
 		WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
 		srcu_gp_start(sp);
 		queue_delayed_work(system_power_efficient_wq, &sp->work,
-				   atomic_read(&sp->srcu_exp_cnt)
-				   ? 0
-				   : SRCU_INTERVAL);
+				   srcu_get_delay(sp));
 	}
 	spin_unlock_irqrestore(&sp->gp_lock, flags);
 }
@@ -597,7 +651,7 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
 	for (;;) {
 		if (srcu_readers_active_idx_check(sp, idx))
 			return true;
-		if (--trycount + !!atomic_read(&sp->srcu_exp_cnt) <= 0)
+		if (--trycount + !srcu_get_delay(sp) <= 0)
 			return false;
 		udelay(SRCU_RETRY_CHECK_DELAY);
 	}
@@ -650,10 +704,11 @@ static void srcu_flip(struct srcu_struct *sp)
  * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
  * srcu_struct structure.
  */
-void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
-	       rcu_callback_t func)
+void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
+		 rcu_callback_t func, bool do_norm)
 {
 	unsigned long flags;
+	bool needexp = false;
 	bool needgp = false;
 	unsigned long s;
 	struct srcu_data *sdp;
@@ -672,16 +727,28 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 		sdp->srcu_gp_seq_needed = s;
 		needgp = true;
 	}
+	if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) {
+		sdp->srcu_gp_seq_needed_exp = s;
+		needexp = true;
+	}
 	spin_unlock_irqrestore(&sdp->lock, flags);
 	if (needgp)
-		srcu_funnel_gp_start(sp, sdp, s);
+		srcu_funnel_gp_start(sp, sdp, s, do_norm);
+	else if (needexp)
+		srcu_funnel_exp_start(sp, sdp->mynode, s);
+}
+
+void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
+	       rcu_callback_t func)
+{
+	__call_srcu(sp, rhp, func, true);
 }
 EXPORT_SYMBOL_GPL(call_srcu);
 
 /*
  * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
  */
-static void __synchronize_srcu(struct srcu_struct *sp)
+static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
 {
 	struct rcu_synchronize rcu;
 
@@ -697,7 +764,7 @@ static void __synchronize_srcu(struct srcu_struct *sp)
 	check_init_srcu_struct(sp);
 	init_completion(&rcu.completion);
 	init_rcu_head_on_stack(&rcu.head);
-	call_srcu(sp, &rcu.head, wakeme_after_rcu);
+	__call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm);
 	wait_for_completion(&rcu.completion);
 	destroy_rcu_head_on_stack(&rcu.head);
 }
@@ -714,18 +781,7 @@ static void __synchronize_srcu(struct srcu_struct *sp)
  */
 void synchronize_srcu_expedited(struct srcu_struct *sp)
 {
-	bool do_norm = rcu_gp_is_normal();
-
-	check_init_srcu_struct(sp);
-	if (!do_norm) {
-		atomic_inc(&sp->srcu_exp_cnt);
-		smp_mb__after_atomic(); /* increment before GP. */
-	}
-	__synchronize_srcu(sp);
-	if (!do_norm) {
-		smp_mb__before_atomic(); /* GP before decrement. */
-		WARN_ON_ONCE(atomic_dec_return(&sp->srcu_exp_cnt) < 0);
-	}
+	__synchronize_srcu(sp, rcu_gp_is_normal());
 }
 EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
 
@@ -773,7 +829,7 @@ void synchronize_srcu(struct srcu_struct *sp)
 	if (rcu_gp_is_expedited())
 		synchronize_srcu_expedited(sp);
 	else
-		__synchronize_srcu(sp);
+		__synchronize_srcu(sp, true);
 }
 EXPORT_SYMBOL_GPL(synchronize_srcu);
 
@@ -1008,14 +1064,13 @@ void process_srcu(struct work_struct *work)
 	sp = container_of(work, struct srcu_struct, work.work);
 
 	srcu_advance_state(sp);
-	srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL);
+	srcu_reschedule(sp, srcu_get_delay(sp));
 }
 EXPORT_SYMBOL_GPL(process_srcu);
 
 void srcutorture_get_gp_data(enum rcutorture_type test_type,
-					   struct srcu_struct *sp, int *flags,
-					   unsigned long *gpnum,
-					   unsigned long *completed)
+			     struct srcu_struct *sp, int *flags,
+			     unsigned long *gpnum, unsigned long *completed)
 {
 	if (test_type != SRCU_FLAVOR)
 		return;
-- 
cgit v1.2.3


From 2da4b2a7fd8de52c44fa83c59f00d38a0d90caae Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 25 Apr 2017 11:34:40 -0700
Subject: srcu: Expedite first synchronize_srcu() when idle

Classic SRCU in effect expedites the first synchronize_srcu() when SRCU
is idle, and Mike Galbraith demonstrated that some use cases do in fact
rely on this behavior.  In particular, Mike showed that Steven Rostedt's
hotplug stress script takes 55 seconds with Classic SRCU and more than
16 -minutes- when running Tree SRCU.  Assuming that each Tree SRCU's call
to synchronize_srcu() takes four milliseconds, this implies that Steven's
test invokes synchronize_srcu() in isolation, but more than once per
200 microseconds.  Mike used ftrace to demonstrate that the time between
successive calls to synchronize_srcu() ranged from 118 to 342 microseconds,
with one outlier at 80 milliseconds.  This data clearly indicates that
Tree SRCU needs to expedite the first invocation of synchronize_srcu()
during an SRCU idle period.

This commit therefor introduces a srcu_might_be_idle() function that
probabilistically checks whether or not SRCU is idle.  This function is
used by synchronize_rcu() as an additional criterion in deciding whether
or not to expedite.

(Hat trick to Peter Zijlstra for his earlier suggestion that this might
in fact be a problem.  Which for all I know might have motivated Mike to
look into it.)

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Mike Galbraith <efault@gmx.de>
---
 kernel/rcu/srcutree.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 4b98e6f45166..2286e06fd159 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -402,6 +402,7 @@ static void srcu_gp_start(struct srcu_struct *sp)
 			      rcu_seq_current(&sp->srcu_gp_seq));
 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
 				       rcu_seq_snap(&sp->srcu_gp_seq));
+	smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
 	rcu_seq_start(&sp->srcu_gp_seq);
 	state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
 	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
@@ -676,6 +677,57 @@ static void srcu_flip(struct srcu_struct *sp)
 	smp_mb(); /* D */  /* Pairs with C. */
 }
 
+/*
+ * If SRCU is likely idle, return true, otherwise return false.
+ *
+ * Note that it is OK for several current from-idle requests for a new
+ * grace period from idle to specify expediting because they will all end
+ * up requesting the same grace period anyhow.  So no loss.
+ *
+ * Note also that if any CPU (including the current one) is still invoking
+ * callbacks, this function will nevertheless say "idle".  This is not
+ * ideal, but the overhead of checking all CPUs' callback lists is even
+ * less ideal, especially on large systems.  Furthermore, the wakeup
+ * can happen before the callback is fully removed, so we have no choice
+ * but to accept this type of error.
+ *
+ * This function is also subject to counter-wrap errors, but let's face
+ * it, if this function was preempted for enough time for the counters
+ * to wrap, it really doesn't matter whether or not we expedite the grace
+ * period.  The extra overhead of a needlessly expedited grace period is
+ * negligible when amoritized over that time period, and the extra latency
+ * of a needlessly non-expedited grace period is similarly negligible.
+ */
+static bool srcu_might_be_idle(struct srcu_struct *sp)
+{
+	unsigned long flags;
+	struct srcu_data *sdp;
+	unsigned long curseq;
+
+	/* If the local srcu_data structure has callbacks, not idle.  */
+	local_irq_save(flags);
+	sdp = this_cpu_ptr(sp->sda);
+	if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
+		local_irq_restore(flags);
+		return false; /* Callbacks already present, so not idle. */
+	}
+	local_irq_restore(flags);
+
+	/*
+	 * No local callbacks, so probabalistically probe global state.
+	 * Exact information would require acquiring locks, which would
+	 * kill scalability, hence the probabalistic nature of the probe.
+	 */
+	curseq = rcu_seq_current(&sp->srcu_gp_seq);
+	smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
+	if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed)))
+		return false; /* Grace period in progress, so not idle. */
+	smp_mb(); /* Order ->srcu_gp_seq with prior access. */
+	if (curseq != rcu_seq_current(&sp->srcu_gp_seq))
+		return false; /* GP # changed, so not idle. */
+	return true; /* With reasonable probability, idle! */
+}
+
 /*
  * Enqueue an SRCU callback on the srcu_data structure associated with
  * the current CPU and the specified srcu_struct structure, initiating
@@ -823,10 +875,15 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
  * Of course, these memory-ordering guarantees apply only when
  * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
  * passed the same srcu_struct structure.
+ *
+ * If SRCU is likely idle, expedite the first request.  This semantic
+ * was provided by Classic SRCU, and is relied upon by its users, so TREE
+ * SRCU must also provide it.  Note that detecting idleness is heuristic
+ * and subject to both false positives and negatives.
  */
 void synchronize_srcu(struct srcu_struct *sp)
 {
-	if (rcu_gp_is_expedited())
+	if (srcu_might_be_idle(sp) || rcu_gp_is_expedited())
 		synchronize_srcu_expedited(sp);
 	else
 		__synchronize_srcu(sp, true);
-- 
cgit v1.2.3


From 22607d66bbc3e81140d3bcf08894f4378eb36428 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 25 Apr 2017 14:03:11 -0700
Subject: srcu: Specify auto-expedite holdoff time

On small systems, in the absence of readers, expedited SRCU grace
periods can complete in less than a microsecond.  This means that an
eight-CPU system can have all CPUs doing synchronize_srcu() in a tight
loop and almost always expedite.  This might actually be desirable in
some situations, but in general it is a good way to needlessly burn
CPU cycles.  And in those situations where it is desirable, your friend
is the function synchronize_srcu_expedited().

For other situations, this commit adds a kernel parameter that specifies
a holdoff between completing the last SRCU grace period and auto-expediting
the next.  If the next grace period starts before the holdoff expires,
auto-expediting is disabled.  The holdoff is 50 microseconds by default,
and can be tuned to the desired number of nanoseconds.  A value of zero
disables auto-expediting.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Mike Galbraith <efault@gmx.de>
---
 Documentation/admin-guide/kernel-parameters.txt |  8 ++++++++
 include/linux/srcutree.h                        |  1 +
 kernel/rcu/srcutree.c                           | 18 +++++++++++++++++-
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index facc20a3f962..4a4b9266c4de 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3779,6 +3779,14 @@
 	spia_pedr=
 	spia_peddr=
 
+	srcutree.exp_holdoff [KNL]
+			Specifies how many nanoseconds must elapse
+			since the end of the last SRCU grace period for
+			a given srcu_struct until the next normal SRCU
+			grace period will be considered for automatic
+			expediting.  Set to zero to disable automatic
+			expediting.
+
 	stacktrace	[FTRACE]
 			Enabled the stack tracer on boot up.
 
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 86df48d3e97b..32e86d85fd11 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -84,6 +84,7 @@ struct srcu_struct {
 	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
 	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
 	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
+	unsigned long srcu_last_gp_end;		/* Last GP end timestamp (ns) */
 	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
 	unsigned long srcu_barrier_seq;		/* srcu_barrier seq #. */
 	struct mutex srcu_barrier_mutex;	/* Serialize barrier ops. */
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 2286e06fd159..74c283f9d15e 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -34,10 +34,14 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/delay.h>
+#include <linux/module.h>
 #include <linux/srcu.h>
 
 #include "rcu.h"
 
+ulong exp_holdoff = 50 * 1000; /* Holdoff (ns) for auto-expediting. */
+module_param(exp_holdoff, ulong, 0444);
+
 static void srcu_invoke_callbacks(struct work_struct *work);
 static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
 
@@ -145,6 +149,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
 		sp->sda = alloc_percpu(struct srcu_data);
 	init_srcu_struct_nodes(sp, is_static);
 	sp->srcu_gp_seq_needed_exp = 0;
+	sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
 	smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */
 	return sp->sda ? 0 : -ENOMEM;
 }
@@ -498,6 +503,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	idx = rcu_seq_state(sp->srcu_gp_seq);
 	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
 	cbdelay = srcu_get_delay(sp);
+	sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
 	rcu_seq_end(&sp->srcu_gp_seq);
 	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
 	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
@@ -700,9 +706,10 @@ static void srcu_flip(struct srcu_struct *sp)
  */
 static bool srcu_might_be_idle(struct srcu_struct *sp)
 {
+	unsigned long curseq;
 	unsigned long flags;
 	struct srcu_data *sdp;
-	unsigned long curseq;
+	unsigned long t;
 
 	/* If the local srcu_data structure has callbacks, not idle.  */
 	local_irq_save(flags);
@@ -718,6 +725,15 @@ static bool srcu_might_be_idle(struct srcu_struct *sp)
 	 * Exact information would require acquiring locks, which would
 	 * kill scalability, hence the probabalistic nature of the probe.
 	 */
+
+	/* First, see if enough time has passed since the last GP. */
+	t = ktime_get_mono_fast_ns();
+	if (exp_holdoff == 0 ||
+	    time_in_range_open(t, sp->srcu_last_gp_end,
+			       sp->srcu_last_gp_end + exp_holdoff))
+		return false; /* Too soon after last GP. */
+
+	/* Next, check for probable idleness. */
 	curseq = rcu_seq_current(&sp->srcu_gp_seq);
 	smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
 	if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed)))
-- 
cgit v1.2.3


From e8245c1b1a3bb8474f91c69ccd13637d3589bb2c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 26 Apr 2017 15:34:06 +0200
Subject: iommu: Include device.h in iommu.h

We make use of 'struct device' in iommu.h, so include
device.h to make it available explicitly.

Re-order the other headers while at it.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 6a6de187ddc0..3b4fe4b79d20 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -19,11 +19,13 @@
 #ifndef __LINUX_IOMMU_H
 #define __LINUX_IOMMU_H
 
+#include <linux/scatterlist.h>
+#include <linux/device.h>
+#include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/of.h>
-#include <linux/types.h>
-#include <linux/scatterlist.h>
+
 #include <trace/events/iommu.h>
 
 #define IOMMU_READ	(1 << 0)
-- 
cgit v1.2.3


From 207c6e36f122ebb1164d611c9f34f128313f47d5 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 26 Apr 2017 15:39:28 +0200
Subject: iommu: Move report_iommu_fault() to iommu.c

The function is in no fast-path, there is no need for it to
be static inline in a header file. This also removes the
need to include iommu trace-points in iommu.h.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/iommu.h | 41 ++---------------------------------------
 2 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9170fd498f46..4cb5792cbc21 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1655,6 +1655,48 @@ void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr)
 }
 EXPORT_SYMBOL_GPL(iommu_domain_window_disable);
 
+/**
+ * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
+ * @domain: the iommu domain where the fault has happened
+ * @dev: the device where the fault has happened
+ * @iova: the faulting address
+ * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...)
+ *
+ * This function should be called by the low-level IOMMU implementations
+ * whenever IOMMU faults happen, to allow high-level users, that are
+ * interested in such events, to know about them.
+ *
+ * This event may be useful for several possible use cases:
+ * - mere logging of the event
+ * - dynamic TLB/PTE loading
+ * - if restarting of the faulting device is required
+ *
+ * Returns 0 on success and an appropriate error code otherwise (if dynamic
+ * PTE/TLB loading will one day be supported, implementations will be able
+ * to tell whether it succeeded or not according to this return value).
+ *
+ * Specifically, -ENOSYS is returned if a fault handler isn't installed
+ * (though fault handlers can also return -ENOSYS, in case they want to
+ * elicit the default behavior of the IOMMU drivers).
+ */
+int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
+		       unsigned long iova, int flags)
+{
+	int ret = -ENOSYS;
+
+	/*
+	 * if upper layers showed interest and installed a fault handler,
+	 * invoke it.
+	 */
+	if (domain->handler)
+		ret = domain->handler(domain, dev, iova, flags,
+						domain->handler_token);
+
+	trace_io_page_fault(dev, iova, flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(report_iommu_fault);
+
 static int __init iommu_init(void)
 {
 	iommu_group_kset = kset_create_and_add("iommu_groups",
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3b4fe4b79d20..abaa0ca848bc 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -330,46 +330,9 @@ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
 				      phys_addr_t offset, u64 size,
 				      int prot);
 extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr);
-/**
- * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
- * @domain: the iommu domain where the fault has happened
- * @dev: the device where the fault has happened
- * @iova: the faulting address
- * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...)
- *
- * This function should be called by the low-level IOMMU implementations
- * whenever IOMMU faults happen, to allow high-level users, that are
- * interested in such events, to know about them.
- *
- * This event may be useful for several possible use cases:
- * - mere logging of the event
- * - dynamic TLB/PTE loading
- * - if restarting of the faulting device is required
- *
- * Returns 0 on success and an appropriate error code otherwise (if dynamic
- * PTE/TLB loading will one day be supported, implementations will be able
- * to tell whether it succeeded or not according to this return value).
- *
- * Specifically, -ENOSYS is returned if a fault handler isn't installed
- * (though fault handlers can also return -ENOSYS, in case they want to
- * elicit the default behavior of the IOMMU drivers).
- */
-static inline int report_iommu_fault(struct iommu_domain *domain,
-		struct device *dev, unsigned long iova, int flags)
-{
-	int ret = -ENOSYS;
 
-	/*
-	 * if upper layers showed interest and installed a fault handler,
-	 * invoke it.
-	 */
-	if (domain->handler)
-		ret = domain->handler(domain, dev, iova, flags,
-						domain->handler_token);
-
-	trace_io_page_fault(dev, iova, flags);
-	return ret;
-}
+extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
+			      unsigned long iova, int flags);
 
 static inline size_t iommu_map_sg(struct iommu_domain *domain,
 				  unsigned long iova, struct scatterlist *sg,
-- 
cgit v1.2.3


From 5af50993850a48ba749b122173d789ea90976c72 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 5 Apr 2017 17:54:56 +1000
Subject: KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller

This patch makes KVM capable of using the XIVE interrupt controller
to provide the standard PAPR "XICS" style hypercalls. It is necessary
for proper operations when the host uses XIVE natively.

This has been lightly tested on an actual system, including PCI
pass-through with a TG3 device.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[mpe: Cleanup pr_xxx(), unsplit pr_xxx() strings, etc., fix build
 failures by adding KVM_XIVE which depends on KVM_XICS and XIVE, and
 adding empty stubs for the kvm_xive_xxx() routines, fixup subject,
 integrate fixes from Paul for building PR=y HV=n]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/kvm_book3s_asm.h |    2 +
 arch/powerpc/include/asm/kvm_host.h       |   28 +-
 arch/powerpc/include/asm/kvm_ppc.h        |   74 ++
 arch/powerpc/include/asm/xive.h           |    9 +-
 arch/powerpc/kernel/asm-offsets.c         |   10 +
 arch/powerpc/kvm/Kconfig                  |    5 +
 arch/powerpc/kvm/Makefile                 |    4 +-
 arch/powerpc/kvm/book3s.c                 |   75 +-
 arch/powerpc/kvm/book3s_hv.c              |   51 +-
 arch/powerpc/kvm/book3s_hv_builtin.c      |  103 ++
 arch/powerpc/kvm/book3s_hv_rm_xics.c      |   10 +-
 arch/powerpc/kvm/book3s_hv_rm_xive.c      |   47 +
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |   62 +-
 arch/powerpc/kvm/book3s_rtas.c            |   21 +-
 arch/powerpc/kvm/book3s_xics.c            |   35 +-
 arch/powerpc/kvm/book3s_xics.h            |    7 +
 arch/powerpc/kvm/book3s_xive.c            | 1893 +++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_xive.h            |  256 ++++
 arch/powerpc/kvm/book3s_xive_template.c   |  503 ++++++++
 arch/powerpc/kvm/irq.h                    |    1 +
 arch/powerpc/kvm/powerpc.c                |   17 +-
 arch/powerpc/platforms/powernv/opal.c     |    1 +
 arch/powerpc/sysdev/xive/common.c         |  142 ++-
 arch/powerpc/sysdev/xive/native.c         |   86 +-
 include/linux/kvm_host.h                  |    1 -
 virt/kvm/kvm_main.c                       |    4 -
 26 files changed, 3358 insertions(+), 89 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv_rm_xive.c
 create mode 100644 arch/powerpc/kvm/book3s_xive.c
 create mode 100644 arch/powerpc/kvm/book3s_xive.h
 create mode 100644 arch/powerpc/kvm/book3s_xive_template.c

diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 0593d9479f74..b148496ffe36 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -111,6 +111,8 @@ struct kvmppc_host_state {
 	struct kvm_vcpu *kvm_vcpu;
 	struct kvmppc_vcore *kvm_vcore;
 	void __iomem *xics_phys;
+	void __iomem *xive_tima_phys;
+	void __iomem *xive_tima_virt;
 	u32 saved_xirr;
 	u64 dabr;
 	u64 host_mmcr[7];	/* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 7bba8f415627..5a8ab4a758f1 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -205,6 +205,12 @@ struct kvmppc_spapr_tce_table {
 /* XICS components, defined in book3s_xics.c */
 struct kvmppc_xics;
 struct kvmppc_icp;
+extern struct kvm_device_ops kvm_xics_ops;
+
+/* XIVE components, defined in book3s_xive.c */
+struct kvmppc_xive;
+struct kvmppc_xive_vcpu;
+extern struct kvm_device_ops kvm_xive_ops;
 
 struct kvmppc_passthru_irqmap;
 
@@ -293,6 +299,7 @@ struct kvm_arch {
 #endif
 #ifdef CONFIG_KVM_XICS
 	struct kvmppc_xics *xics;
+	struct kvmppc_xive *xive;
 	struct kvmppc_passthru_irqmap *pimap;
 #endif
 	struct kvmppc_ops *kvm_ops;
@@ -421,7 +428,7 @@ struct kvmppc_passthru_irqmap {
 
 #define KVMPPC_IRQ_DEFAULT	0
 #define KVMPPC_IRQ_MPIC		1
-#define KVMPPC_IRQ_XICS		2
+#define KVMPPC_IRQ_XICS		2 /* Includes a XIVE option */
 
 #define MMIO_HPTE_CACHE_SIZE	4
 
@@ -443,6 +450,21 @@ struct mmio_hpte_cache {
 
 struct openpic;
 
+/* W0 and W1 of a XIVE thread management context */
+union xive_tma_w01 {
+	struct {
+		u8	nsr;
+		u8	cppr;
+		u8	ipb;
+		u8	lsmfb;
+		u8	ack;
+		u8	inc;
+		u8	age;
+		u8	pipr;
+	};
+	__be64 w01;
+};
+
 struct kvm_vcpu_arch {
 	ulong host_stack;
 	u32 host_pid;
@@ -688,6 +710,10 @@ struct kvm_vcpu_arch {
 	struct openpic *mpic;	/* KVM_IRQ_MPIC */
 #ifdef CONFIG_KVM_XICS
 	struct kvmppc_icp *icp; /* XICS presentation controller */
+	struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
+	__be32 xive_cam_word;    /* Cooked W2 in proper endian with valid bit */
+	u32 xive_pushed;	 /* Is the VP pushed on the physical CPU ? */
+	union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c3877992eff9..ed52b13d9ffb 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -225,6 +225,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
 extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
 extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
 extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+
 extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
 				u32 priority);
 extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
@@ -412,6 +413,14 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 	paca[cpu].kvm_hstate.xics_phys = (void __iomem *)addr;
 }
 
+static inline void kvmppc_set_xive_tima(int cpu,
+					unsigned long phys_addr,
+					void __iomem *virt_addr)
+{
+	paca[cpu].kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
+	paca[cpu].kvm_hstate.xive_tima_virt = virt_addr;
+}
+
 static inline u32 kvmppc_get_xics_latch(void)
 {
 	u32 xirr;
@@ -442,6 +451,11 @@ static inline void __init kvm_cma_reserve(void)
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {}
 
+static inline void kvmppc_set_xive_tima(int cpu,
+					unsigned long phys_addr,
+					void __iomem *virt_addr)
+{}
+
 static inline u32 kvmppc_get_xics_latch(void)
 {
 	return 0;
@@ -492,6 +506,10 @@ extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr,
 					struct kvmppc_irq_map *irq_map,
 					struct kvmppc_passthru_irqmap *pimap,
 					bool *again);
+
+extern int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
+			       int level, bool line_status);
+
 extern int h_ipi_redirect;
 #else
 static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
@@ -509,6 +527,60 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 	{ return 0; }
 #endif
 
+#ifdef CONFIG_KVM_XIVE
+/*
+ * Below the first "xive" is the "eXternal Interrupt Virtualization Engine"
+ * ie. P9 new interrupt controller, while the second "xive" is the legacy
+ * "eXternal Interrupt Vector Entry" which is the configuration of an
+ * interrupt on the "xics" interrupt controller on P8 and earlier. Those
+ * two function consume or produce a legacy "XIVE" state from the
+ * new "XIVE" interrupt controller.
+ */
+extern int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+				u32 priority);
+extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+				u32 *priority);
+extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
+extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
+extern void kvmppc_xive_init_module(void);
+extern void kvmppc_xive_exit_module(void);
+
+extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+				    struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu);
+extern int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+				  struct irq_desc *host_desc);
+extern int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+				  struct irq_desc *host_desc);
+extern u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
+
+extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
+			       int level, bool line_status);
+#else
+static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+				       u32 priority) { return -1; }
+static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+				       u32 *priority) { return -1; }
+static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
+static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
+static inline void kvmppc_xive_init_module(void) { }
+static inline void kvmppc_xive_exit_module(void) { }
+
+static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+					   struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
+static inline void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
+static inline int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+					 struct irq_desc *host_desc) { return -ENODEV; }
+static inline int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+					 struct irq_desc *host_desc) { return -ENODEV; }
+static inline u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) { return 0; }
+static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { return -ENOENT; }
+
+static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
+				      int level, bool line_status) { return -ENODEV; }
+#endif /* CONFIG_KVM_XIVE */
+
 /*
  * Prototypes for functions called only from assembler code.
  * Having prototypes reduces sparse errors.
@@ -546,6 +618,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
 long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
                           unsigned long slb_v, unsigned int status, bool data);
 unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
+unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu);
+unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
 int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
                     unsigned long mfrr);
 int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 3cdbeaeac397..c8a822acf962 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -99,7 +99,6 @@ struct xive_q {
 #define XIVE_ESB_SET_PQ_01	0xd00
 #define XIVE_ESB_SET_PQ_10	0xe00
 #define XIVE_ESB_SET_PQ_11	0xf00
-#define XIVE_ESB_MASK		XIVE_ESB_SET_PQ_01
 
 #define XIVE_ESB_VAL_P		0x2
 #define XIVE_ESB_VAL_Q		0x1
@@ -136,11 +135,11 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
 				       __be32 *qpage, u32 order, bool can_escalate);
 extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
 
-extern bool __xive_irq_trigger(struct xive_irq_data *xd);
-extern bool __xive_irq_retrigger(struct xive_irq_data *xd);
-extern void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd);
-
+extern void xive_native_sync_source(u32 hw_irq);
 extern bool is_xive_irq(struct irq_chip *chip);
+extern int xive_native_enable_vp(u32 vp_id);
+extern int xive_native_disable_vp(u32 vp_id);
+extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
 
 #else
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4367e7df51a1..1822187813dc 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -630,6 +630,8 @@ int main(void)
 	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
 	HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
 	HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+	HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
+	HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
 	HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
 	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
 	HSTATE_FIELD(HSTATE_PTID, ptid);
@@ -715,6 +717,14 @@ int main(void)
 	OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6);
 #endif
 
+#ifdef CONFIG_KVM_XICS
+	DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu,
+					       arch.xive_saved_state));
+	DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
+					    arch.xive_cam_word));
+	DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
+#endif
+
 #ifdef CONFIG_KVM_EXIT_TIMING
 	OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu);
 	OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 029be26b5a17..b9d66e53b773 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -196,6 +196,11 @@ config KVM_XICS
 	  Specification) interrupt controller architecture used on
 	  IBM POWER (pSeries) servers.
 
+config KVM_XIVE
+	bool
+	default y
+	depends on KVM_XICS && PPC_XIVE_NATIVE && KVM_BOOK3S_HV_POSSIBLE
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index b87ccde2137a..d91a2604c496 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -74,7 +74,7 @@ kvm-hv-y += \
 	book3s_64_mmu_radix.o
 
 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
-	book3s_hv_rm_xics.o
+	book3s_hv_rm_xics.o book3s_hv_rm_xive.o
 
 ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
@@ -89,6 +89,8 @@ endif
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 	book3s_xics.o
 
+kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
+
 kvm-book3s_64-module-objs := \
 	$(common-objs-y) \
 	book3s.o \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index aedacefd961d..cb8009cd688d 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -35,6 +35,7 @@
 #include <asm/kvm_book3s.h>
 #include <asm/mmu_context.h>
 #include <asm/page.h>
+#include <asm/xive.h>
 
 #include "book3s.h"
 #include "trace.h"
@@ -578,11 +579,14 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
 			break;
 #ifdef CONFIG_KVM_XICS
 		case KVM_REG_PPC_ICP_STATE:
-			if (!vcpu->arch.icp) {
+			if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
 				r = -ENXIO;
 				break;
 			}
-			*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
+			if (xive_enabled())
+				*val = get_reg_val(id, kvmppc_xive_get_icp(vcpu));
+			else
+				*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
 			break;
 #endif /* CONFIG_KVM_XICS */
 		case KVM_REG_PPC_FSCR:
@@ -648,12 +652,14 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
 #endif /* CONFIG_VSX */
 #ifdef CONFIG_KVM_XICS
 		case KVM_REG_PPC_ICP_STATE:
-			if (!vcpu->arch.icp) {
+			if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
 				r = -ENXIO;
 				break;
 			}
-			r = kvmppc_xics_set_icp(vcpu,
-						set_reg_val(id, *val));
+			if (xive_enabled())
+				r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val));
+			else
+				r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
 			break;
 #endif /* CONFIG_KVM_XICS */
 		case KVM_REG_PPC_FSCR:
@@ -924,6 +930,50 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall)
 	return kvm->arch.kvm_ops->hcall_implemented(hcall);
 }
 
+#ifdef CONFIG_KVM_XICS
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+		bool line_status)
+{
+	if (xive_enabled())
+		return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level,
+					   line_status);
+	else
+		return kvmppc_xics_set_irq(kvm, irq_source_id, irq, level,
+					   line_status);
+}
+
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
+			      struct kvm *kvm, int irq_source_id,
+			      int level, bool line_status)
+{
+	return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
+			   level, line_status);
+}
+static int kvmppc_book3s_set_irq(struct kvm_kernel_irq_routing_entry *e,
+				 struct kvm *kvm, int irq_source_id, int level,
+				 bool line_status)
+{
+	return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
+}
+
+int kvm_irq_map_gsi(struct kvm *kvm,
+		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
+{
+	entries->gsi = gsi;
+	entries->type = KVM_IRQ_ROUTING_IRQCHIP;
+	entries->set = kvmppc_book3s_set_irq;
+	entries->irqchip.irqchip = 0;
+	entries->irqchip.pin = gsi;
+	return 1;
+}
+
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	return pin;
+}
+
+#endif /* CONFIG_KVM_XICS */
+
 static int kvmppc_book3s_init(void)
 {
 	int r;
@@ -934,12 +984,25 @@ static int kvmppc_book3s_init(void)
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	r = kvmppc_book3s_init_pr();
 #endif
-	return r;
 
+#ifdef CONFIG_KVM_XICS
+#ifdef CONFIG_KVM_XIVE
+	if (xive_enabled()) {
+		kvmppc_xive_init_module();
+		kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
+	} else
+#endif
+		kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
+#endif
+	return r;
 }
 
 static void kvmppc_book3s_exit(void)
 {
+#ifdef CONFIG_KVM_XICS
+	if (xive_enabled())
+		kvmppc_xive_exit_module();
+#endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	kvmppc_book3s_exit_pr();
 #endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index fadb75abfe37..128efb42ec4e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -67,6 +67,7 @@
 #include <asm/mmu.h>
 #include <asm/opal.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 
 #include "book3s.h"
 
@@ -837,6 +838,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	case H_IPOLL:
 	case H_XIRR_X:
 		if (kvmppc_xics_enabled(vcpu)) {
+			if (xive_enabled()) {
+				ret = H_NOT_AVAILABLE;
+				return RESUME_GUEST;
+			}
 			ret = kvmppc_xics_hcall(vcpu, req);
 			break;
 		}
@@ -2947,8 +2952,12 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			r = kvmppc_book3s_hv_page_fault(run, vcpu,
 				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
 			srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
-		} else if (r == RESUME_PASSTHROUGH)
-			r = kvmppc_xics_rm_complete(vcpu, 0);
+		} else if (r == RESUME_PASSTHROUGH) {
+			if (WARN_ON(xive_enabled()))
+				r = H_SUCCESS;
+			else
+				r = kvmppc_xics_rm_complete(vcpu, 0);
+		}
 	} while (is_kvmppc_resume_guest(r));
 
  out:
@@ -3400,10 +3409,20 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	/*
 	 * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed)
 	 * Set HVICE bit to enable hypervisor virtualization interrupts.
+	 * Set HEIC to prevent OS interrupts to go to hypervisor (should
+	 * be unnecessary but better safe than sorry in case we re-enable
+	 * EE in HV mode with this LPCR still set)
 	 */
 	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		lpcr &= ~LPCR_VPM0;
-		lpcr |= LPCR_HVICE;
+		lpcr |= LPCR_HVICE | LPCR_HEIC;
+
+		/*
+		 * If xive is enabled, we route 0x500 interrupts directly
+		 * to the guest.
+		 */
+		if (xive_enabled())
+			lpcr |= LPCR_LPES;
 	}
 
 	/*
@@ -3533,7 +3552,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 	struct kvmppc_irq_map *irq_map;
 	struct kvmppc_passthru_irqmap *pimap;
 	struct irq_chip *chip;
-	int i;
+	int i, rc = 0;
 
 	if (!kvm_irq_bypass)
 		return 1;
@@ -3558,10 +3577,10 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 	/*
 	 * For now, we only support interrupts for which the EOI operation
 	 * is an OPAL call followed by a write to XIRR, since that's
-	 * what our real-mode EOI code does.
+	 * what our real-mode EOI code does, or a XIVE interrupt
 	 */
 	chip = irq_data_get_irq_chip(&desc->irq_data);
-	if (!chip || !is_pnv_opal_msi(chip)) {
+	if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) {
 		pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
 			host_irq, guest_gsi);
 		mutex_unlock(&kvm->lock);
@@ -3603,7 +3622,12 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 	if (i == pimap->n_mapped)
 		pimap->n_mapped++;
 
-	kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
+	if (xive_enabled())
+		rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
+	else
+		kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
+	if (rc)
+		irq_map->r_hwirq = 0;
 
 	mutex_unlock(&kvm->lock);
 
@@ -3614,7 +3638,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 {
 	struct irq_desc *desc;
 	struct kvmppc_passthru_irqmap *pimap;
-	int i;
+	int i, rc = 0;
 
 	if (!kvm_irq_bypass)
 		return 0;
@@ -3641,9 +3665,12 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 		return -ENODEV;
 	}
 
-	kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
+	if (xive_enabled())
+		rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
+	else
+		kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
 
-	/* invalidate the entry */
+	/* invalidate the entry (what do do on error from the above ?) */
 	pimap->mapped[i].r_hwirq = 0;
 
 	/*
@@ -3652,7 +3679,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 	 */
 
 	mutex_unlock(&kvm->lock);
-	return 0;
+	return rc;
 }
 
 static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
@@ -3930,7 +3957,7 @@ static int kvmppc_book3s_init_hv(void)
 	 * indirectly, via OPAL.
 	 */
 #ifdef CONFIG_SMP
-	if (!get_paca()->kvm_hstate.xics_phys) {
+	if (!xive_enabled() && !get_paca()->kvm_hstate.xics_phys) {
 		struct device_node *np;
 
 		np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index a752e29977e0..846b40cb3a62 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -32,6 +32,24 @@
 
 #define KVM_CMA_CHUNK_ORDER	18
 
+#include "book3s_xics.h"
+#include "book3s_xive.h"
+
+/*
+ * The XIVE module will populate these when it loads
+ */
+unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
+unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
+int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
+		       unsigned long mfrr);
+int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
+int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
+EXPORT_SYMBOL_GPL(__xive_vm_h_xirr);
+EXPORT_SYMBOL_GPL(__xive_vm_h_ipoll);
+EXPORT_SYMBOL_GPL(__xive_vm_h_ipi);
+EXPORT_SYMBOL_GPL(__xive_vm_h_cppr);
+EXPORT_SYMBOL_GPL(__xive_vm_h_eoi);
+
 /*
  * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
  * should be power of 2.
@@ -210,6 +228,7 @@ void kvmhv_rm_send_ipi(int cpu)
 		__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
 		return;
 	}
+
 	/* On POWER8 for IPIs to threads in the same core, use msgsnd. */
 	if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
 	    cpu_first_thread_sibling(cpu) ==
@@ -406,6 +425,9 @@ static long kvmppc_read_one_intr(bool *again)
 	u8 host_ipi;
 	int64_t rc;
 
+	if (xive_enabled())
+		return 1;
+
 	/* see if a host IPI is pending */
 	host_ipi = local_paca->kvm_hstate.host_ipi;
 	if (host_ipi)
@@ -490,3 +512,84 @@ static long kvmppc_read_one_intr(bool *again)
 
 	return kvmppc_check_passthru(xisr, xirr, again);
 }
+
+#ifdef CONFIG_KVM_XICS
+static inline bool is_rm(void)
+{
+	return !(mfmsr() & MSR_DR);
+}
+
+unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
+{
+	if (xive_enabled()) {
+		if (is_rm())
+			return xive_rm_h_xirr(vcpu);
+		if (unlikely(!__xive_vm_h_xirr))
+			return H_NOT_AVAILABLE;
+		return __xive_vm_h_xirr(vcpu);
+	} else
+		return xics_rm_h_xirr(vcpu);
+}
+
+unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.gpr[5] = get_tb();
+	if (xive_enabled()) {
+		if (is_rm())
+			return xive_rm_h_xirr(vcpu);
+		if (unlikely(!__xive_vm_h_xirr))
+			return H_NOT_AVAILABLE;
+		return __xive_vm_h_xirr(vcpu);
+	} else
+		return xics_rm_h_xirr(vcpu);
+}
+
+unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
+{
+	if (xive_enabled()) {
+		if (is_rm())
+			return xive_rm_h_ipoll(vcpu, server);
+		if (unlikely(!__xive_vm_h_ipoll))
+			return H_NOT_AVAILABLE;
+		return __xive_vm_h_ipoll(vcpu, server);
+	} else
+		return H_TOO_HARD;
+}
+
+int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+		    unsigned long mfrr)
+{
+	if (xive_enabled()) {
+		if (is_rm())
+			return xive_rm_h_ipi(vcpu, server, mfrr);
+		if (unlikely(!__xive_vm_h_ipi))
+			return H_NOT_AVAILABLE;
+		return __xive_vm_h_ipi(vcpu, server, mfrr);
+	} else
+		return xics_rm_h_ipi(vcpu, server, mfrr);
+}
+
+int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	if (xive_enabled()) {
+		if (is_rm())
+			return xive_rm_h_cppr(vcpu, cppr);
+		if (unlikely(!__xive_vm_h_cppr))
+			return H_NOT_AVAILABLE;
+		return __xive_vm_h_cppr(vcpu, cppr);
+	} else
+		return xics_rm_h_cppr(vcpu, cppr);
+}
+
+int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	if (xive_enabled()) {
+		if (is_rm())
+			return xive_rm_h_eoi(vcpu, xirr);
+		if (unlikely(!__xive_vm_h_eoi))
+			return H_NOT_AVAILABLE;
+		return __xive_vm_h_eoi(vcpu, xirr);
+	} else
+		return xics_rm_h_eoi(vcpu, xirr);
+}
+#endif /* CONFIG_KVM_XICS */
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 3a1a463a039a..f8068801ac36 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -485,7 +485,7 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 }
 
 
-unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
+unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
 {
 	union kvmppc_icp_state old_state, new_state;
 	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -523,8 +523,8 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
 	return check_too_hard(xics, icp);
 }
 
-int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
-		    unsigned long mfrr)
+int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+		  unsigned long mfrr)
 {
 	union kvmppc_icp_state old_state, new_state;
 	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -610,7 +610,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 	return check_too_hard(xics, this_icp);
 }
 
-int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
 {
 	union kvmppc_icp_state old_state, new_state;
 	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -730,7 +730,7 @@ static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
 	return check_too_hard(xics, icp);
 }
 
-int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 {
 	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
 	struct kvmppc_icp *icp = vcpu->arch.icp;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c
new file mode 100644
index 000000000000..abf5f01b6eb1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xive.c
@@ -0,0 +1,47 @@
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+#include <asm/synch.h>
+#include <asm/cputhreads.h>
+#include <asm/pgtable.h>
+#include <asm/ppc-opcode.h>
+#include <asm/pnv-pci.h>
+#include <asm/opal.h>
+#include <asm/smp.h>
+#include <asm/asm-prototypes.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+
+#include "book3s_xive.h"
+
+/* XXX */
+#include <asm/udbg.h>
+//#define DBG(fmt...) udbg_printf(fmt)
+#define DBG(fmt...) do { } while(0)
+
+static inline void __iomem *get_tima_phys(void)
+{
+	return local_paca->kvm_hstate.xive_tima_phys;
+}
+
+#undef XIVE_RUNTIME_CHECKS
+#define X_PFX xive_rm_
+#define X_STATIC
+#define X_STAT_PFX stat_rm_
+#define __x_tima		get_tima_phys()
+#define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_page))
+#define __x_trig_page(xd)	((void __iomem *)((xd)->trig_page))
+#define __x_readb	__raw_rm_readb
+#define __x_writeb	__raw_rm_writeb
+#define __x_readw	__raw_rm_readw
+#define __x_readq	__raw_rm_readq
+#define __x_writeq	__raw_rm_writeq
+
+#include "book3s_xive_template.c"
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 7c6477d1840a..bdb3f76ceb6b 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -30,6 +30,7 @@
 #include <asm/book3s/64/mmu-hash.h>
 #include <asm/tm.h>
 #include <asm/opal.h>
+#include <asm/xive-regs.h>
 
 #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
 
@@ -970,6 +971,23 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	cmpwi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
+#ifdef CONFIG_KVM_XICS
+	/* We are entering the guest on that thread, push VCPU to XIVE */
+	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
+	cmpldi	cr0, r10, r0
+	beq	no_xive
+	ld	r11, VCPU_XIVE_SAVED_STATE(r4)
+	li	r9, TM_QW1_OS
+	stdcix	r11,r9,r10
+	eieio
+	lwz	r11, VCPU_XIVE_CAM_WORD(r4)
+	li	r9, TM_QW1_OS + TM_WORD2
+	stwcix	r11,r9,r10
+	li	r9, 1
+	stw	r9, VCPU_XIVE_PUSHED(r4)
+no_xive:
+#endif /* CONFIG_KVM_XICS */
+
 deliver_guest_interrupt:
 	ld	r6, VCPU_CTR(r4)
 	ld	r7, VCPU_XER(r4)
@@ -1307,6 +1325,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	blt	deliver_guest_interrupt
 
 guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
+#ifdef CONFIG_KVM_XICS
+	/* We are exiting, pull the VP from the XIVE */
+	lwz	r0, VCPU_XIVE_PUSHED(r9)
+	cmpwi	cr0, r0, 0
+	beq	1f
+	li	r7, TM_SPC_PULL_OS_CTX
+	li	r6, TM_QW1_OS
+	mfmsr	r0
+	andi.	r0, r0, MSR_IR		/* in real mode? */
+	beq	2f
+	ld	r10, HSTATE_XIVE_TIMA_VIRT(r13)
+	cmpldi	cr0, r10, 0
+	beq	1f
+	/* First load to pull the context, we ignore the value */
+	lwzx	r11, r7, r10
+	eieio
+	/* Second load to recover the context state (Words 0 and 1) */
+	ldx	r11, r6, r10
+	b	3f
+2:	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
+	cmpldi	cr0, r10, 0
+	beq	1f
+	/* First load to pull the context, we ignore the value */
+	lwzcix	r11, r7, r10
+	eieio
+	/* Second load to recover the context state (Words 0 and 1) */
+	ldcix	r11, r6, r10
+3:	std	r11, VCPU_XIVE_SAVED_STATE(r9)
+	/* Fixup some of the state for the next load */
+	li	r10, 0
+	li	r0, 0xff
+	stw	r10, VCPU_XIVE_PUSHED(r9)
+	stb	r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
+	stb	r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
+1:
+#endif /* CONFIG_KVM_XICS */
 	/* Save more register state  */
 	mfdar	r6
 	mfdsisr	r7
@@ -2011,7 +2065,7 @@ hcall_real_table:
 	.long	DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
 	.long	DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
 	.long	DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
-	.long	0		/* 0x70 - H_IPOLL */
+	.long	DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
 	.long	DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
 #else
 	.long	0		/* 0x64 - H_EOI */
@@ -2181,7 +2235,11 @@ hcall_real_table:
 	.long	0		/* 0x2f0 */
 	.long	0		/* 0x2f4 */
 	.long	0		/* 0x2f8 */
-	.long	0		/* 0x2fc */
+#ifdef CONFIG_KVM_XICS
+	.long	DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
+#else
+	.long	0		/* 0x2fc - H_XIRR_X*/
+#endif
 	.long	DOTSYM(kvmppc_h_random) - hcall_real_table
 	.globl	hcall_real_table_end
 hcall_real_table_end:
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 20528701835b..2d3b2b1cc272 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -16,6 +16,7 @@
 #include <asm/kvm_ppc.h>
 #include <asm/hvcall.h>
 #include <asm/rtas.h>
+#include <asm/xive.h>
 
 #ifdef CONFIG_KVM_XICS
 static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
@@ -32,7 +33,10 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
 	server = be32_to_cpu(args->args[1]);
 	priority = be32_to_cpu(args->args[2]);
 
-	rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
+	if (xive_enabled())
+		rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority);
+	else
+		rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
 	if (rc)
 		rc = -3;
 out:
@@ -52,7 +56,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
 	irq = be32_to_cpu(args->args[0]);
 
 	server = priority = 0;
-	rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
+	if (xive_enabled())
+		rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority);
+	else
+		rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
 	if (rc) {
 		rc = -3;
 		goto out;
@@ -76,7 +83,10 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
 
 	irq = be32_to_cpu(args->args[0]);
 
-	rc = kvmppc_xics_int_off(vcpu->kvm, irq);
+	if (xive_enabled())
+		rc = kvmppc_xive_int_off(vcpu->kvm, irq);
+	else
+		rc = kvmppc_xics_int_off(vcpu->kvm, irq);
 	if (rc)
 		rc = -3;
 out:
@@ -95,7 +105,10 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
 
 	irq = be32_to_cpu(args->args[0]);
 
-	rc = kvmppc_xics_int_on(vcpu->kvm, irq);
+	if (xive_enabled())
+		rc = kvmppc_xive_int_on(vcpu->kvm, irq);
+	else
+		rc = kvmppc_xics_int_on(vcpu->kvm, irq);
 	if (rc)
 		rc = -3;
 out:
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index ef4fd528c193..e6829c415bc8 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1307,8 +1307,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
 	return 0;
 }
 
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
-		bool line_status)
+int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+			bool line_status)
 {
 	struct kvmppc_xics *xics = kvm->arch.xics;
 
@@ -1317,14 +1317,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	return ics_deliver_irq(xics, irq, level);
 }
 
-int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
-			      struct kvm *kvm, int irq_source_id,
-			      int level, bool line_status)
-{
-	return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
-			   level, line_status);
-}
-
 static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	struct kvmppc_xics *xics = dev->private;
@@ -1458,29 +1450,6 @@ void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
 	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
 }
 
-static int xics_set_irq(struct kvm_kernel_irq_routing_entry *e,
-			struct kvm *kvm, int irq_source_id, int level,
-			bool line_status)
-{
-	return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
-}
-
-int kvm_irq_map_gsi(struct kvm *kvm,
-		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
-{
-	entries->gsi = gsi;
-	entries->type = KVM_IRQ_ROUTING_IRQCHIP;
-	entries->set = xics_set_irq;
-	entries->irqchip.irqchip = 0;
-	entries->irqchip.pin = gsi;
-	return 1;
-}
-
-int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	return pin;
-}
-
 void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
 			    unsigned long host_irq)
 {
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index ec5474cf70c6..453c9e518c19 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -10,6 +10,7 @@
 #ifndef _KVM_PPC_BOOK3S_XICS_H
 #define _KVM_PPC_BOOK3S_XICS_H
 
+#ifdef CONFIG_KVM_XICS
 /*
  * We use a two-level tree to store interrupt source information.
  * There are up to 1024 ICS nodes, each of which can represent
@@ -144,5 +145,11 @@ static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
 	return ics;
 }
 
+extern unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu);
+extern int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+			 unsigned long mfrr);
+extern int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
+extern int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
 
+#endif /* CONFIG_KVM_XICS */
 #endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
new file mode 100644
index 000000000000..7807ee17af4b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -0,0 +1,1893 @@
+/*
+ * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "xive-kvm: " fmt
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/debug.h>
+#include <asm/time.h>
+#include <asm/opal.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xive.h"
+
+
+/*
+ * Virtual mode variants of the hcalls for use on radix/radix
+ * with AIL. They require the VCPU's VP to be "pushed"
+ *
+ * We still instanciate them here because we use some of the
+ * generated utility functions as well in this file.
+ */
+#define XIVE_RUNTIME_CHECKS
+#define X_PFX xive_vm_
+#define X_STATIC static
+#define X_STAT_PFX stat_vm_
+#define __x_tima		xive_tima
+#define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_mmio))
+#define __x_trig_page(xd)	((void __iomem *)((xd)->trig_mmio))
+#define __x_readb	__raw_readb
+#define __x_writeb	__raw_writeb
+#define __x_readw	__raw_readw
+#define __x_readq	__raw_readq
+#define __x_writeq	__raw_writeq
+
+#include "book3s_xive_template.c"
+
+/*
+ * We leave a gap of a couple of interrupts in the queue to
+ * account for the IPI and additional safety guard.
+ */
+#define XIVE_Q_GAP	2
+
+/*
+ * This is a simple trigger for a generic XIVE IRQ. This must
+ * only be called for interrupts that support a trigger page
+ */
+static bool xive_irq_trigger(struct xive_irq_data *xd)
+{
+	/* This should be only for MSIs */
+	if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
+		return false;
+
+	/* Those interrupts should always have a trigger page */
+	if (WARN_ON(!xd->trig_mmio))
+		return false;
+
+	out_be64(xd->trig_mmio, 0);
+
+	return true;
+}
+
+static irqreturn_t xive_esc_irq(int irq, void *data)
+{
+	struct kvm_vcpu *vcpu = data;
+
+	/* We use the existing H_PROD mechanism to wake up the target */
+	vcpu->arch.prodded = 1;
+	smp_mb();
+	if (vcpu->arch.ceded)
+		kvmppc_fast_vcpu_kick(vcpu);
+
+	return IRQ_HANDLED;
+}
+
+static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *q = &xc->queues[prio];
+	char *name = NULL;
+	int rc;
+
+	/* Already there ? */
+	if (xc->esc_virq[prio])
+		return 0;
+
+	/* Hook up the escalation interrupt */
+	xc->esc_virq[prio] = irq_create_mapping(NULL, q->esc_irq);
+	if (!xc->esc_virq[prio]) {
+		pr_err("Failed to map escalation interrupt for queue %d of VCPU %d\n",
+		       prio, xc->server_num);
+		return -EIO;
+	}
+
+	/*
+	 * Future improvement: start with them disabled
+	 * and handle DD2 and later scheme of merged escalation
+	 * interrupts
+	 */
+	name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+			 vcpu->kvm->arch.lpid, xc->server_num, prio);
+	if (!name) {
+		pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
+		       prio, xc->server_num);
+		rc = -ENOMEM;
+		goto error;
+	}
+	rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
+			 IRQF_NO_THREAD, name, vcpu);
+	if (rc) {
+		pr_err("Failed to request escalation interrupt for queue %d of VCPU %d\n",
+		       prio, xc->server_num);
+		goto error;
+	}
+	xc->esc_virq_names[prio] = name;
+	return 0;
+error:
+	irq_dispose_mapping(xc->esc_virq[prio]);
+	xc->esc_virq[prio] = 0;
+	kfree(name);
+	return rc;
+}
+
+static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = xc->xive;
+	struct xive_q *q =  &xc->queues[prio];
+	void *qpage;
+	int rc;
+
+	if (WARN_ON(q->qpage))
+		return 0;
+
+	/* Allocate the queue and retrieve infos on current node for now */
+	qpage = (__be32 *)__get_free_pages(GFP_KERNEL, xive->q_page_order);
+	if (!qpage) {
+		pr_err("Failed to allocate queue %d for VCPU %d\n",
+		       prio, xc->server_num);
+		return -ENOMEM;;
+	}
+	memset(qpage, 0, 1 << xive->q_order);
+
+	/*
+	 * Reconfigure the queue. This will set q->qpage only once the
+	 * queue is fully configured. This is a requirement for prio 0
+	 * as we will stop doing EOIs for every IPI as soon as we observe
+	 * qpage being non-NULL, and instead will only EOI when we receive
+	 * corresponding queue 0 entries
+	 */
+	rc = xive_native_configure_queue(xc->vp_id, q, prio, qpage,
+					 xive->q_order, true);
+	if (rc)
+		pr_err("Failed to configure queue %d for VCPU %d\n",
+		       prio, xc->server_num);
+	return rc;
+}
+
+/* Called with kvm_lock held */
+static int xive_check_provisioning(struct kvm *kvm, u8 prio)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvm_vcpu *vcpu;
+	int i, rc;
+
+	lockdep_assert_held(&kvm->lock);
+
+	/* Already provisioned ? */
+	if (xive->qmap & (1 << prio))
+		return 0;
+
+	pr_devel("Provisioning prio... %d\n", prio);
+
+	/* Provision each VCPU and enable escalations */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!vcpu->arch.xive_vcpu)
+			continue;
+		rc = xive_provision_queue(vcpu, prio);
+		if (rc == 0)
+			xive_attach_escalation(vcpu, prio);
+		if (rc)
+			return rc;
+	}
+
+	/* Order previous stores and mark it as provisioned */
+	mb();
+	xive->qmap |= (1 << prio);
+	return 0;
+}
+
+static void xive_inc_q_pending(struct kvm *kvm, u32 server, u8 prio)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvmppc_xive_vcpu *xc;
+	struct xive_q *q;
+
+	/* Locate target server */
+	vcpu = kvmppc_xive_find_server(kvm, server);
+	if (!vcpu) {
+		pr_warn("%s: Can't find server %d\n", __func__, server);
+		return;
+	}
+	xc = vcpu->arch.xive_vcpu;
+	if (WARN_ON(!xc))
+		return;
+
+	q = &xc->queues[prio];
+	atomic_inc(&q->pending_count);
+}
+
+static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *q;
+	u32 max;
+
+	if (WARN_ON(!xc))
+		return -ENXIO;
+	if (!xc->valid)
+		return -ENXIO;
+
+	q = &xc->queues[prio];
+	if (WARN_ON(!q->qpage))
+		return -ENXIO;
+
+	/* Calculate max number of interrupts in that queue. */
+	max = (q->msk + 1) - XIVE_Q_GAP;
+	return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY;
+}
+
+static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
+{
+	struct kvm_vcpu *vcpu;
+	int i, rc;
+
+	/* Locate target server */
+	vcpu = kvmppc_xive_find_server(kvm, *server);
+	if (!vcpu) {
+		pr_devel("Can't find server %d\n", *server);
+		return -EINVAL;
+	}
+
+	pr_devel("Finding irq target on 0x%x/%d...\n", *server, prio);
+
+	/* Try pick it */
+	rc = xive_try_pick_queue(vcpu, prio);
+	if (rc == 0)
+		return rc;
+
+	pr_devel(" .. failed, looking up candidate...\n");
+
+	/* Failed, pick another VCPU */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!vcpu->arch.xive_vcpu)
+			continue;
+		rc = xive_try_pick_queue(vcpu, prio);
+		if (rc == 0) {
+			*server = vcpu->arch.xive_vcpu->server_num;
+			pr_devel("  found on 0x%x/%d\n", *server, prio);
+			return rc;
+		}
+	}
+	pr_devel("  no available target !\n");
+
+	/* No available target ! */
+	return -EBUSY;
+}
+
+static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
+			     struct kvmppc_xive_src_block *sb,
+			     struct kvmppc_xive_irq_state *state)
+{
+	struct xive_irq_data *xd;
+	u32 hw_num;
+	u8 old_prio;
+	u64 val;
+
+	/*
+	 * Take the lock, set masked, try again if racing
+	 * with H_EOI
+	 */
+	for (;;) {
+		arch_spin_lock(&sb->lock);
+		old_prio = state->guest_priority;
+		state->guest_priority = MASKED;
+		mb();
+		if (!state->in_eoi)
+			break;
+		state->guest_priority = old_prio;
+		arch_spin_unlock(&sb->lock);
+	}
+
+	/* No change ? Bail */
+	if (old_prio == MASKED)
+		return old_prio;
+
+	/* Get the right irq */
+	kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+	/*
+	 * If the interrupt is marked as needing masking via
+	 * firmware, we do it here. Firmware masking however
+	 * is "lossy", it won't return the old p and q bits
+	 * and won't set the interrupt to a state where it will
+	 * record queued ones. If this is an issue we should do
+	 * lazy masking instead.
+	 *
+	 * For now, we work around this in unmask by forcing
+	 * an interrupt whenever we unmask a non-LSI via FW
+	 * (if ever).
+	 */
+	if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
+		xive_native_configure_irq(hw_num,
+					  xive->vp_base + state->act_server,
+					  MASKED, state->number);
+		/* set old_p so we can track if an H_EOI was done */
+		state->old_p = true;
+		state->old_q = false;
+	} else {
+		/* Set PQ to 10, return old P and old Q and remember them */
+		val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10);
+		state->old_p = !!(val & 2);
+		state->old_q = !!(val & 1);
+
+		/*
+		 * Synchronize hardware to sensure the queues are updated
+		 * when masking
+		 */
+		xive_native_sync_source(hw_num);
+	}
+
+	return old_prio;
+}
+
+static void xive_lock_for_unmask(struct kvmppc_xive_src_block *sb,
+				 struct kvmppc_xive_irq_state *state)
+{
+	/*
+	 * Take the lock try again if racing with H_EOI
+	 */
+	for (;;) {
+		arch_spin_lock(&sb->lock);
+		if (!state->in_eoi)
+			break;
+		arch_spin_unlock(&sb->lock);
+	}
+}
+
+static void xive_finish_unmask(struct kvmppc_xive *xive,
+			       struct kvmppc_xive_src_block *sb,
+			       struct kvmppc_xive_irq_state *state,
+			       u8 prio)
+{
+	struct xive_irq_data *xd;
+	u32 hw_num;
+
+	/* If we aren't changing a thing, move on */
+	if (state->guest_priority != MASKED)
+		goto bail;
+
+	/* Get the right irq */
+	kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+	/*
+	 * See command in xive_lock_and_mask() concerning masking
+	 * via firmware.
+	 */
+	if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
+		xive_native_configure_irq(hw_num,
+					  xive->vp_base + state->act_server,
+					  state->act_priority, state->number);
+		/* If an EOI is needed, do it here */
+		if (!state->old_p)
+			xive_vm_source_eoi(hw_num, xd);
+		/* If this is not an LSI, force a trigger */
+		if (!(xd->flags & OPAL_XIVE_IRQ_LSI))
+			xive_irq_trigger(xd);
+		goto bail;
+	}
+
+	/* Old Q set, set PQ to 11 */
+	if (state->old_q)
+		xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
+
+	/*
+	 * If not old P, then perform an "effective" EOI,
+	 * on the source. This will handle the cases where
+	 * FW EOI is needed.
+	 */
+	if (!state->old_p)
+		xive_vm_source_eoi(hw_num, xd);
+
+	/* Synchronize ordering and mark unmasked */
+	mb();
+bail:
+	state->guest_priority = prio;
+}
+
+/*
+ * Target an interrupt to a given server/prio, this will fallback
+ * to another server if necessary and perform the HW targetting
+ * updates as needed
+ *
+ * NOTE: Must be called with the state lock held
+ */
+static int xive_target_interrupt(struct kvm *kvm,
+				 struct kvmppc_xive_irq_state *state,
+				 u32 server, u8 prio)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	u32 hw_num;
+	int rc;
+
+	/*
+	 * This will return a tentative server and actual
+	 * priority. The count for that new target will have
+	 * already been incremented.
+	 */
+	rc = xive_select_target(kvm, &server, prio);
+
+	/*
+	 * We failed to find a target ? Not much we can do
+	 * at least until we support the GIQ.
+	 */
+	if (rc)
+		return rc;
+
+	/*
+	 * Increment the old queue pending count if there
+	 * was one so that the old queue count gets adjusted later
+	 * when observed to be empty.
+	 */
+	if (state->act_priority != MASKED)
+		xive_inc_q_pending(kvm,
+				   state->act_server,
+				   state->act_priority);
+	/*
+	 * Update state and HW
+	 */
+	state->act_priority = prio;
+	state->act_server = server;
+
+	/* Get the right irq */
+	kvmppc_xive_select_irq(state, &hw_num, NULL);
+
+	return xive_native_configure_irq(hw_num,
+					 xive->vp_base + server,
+					 prio, state->number);
+}
+
+/*
+ * Targetting rules: In order to avoid losing track of
+ * pending interrupts accross mask and unmask, which would
+ * allow queue overflows, we implement the following rules:
+ *
+ *  - Unless it was never enabled (or we run out of capacity)
+ *    an interrupt is always targetted at a valid server/queue
+ *    pair even when "masked" by the guest. This pair tends to
+ *    be the last one used but it can be changed under some
+ *    circumstances. That allows us to separate targetting
+ *    from masking, we only handle accounting during (re)targetting,
+ *    this also allows us to let an interrupt drain into its target
+ *    queue after masking, avoiding complex schemes to remove
+ *    interrupts out of remote processor queues.
+ *
+ *  - When masking, we set PQ to 10 and save the previous value
+ *    of P and Q.
+ *
+ *  - When unmasking, if saved Q was set, we set PQ to 11
+ *    otherwise we leave PQ to the HW state which will be either
+ *    10 if nothing happened or 11 if the interrupt fired while
+ *    masked. Effectively we are OR'ing the previous Q into the
+ *    HW Q.
+ *
+ *    Then if saved P is clear, we do an effective EOI (Q->P->Trigger)
+ *    which will unmask the interrupt and shoot a new one if Q was
+ *    set.
+ *
+ *    Otherwise (saved P is set) we leave PQ unchanged (so 10 or 11,
+ *    effectively meaning an H_EOI from the guest is still expected
+ *    for that interrupt).
+ *
+ *  - If H_EOI occurs while masked, we clear the saved P.
+ *
+ *  - When changing target, we account on the new target and
+ *    increment a separate "pending" counter on the old one.
+ *    This pending counter will be used to decrement the old
+ *    target's count when its queue has been observed empty.
+ */
+
+int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+			 u32 priority)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u8 new_act_prio;
+	int rc = 0;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	pr_devel("set_xive ! irq 0x%x server 0x%x prio %d\n",
+		 irq, server, priority);
+
+	/* First, check provisioning of queues */
+	if (priority != MASKED)
+		rc = xive_check_provisioning(xive->kvm,
+			      xive_prio_from_guest(priority));
+	if (rc) {
+		pr_devel("  provisioning failure %d !\n", rc);
+		return rc;
+	}
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	/*
+	 * We first handle masking/unmasking since the locking
+	 * might need to be retried due to EOIs, we'll handle
+	 * targetting changes later. These functions will return
+	 * with the SB lock held.
+	 *
+	 * xive_lock_and_mask() will also set state->guest_priority
+	 * but won't otherwise change other fields of the state.
+	 *
+	 * xive_lock_for_unmask will not actually unmask, this will
+	 * be done later by xive_finish_unmask() once the targetting
+	 * has been done, so we don't try to unmask an interrupt
+	 * that hasn't yet been targetted.
+	 */
+	if (priority == MASKED)
+		xive_lock_and_mask(xive, sb, state);
+	else
+		xive_lock_for_unmask(sb, state);
+
+
+	/*
+	 * Then we handle targetting.
+	 *
+	 * First calculate a new "actual priority"
+	 */
+	new_act_prio = state->act_priority;
+	if (priority != MASKED)
+		new_act_prio = xive_prio_from_guest(priority);
+
+	pr_devel(" new_act_prio=%x act_server=%x act_prio=%x\n",
+		 new_act_prio, state->act_server, state->act_priority);
+
+	/*
+	 * Then check if we actually need to change anything,
+	 *
+	 * The condition for re-targetting the interrupt is that
+	 * we have a valid new priority (new_act_prio is not 0xff)
+	 * and either the server or the priority changed.
+	 *
+	 * Note: If act_priority was ff and the new priority is
+	 *       also ff, we don't do anything and leave the interrupt
+	 *       untargetted. An attempt of doing an int_on on an
+	 *       untargetted interrupt will fail. If that is a problem
+	 *       we could initialize interrupts with valid default
+	 */
+
+	if (new_act_prio != MASKED &&
+	    (state->act_server != server ||
+	     state->act_priority != new_act_prio))
+		rc = xive_target_interrupt(kvm, state, server, new_act_prio);
+
+	/*
+	 * Perform the final unmasking of the interrupt source
+	 * if necessary
+	 */
+	if (priority != MASKED)
+		xive_finish_unmask(xive, sb, state, priority);
+
+	/*
+	 * Finally Update saved_priority to match. Only int_on/off
+	 * set this field to a different value.
+	 */
+	state->saved_priority = priority;
+
+	arch_spin_unlock(&sb->lock);
+	return rc;
+}
+
+int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+			 u32 *priority)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+	arch_spin_lock(&sb->lock);
+	*server = state->guest_server;
+	*priority = state->guest_priority;
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+
+int kvmppc_xive_int_on(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	pr_devel("int_on(irq=0x%x)\n", irq);
+
+	/*
+	 * Check if interrupt was not targetted
+	 */
+	if (state->act_priority == MASKED) {
+		pr_devel("int_on on untargetted interrupt\n");
+		return -EINVAL;
+	}
+
+	/* If saved_priority is 0xff, do nothing */
+	if (state->saved_priority == MASKED)
+		return 0;
+
+	/*
+	 * Lock and unmask it.
+	 */
+	xive_lock_for_unmask(sb, state);
+	xive_finish_unmask(xive, sb, state, state->saved_priority);
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+
+int kvmppc_xive_int_off(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	pr_devel("int_off(irq=0x%x)\n", irq);
+
+	/*
+	 * Lock and mask
+	 */
+	state->saved_priority = xive_lock_and_mask(xive, sb, state);
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+
+static bool xive_restore_pending_irq(struct kvmppc_xive *xive, u32 irq)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return false;
+	state = &sb->irq_state[idx];
+	if (!state->valid)
+		return false;
+
+	/*
+	 * Trigger the IPI. This assumes we never restore a pass-through
+	 * interrupt which should be safe enough
+	 */
+	xive_irq_trigger(&state->ipi_data);
+
+	return true;
+}
+
+u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+	if (!xc)
+		return 0;
+
+	/* Return the per-cpu state for state saving/migration */
+	return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
+	       (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+}
+
+int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	u8 cppr, mfrr;
+	u32 xisr;
+
+	if (!xc || !xive)
+		return -ENOENT;
+
+	/* Grab individual state fields. We don't use pending_pri */
+	cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
+	xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
+		KVM_REG_PPC_ICP_XISR_MASK;
+	mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
+
+	pr_devel("set_icp vcpu %d cppr=0x%x mfrr=0x%x xisr=0x%x\n",
+		 xc->server_num, cppr, mfrr, xisr);
+
+	/*
+	 * We can't update the state of a "pushed" VCPU, but that
+	 * shouldn't happen.
+	 */
+	if (WARN_ON(vcpu->arch.xive_pushed))
+		return -EIO;
+
+	/* Update VCPU HW saved state */
+	vcpu->arch.xive_saved_state.cppr = cppr;
+	xc->hw_cppr = xc->cppr = cppr;
+
+	/*
+	 * Update MFRR state. If it's not 0xff, we mark the VCPU as
+	 * having a pending MFRR change, which will re-evaluate the
+	 * target. The VCPU will thus potentially get a spurious
+	 * interrupt but that's not a big deal.
+	 */
+	xc->mfrr = mfrr;
+	if (mfrr < cppr)
+		xive_irq_trigger(&xc->vp_ipi_data);
+
+	/*
+	 * Now saved XIRR is "interesting". It means there's something in
+	 * the legacy "1 element" queue... for an IPI we simply ignore it,
+	 * as the MFRR restore will handle that. For anything else we need
+	 * to force a resend of the source.
+	 * However the source may not have been setup yet. If that's the
+	 * case, we keep that info and increment a counter in the xive to
+	 * tell subsequent xive_set_source() to go look.
+	 */
+	if (xisr > XICS_IPI && !xive_restore_pending_irq(xive, xisr)) {
+		xc->delayed_irq = xisr;
+		xive->delayed_irqs++;
+		pr_devel("  xisr restore delayed\n");
+	}
+
+	return 0;
+}
+
+int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+			   struct irq_desc *host_desc)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	struct irq_data *host_data = irq_desc_get_irq_data(host_desc);
+	unsigned int host_irq = irq_desc_get_irq(host_desc);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data);
+	u16 idx;
+	u8 prio;
+	int rc;
+
+	if (!xive)
+		return -ENODEV;
+
+	pr_devel("set_mapped girq 0x%lx host HW irq 0x%x...\n",guest_irq, hw_irq);
+
+	sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	/*
+	 * Mark the passed-through interrupt as going to a VCPU,
+	 * this will prevent further EOIs and similar operations
+	 * from the XIVE code. It will also mask the interrupt
+	 * to either PQ=10 or 11 state, the latter if the interrupt
+	 * is pending. This will allow us to unmask or retrigger it
+	 * after routing it to the guest with a simple EOI.
+	 *
+	 * The "state" argument is a "token", all it needs is to be
+	 * non-NULL to switch to passed-through or NULL for the
+	 * other way around. We may not yet have an actual VCPU
+	 * target here and we don't really care.
+	 */
+	rc = irq_set_vcpu_affinity(host_irq, state);
+	if (rc) {
+		pr_err("Failed to set VCPU affinity for irq %d\n", host_irq);
+		return rc;
+	}
+
+	/*
+	 * Mask and read state of IPI. We need to know if its P bit
+	 * is set as that means it's potentially already using a
+	 * queue entry in the target
+	 */
+	prio = xive_lock_and_mask(xive, sb, state);
+	pr_devel(" old IPI prio %02x P:%d Q:%d\n", prio,
+		 state->old_p, state->old_q);
+
+	/* Turn the IPI hard off */
+	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+
+	/* Grab info about irq */
+	state->pt_number = hw_irq;
+	state->pt_data = irq_data_get_irq_handler_data(host_data);
+
+	/*
+	 * Configure the IRQ to match the existing configuration of
+	 * the IPI if it was already targetted. Otherwise this will
+	 * mask the interrupt in a lossy way (act_priority is 0xff)
+	 * which is fine for a never started interrupt.
+	 */
+	xive_native_configure_irq(hw_irq,
+				  xive->vp_base + state->act_server,
+				  state->act_priority, state->number);
+
+	/*
+	 * We do an EOI to enable the interrupt (and retrigger if needed)
+	 * if the guest has the interrupt unmasked and the P bit was *not*
+	 * set in the IPI. If it was set, we know a slot may still be in
+	 * use in the target queue thus we have to wait for a guest
+	 * originated EOI
+	 */
+	if (prio != MASKED && !state->old_p)
+		xive_vm_source_eoi(hw_irq, state->pt_data);
+
+	/* Clear old_p/old_q as they are no longer relevant */
+	state->old_p = state->old_q = false;
+
+	/* Restore guest prio (unlocks EOI) */
+	mb();
+	state->guest_priority = prio;
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped);
+
+int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+			   struct irq_desc *host_desc)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	unsigned int host_irq = irq_desc_get_irq(host_desc);
+	u16 idx;
+	u8 prio;
+	int rc;
+
+	if (!xive)
+		return -ENODEV;
+
+	pr_devel("clr_mapped girq 0x%lx...\n", guest_irq);
+
+	sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	/*
+	 * Mask and read state of IRQ. We need to know if its P bit
+	 * is set as that means it's potentially already using a
+	 * queue entry in the target
+	 */
+	prio = xive_lock_and_mask(xive, sb, state);
+	pr_devel(" old IRQ prio %02x P:%d Q:%d\n", prio,
+		 state->old_p, state->old_q);
+
+	/*
+	 * If old_p is set, the interrupt is pending, we switch it to
+	 * PQ=11. This will force a resend in the host so the interrupt
+	 * isn't lost to whatver host driver may pick it up
+	 */
+	if (state->old_p)
+		xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_11);
+
+	/* Release the passed-through interrupt to the host */
+	rc = irq_set_vcpu_affinity(host_irq, NULL);
+	if (rc) {
+		pr_err("Failed to clr VCPU affinity for irq %d\n", host_irq);
+		return rc;
+	}
+
+	/* Forget about the IRQ */
+	state->pt_number = 0;
+	state->pt_data = NULL;
+
+	/* Reconfigure the IPI */
+	xive_native_configure_irq(state->ipi_number,
+				  xive->vp_base + state->act_server,
+				  state->act_priority, state->number);
+
+	/*
+	 * If old_p is set (we have a queue entry potentially
+	 * occupied) or the interrupt is masked, we set the IPI
+	 * to PQ=10 state. Otherwise we just re-enable it (PQ=00).
+	 */
+	if (prio == MASKED || state->old_p)
+		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_10);
+	else
+		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_00);
+
+	/* Restore guest prio (unlocks EOI) */
+	mb();
+	state->guest_priority = prio;
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
+
+static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	int i, j;
+
+	for (i = 0; i <= xive->max_sbid; i++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+		if (!sb)
+			continue;
+		for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
+			struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
+
+			if (!state->valid)
+				continue;
+			if (state->act_priority == MASKED)
+				continue;
+			if (state->act_server != xc->server_num)
+				continue;
+
+			/* Clean it up */
+			arch_spin_lock(&sb->lock);
+			state->act_priority = MASKED;
+			xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+			xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+			if (state->pt_number) {
+				xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
+				xive_native_configure_irq(state->pt_number, 0, MASKED, 0);
+			}
+			arch_spin_unlock(&sb->lock);
+		}
+	}
+}
+
+void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = xc->xive;
+	int i;
+
+	pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
+
+	/* Ensure no interrupt is still routed to that VP */
+	xc->valid = false;
+	kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+	/* Mask the VP IPI */
+	xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
+
+	/* Disable the VP */
+	xive_native_disable_vp(xc->vp_id);
+
+	/* Free the queues & associated interrupts */
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		struct xive_q *q = &xc->queues[i];
+
+		/* Free the escalation irq */
+		if (xc->esc_virq[i]) {
+			free_irq(xc->esc_virq[i], vcpu);
+			irq_dispose_mapping(xc->esc_virq[i]);
+			kfree(xc->esc_virq_names[i]);
+		}
+		/* Free the queue */
+		xive_native_disable_queue(xc->vp_id, q, i);
+		if (q->qpage) {
+			free_pages((unsigned long)q->qpage,
+				   xive->q_page_order);
+			q->qpage = NULL;
+		}
+	}
+
+	/* Free the IPI */
+	if (xc->vp_ipi) {
+		xive_cleanup_irq_data(&xc->vp_ipi_data);
+		xive_native_free_irq(xc->vp_ipi);
+	}
+	/* Free the VP */
+	kfree(xc);
+}
+
+int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+			     struct kvm_vcpu *vcpu, u32 cpu)
+{
+	struct kvmppc_xive *xive = dev->private;
+	struct kvmppc_xive_vcpu *xc;
+	int i, r = -EBUSY;
+
+	pr_devel("connect_vcpu(cpu=%d)\n", cpu);
+
+	if (dev->ops != &kvm_xive_ops) {
+		pr_devel("Wrong ops !\n");
+		return -EPERM;
+	}
+	if (xive->kvm != vcpu->kvm)
+		return -EPERM;
+	if (vcpu->arch.irq_type)
+		return -EBUSY;
+	if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
+		pr_devel("Duplicate !\n");
+		return -EEXIST;
+	}
+	if (cpu >= KVM_MAX_VCPUS) {
+		pr_devel("Out of bounds !\n");
+		return -EINVAL;
+	}
+	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+	if (!xc)
+		return -ENOMEM;
+
+	/* We need to synchronize with queue provisioning */
+	mutex_lock(&vcpu->kvm->lock);
+	vcpu->arch.xive_vcpu = xc;
+	xc->xive = xive;
+	xc->vcpu = vcpu;
+	xc->server_num = cpu;
+	xc->vp_id = xive->vp_base + cpu;
+	xc->mfrr = 0xff;
+	xc->valid = true;
+
+	r = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
+	if (r)
+		goto bail;
+
+	/* Configure VCPU fields for use by assembly push/pull */
+	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
+	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
+
+	/* Allocate IPI */
+	xc->vp_ipi = xive_native_alloc_irq();
+	if (!xc->vp_ipi) {
+		r = -EIO;
+		goto bail;
+	}
+	pr_devel(" IPI=0x%x\n", xc->vp_ipi);
+
+	r = xive_native_populate_irq_data(xc->vp_ipi, &xc->vp_ipi_data);
+	if (r)
+		goto bail;
+
+	/*
+	 * Initialize queues. Initially we set them all for no queueing
+	 * and we enable escalation for queue 0 only which we'll use for
+	 * our mfrr change notifications. If the VCPU is hot-plugged, we
+	 * do handle provisioning however.
+	 */
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		struct xive_q *q = &xc->queues[i];
+
+		/* Is queue already enabled ? Provision it */
+		if (xive->qmap & (1 << i)) {
+			r = xive_provision_queue(vcpu, i);
+			if (r == 0)
+				xive_attach_escalation(vcpu, i);
+			if (r)
+				goto bail;
+		} else {
+			r = xive_native_configure_queue(xc->vp_id,
+							q, i, NULL, 0, true);
+			if (r) {
+				pr_err("Failed to configure queue %d for VCPU %d\n",
+				       i, cpu);
+				goto bail;
+			}
+		}
+	}
+
+	/* If not done above, attach priority 0 escalation */
+	r = xive_attach_escalation(vcpu, 0);
+	if (r)
+		goto bail;
+
+	/* Enable the VP */
+	r = xive_native_enable_vp(xc->vp_id);
+	if (r)
+		goto bail;
+
+	/* Route the IPI */
+	r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
+	if (!r)
+		xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00);
+
+bail:
+	mutex_unlock(&vcpu->kvm->lock);
+	if (r) {
+		kvmppc_xive_cleanup_vcpu(vcpu);
+		return r;
+	}
+
+	vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
+	return 0;
+}
+
+/*
+ * Scanning of queues before/after migration save
+ */
+static void xive_pre_save_set_queued(struct kvmppc_xive *xive, u32 irq)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return;
+
+	state = &sb->irq_state[idx];
+
+	/* Some sanity checking */
+	if (!state->valid) {
+		pr_err("invalid irq 0x%x in cpu queue!\n", irq);
+		return;
+	}
+
+	/*
+	 * If the interrupt is in a queue it should have P set.
+	 * We warn so that gets reported. A backtrace isn't useful
+	 * so no need to use a WARN_ON.
+	 */
+	if (!state->saved_p)
+		pr_err("Interrupt 0x%x is marked in a queue but P not set !\n", irq);
+
+	/* Set flag */
+	state->in_queue = true;
+}
+
+static void xive_pre_save_mask_irq(struct kvmppc_xive *xive,
+				   struct kvmppc_xive_src_block *sb,
+				   u32 irq)
+{
+	struct kvmppc_xive_irq_state *state = &sb->irq_state[irq];
+
+	if (!state->valid)
+		return;
+
+	/* Mask and save state, this will also sync HW queues */
+	state->saved_scan_prio = xive_lock_and_mask(xive, sb, state);
+
+	/* Transfer P and Q */
+	state->saved_p = state->old_p;
+	state->saved_q = state->old_q;
+
+	/* Unlock */
+	arch_spin_unlock(&sb->lock);
+}
+
+static void xive_pre_save_unmask_irq(struct kvmppc_xive *xive,
+				     struct kvmppc_xive_src_block *sb,
+				     u32 irq)
+{
+	struct kvmppc_xive_irq_state *state = &sb->irq_state[irq];
+
+	if (!state->valid)
+		return;
+
+	/*
+	 * Lock / exclude EOI (not technically necessary if the
+	 * guest isn't running concurrently. If this becomes a
+	 * performance issue we can probably remove the lock.
+	 */
+	xive_lock_for_unmask(sb, state);
+
+	/* Restore mask/prio if it wasn't masked */
+	if (state->saved_scan_prio != MASKED)
+		xive_finish_unmask(xive, sb, state, state->saved_scan_prio);
+
+	/* Unlock */
+	arch_spin_unlock(&sb->lock);
+}
+
+static void xive_pre_save_queue(struct kvmppc_xive *xive, struct xive_q *q)
+{
+	u32 idx = q->idx;
+	u32 toggle = q->toggle;
+	u32 irq;
+
+	do {
+		irq = __xive_read_eq(q->qpage, q->msk, &idx, &toggle);
+		if (irq > XICS_IPI)
+			xive_pre_save_set_queued(xive, irq);
+	} while(irq);
+}
+
+static void xive_pre_save_scan(struct kvmppc_xive *xive)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	int i, j;
+
+	/*
+	 * See comment in xive_get_source() about how this
+	 * work. Collect a stable state for all interrupts
+	 */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+		if (!sb)
+			continue;
+		for (j = 0;  j < KVMPPC_XICS_IRQ_PER_ICS; j++)
+			xive_pre_save_mask_irq(xive, sb, j);
+	}
+
+	/* Then scan the queues and update the "in_queue" flag */
+	kvm_for_each_vcpu(i, vcpu, xive->kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+		if (!xc)
+			continue;
+		for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) {
+			if (xc->queues[i].qpage)
+				xive_pre_save_queue(xive, &xc->queues[i]);
+		}
+	}
+
+	/* Finally restore interrupt states */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+		if (!sb)
+			continue;
+		for (j = 0;  j < KVMPPC_XICS_IRQ_PER_ICS; j++)
+			xive_pre_save_unmask_irq(xive, sb, j);
+	}
+}
+
+static void xive_post_save_scan(struct kvmppc_xive *xive)
+{
+	u32 i, j;
+
+	/* Clear all the in_queue flags */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+		if (!sb)
+			continue;
+		for (j = 0;  j < KVMPPC_XICS_IRQ_PER_ICS; j++)
+			sb->irq_state[j].in_queue = false;
+	}
+
+	/* Next get_source() will do a new scan */
+	xive->saved_src_count = 0;
+}
+
+/*
+ * This returns the source configuration and state to user space.
+ */
+static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u64 val, prio;
+	u16 idx;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -ENOENT;
+
+	state = &sb->irq_state[idx];
+
+	if (!state->valid)
+		return -ENOENT;
+
+	pr_devel("get_source(%ld)...\n", irq);
+
+	/*
+	 * So to properly save the state into something that looks like a
+	 * XICS migration stream we cannot treat interrupts individually.
+	 *
+	 * We need, instead, mask them all (& save their previous PQ state)
+	 * to get a stable state in the HW, then sync them to ensure that
+	 * any interrupt that had already fired hits its queue, and finally
+	 * scan all the queues to collect which interrupts are still present
+	 * in the queues, so we can set the "pending" flag on them and
+	 * they can be resent on restore.
+	 *
+	 * So we do it all when the "first" interrupt gets saved, all the
+	 * state is collected at that point, the rest of xive_get_source()
+	 * will merely collect and convert that state to the expected
+	 * userspace bit mask.
+	 */
+	if (xive->saved_src_count == 0)
+		xive_pre_save_scan(xive);
+	xive->saved_src_count++;
+
+	/* Convert saved state into something compatible with xics */
+	val = state->guest_server;
+	prio = state->saved_scan_prio;
+
+	if (prio == MASKED) {
+		val |= KVM_XICS_MASKED;
+		prio = state->saved_priority;
+	}
+	val |= prio << KVM_XICS_PRIORITY_SHIFT;
+	if (state->lsi) {
+		val |= KVM_XICS_LEVEL_SENSITIVE;
+		if (state->saved_p)
+			val |= KVM_XICS_PENDING;
+	} else {
+		if (state->saved_p)
+			val |= KVM_XICS_PRESENTED;
+
+		if (state->saved_q)
+			val |= KVM_XICS_QUEUED;
+
+		/*
+		 * We mark it pending (which will attempt a re-delivery)
+		 * if we are in a queue *or* we were masked and had
+		 * Q set which is equivalent to the XICS "masked pending"
+		 * state
+		 */
+		if (state->in_queue || (prio == MASKED && state->saved_q))
+			val |= KVM_XICS_PENDING;
+	}
+
+	/*
+	 * If that was the last interrupt saved, reset the
+	 * in_queue flags
+	 */
+	if (xive->saved_src_count == xive->src_count)
+		xive_post_save_scan(xive);
+
+	/* Copy the result to userspace */
+	if (put_user(val, ubufp))
+		return -EFAULT;
+
+	return 0;
+}
+
+static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive,
+							   int irq)
+{
+	struct kvm *kvm = xive->kvm;
+	struct kvmppc_xive_src_block *sb;
+	int i, bid;
+
+	bid = irq >> KVMPPC_XICS_ICS_SHIFT;
+
+	mutex_lock(&kvm->lock);
+
+	/* block already exists - somebody else got here first */
+	if (xive->src_blocks[bid])
+		goto out;
+
+	/* Create the ICS */
+	sb = kzalloc(sizeof(*sb), GFP_KERNEL);
+	if (!sb)
+		goto out;
+
+	sb->id = bid;
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i;
+		sb->irq_state[i].guest_priority = MASKED;
+		sb->irq_state[i].saved_priority = MASKED;
+		sb->irq_state[i].act_priority = MASKED;
+	}
+	smp_wmb();
+	xive->src_blocks[bid] = sb;
+
+	if (bid > xive->max_sbid)
+		xive->max_sbid = bid;
+
+out:
+	mutex_unlock(&kvm->lock);
+	return xive->src_blocks[bid];
+}
+
+static bool xive_check_delayed_irq(struct kvmppc_xive *xive, u32 irq)
+{
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu = NULL;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+		if (!xc)
+			continue;
+
+		if (xc->delayed_irq == irq) {
+			xc->delayed_irq = 0;
+			xive->delayed_irqs--;
+			return true;
+		}
+	}
+	return false;
+}
+
+static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 idx;
+	u64 val;
+	u8 act_prio, guest_prio;
+	u32 server;
+	int rc = 0;
+
+	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
+		return -ENOENT;
+
+	pr_devel("set_source(irq=0x%lx)\n", irq);
+
+	/* Find the source */
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb) {
+		pr_devel("No source, creating source block...\n");
+		sb = xive_create_src_block(xive, irq);
+		if (!sb) {
+			pr_devel("Failed to create block...\n");
+			return -ENOMEM;
+		}
+	}
+	state = &sb->irq_state[idx];
+
+	/* Read user passed data */
+	if (get_user(val, ubufp)) {
+		pr_devel("fault getting user info !\n");
+		return -EFAULT;
+	}
+
+	server = val & KVM_XICS_DESTINATION_MASK;
+	guest_prio = val >> KVM_XICS_PRIORITY_SHIFT;
+
+	pr_devel("  val=0x016%llx (server=0x%x, guest_prio=%d)\n",
+		 val, server, guest_prio);
+	/*
+	 * If the source doesn't already have an IPI, allocate
+	 * one and get the corresponding data
+	 */
+	if (!state->ipi_number) {
+		state->ipi_number = xive_native_alloc_irq();
+		if (state->ipi_number == 0) {
+			pr_devel("Failed to allocate IPI !\n");
+			return -ENOMEM;
+		}
+		xive_native_populate_irq_data(state->ipi_number, &state->ipi_data);
+		pr_devel(" src_ipi=0x%x\n", state->ipi_number);
+	}
+
+	/*
+	 * We use lock_and_mask() to set us in the right masked
+	 * state. We will override that state from the saved state
+	 * further down, but this will handle the cases of interrupts
+	 * that need FW masking. We set the initial guest_priority to
+	 * 0 before calling it to ensure it actually performs the masking.
+	 */
+	state->guest_priority = 0;
+	xive_lock_and_mask(xive, sb, state);
+
+	/*
+	 * Now, we select a target if we have one. If we don't we
+	 * leave the interrupt untargetted. It means that an interrupt
+	 * can become "untargetted" accross migration if it was masked
+	 * by set_xive() but there is little we can do about it.
+	 */
+
+	/* First convert prio and mark interrupt as untargetted */
+	act_prio = xive_prio_from_guest(guest_prio);
+	state->act_priority = MASKED;
+	state->guest_server = server;
+
+	/*
+	 * We need to drop the lock due to the mutex below. Hopefully
+	 * nothing is touching that interrupt yet since it hasn't been
+	 * advertized to a running guest yet
+	 */
+	arch_spin_unlock(&sb->lock);
+
+	/* If we have a priority target the interrupt */
+	if (act_prio != MASKED) {
+		/* First, check provisioning of queues */
+		mutex_lock(&xive->kvm->lock);
+		rc = xive_check_provisioning(xive->kvm, act_prio);
+		mutex_unlock(&xive->kvm->lock);
+
+		/* Target interrupt */
+		if (rc == 0)
+			rc = xive_target_interrupt(xive->kvm, state,
+						   server, act_prio);
+		/*
+		 * If provisioning or targetting failed, leave it
+		 * alone and masked. It will remain disabled until
+		 * the guest re-targets it.
+		 */
+	}
+
+	/*
+	 * Find out if this was a delayed irq stashed in an ICP,
+	 * in which case, treat it as pending
+	 */
+	if (xive->delayed_irqs && xive_check_delayed_irq(xive, irq)) {
+		val |= KVM_XICS_PENDING;
+		pr_devel("  Found delayed ! forcing PENDING !\n");
+	}
+
+	/* Cleanup the SW state */
+	state->old_p = false;
+	state->old_q = false;
+	state->lsi = false;
+	state->asserted = false;
+
+	/* Restore LSI state */
+	if (val & KVM_XICS_LEVEL_SENSITIVE) {
+		state->lsi = true;
+		if (val & KVM_XICS_PENDING)
+			state->asserted = true;
+		pr_devel("  LSI ! Asserted=%d\n", state->asserted);
+	}
+
+	/*
+	 * Restore P and Q. If the interrupt was pending, we
+	 * force both P and Q, which will trigger a resend.
+	 *
+	 * That means that a guest that had both an interrupt
+	 * pending (queued) and Q set will restore with only
+	 * one instance of that interrupt instead of 2, but that
+	 * is perfectly fine as coalescing interrupts that haven't
+	 * been presented yet is always allowed.
+	 */
+	if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+		state->old_p = true;
+	if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
+		state->old_q = true;
+
+	pr_devel("  P=%d, Q=%d\n", state->old_p, state->old_q);
+
+	/*
+	 * If the interrupt was unmasked, update guest priority and
+	 * perform the appropriate state transition and do a
+	 * re-trigger if necessary.
+	 */
+	if (val & KVM_XICS_MASKED) {
+		pr_devel("  masked, saving prio\n");
+		state->guest_priority = MASKED;
+		state->saved_priority = guest_prio;
+	} else {
+		pr_devel("  unmasked, restoring to prio %d\n", guest_prio);
+		xive_finish_unmask(xive, sb, state, guest_prio);
+		state->saved_priority = guest_prio;
+	}
+
+	/* Increment the number of valid sources and mark this one valid */
+	if (!state->valid)
+		xive->src_count++;
+	state->valid = true;
+
+	return 0;
+}
+
+int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+			bool line_status)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+
+	/* Perform locklessly .... (we need to do some RCUisms here...) */
+	state = &sb->irq_state[idx];
+	if (!state->valid)
+		return -EINVAL;
+
+	/* We don't allow a trigger on a passed-through interrupt */
+	if (state->pt_number)
+		return -EINVAL;
+
+	if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL)
+		state->asserted = 1;
+	else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
+		state->asserted = 0;
+		return 0;
+	}
+
+	/* Trigger the IPI */
+	xive_irq_trigger(&state->ipi_data);
+
+	return 0;
+}
+
+static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvmppc_xive *xive = dev->private;
+
+	/* We honor the existing XICS ioctl */
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		return xive_set_source(xive, attr->attr, attr->addr);
+	}
+	return -ENXIO;
+}
+
+static int xive_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvmppc_xive *xive = dev->private;
+
+	/* We honor the existing XICS ioctl */
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		return xive_get_source(xive, attr->attr, attr->addr);
+	}
+	return -ENXIO;
+}
+
+static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	/* We honor the same limits as XICS, at least for now */
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
+		    attr->attr < KVMPPC_XICS_NR_IRQS)
+			return 0;
+		break;
+	}
+	return -ENXIO;
+}
+
+static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd)
+{
+	xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+	xive_native_configure_irq(hw_num, 0, MASKED, 0);
+	xive_cleanup_irq_data(xd);
+}
+
+static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
+{
+	int i;
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
+
+		if (!state->valid)
+			continue;
+
+		kvmppc_xive_cleanup_irq(state->ipi_number, &state->ipi_data);
+		xive_native_free_irq(state->ipi_number);
+
+		/* Pass-through, cleanup too */
+		if (state->pt_number)
+			kvmppc_xive_cleanup_irq(state->pt_number, state->pt_data);
+
+		state->valid = false;
+	}
+}
+
+static void kvmppc_xive_free(struct kvm_device *dev)
+{
+	struct kvmppc_xive *xive = dev->private;
+	struct kvm *kvm = xive->kvm;
+	int i;
+
+	debugfs_remove(xive->dentry);
+
+	if (kvm)
+		kvm->arch.xive = NULL;
+
+	/* Mask and free interrupts */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		if (xive->src_blocks[i])
+			kvmppc_xive_free_sources(xive->src_blocks[i]);
+		kfree(xive->src_blocks[i]);
+		xive->src_blocks[i] = NULL;
+	}
+
+	if (xive->vp_base != XIVE_INVALID_VP)
+		xive_native_free_vp_block(xive->vp_base);
+
+
+	kfree(xive);
+	kfree(dev);
+}
+
+static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
+{
+	struct kvmppc_xive *xive;
+	struct kvm *kvm = dev->kvm;
+	int ret = 0;
+
+	pr_devel("Creating xive for partition\n");
+
+	xive = kzalloc(sizeof(*xive), GFP_KERNEL);
+	if (!xive)
+		return -ENOMEM;
+
+	dev->private = xive;
+	xive->dev = dev;
+	xive->kvm = kvm;
+
+	/* Already there ? */
+	if (kvm->arch.xive)
+		ret = -EEXIST;
+	else
+		kvm->arch.xive = xive;
+
+	/* We use the default queue size set by the host */
+	xive->q_order = xive_native_default_eq_shift();
+	if (xive->q_order < PAGE_SHIFT)
+		xive->q_page_order = 0;
+	else
+		xive->q_page_order = xive->q_order - PAGE_SHIFT;
+
+	/* Allocate a bunch of VPs */
+	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
+	pr_devel("VP_Base=%x\n", xive->vp_base);
+
+	if (xive->vp_base == XIVE_INVALID_VP)
+		ret = -ENOMEM;
+
+	if (ret) {
+		kfree(xive);
+		return ret;
+	}
+
+	return 0;
+}
+
+
+static int xive_debug_show(struct seq_file *m, void *private)
+{
+	struct kvmppc_xive *xive = m->private;
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
+	u64 t_rm_h_xirr = 0;
+	u64 t_rm_h_ipoll = 0;
+	u64 t_rm_h_cppr = 0;
+	u64 t_rm_h_eoi = 0;
+	u64 t_rm_h_ipi = 0;
+	u64 t_vm_h_xirr = 0;
+	u64 t_vm_h_ipoll = 0;
+	u64 t_vm_h_cppr = 0;
+	u64 t_vm_h_eoi = 0;
+	u64 t_vm_h_ipi = 0;
+	unsigned int i;
+
+	if (!kvm)
+		return 0;
+
+	seq_printf(m, "=========\nVCPU state\n=========\n");
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+		if (!xc)
+			continue;
+
+		seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x"
+			   " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
+			   xc->server_num, xc->cppr, xc->hw_cppr,
+			   xc->mfrr, xc->pending,
+			   xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+
+		t_rm_h_xirr += xc->stat_rm_h_xirr;
+		t_rm_h_ipoll += xc->stat_rm_h_ipoll;
+		t_rm_h_cppr += xc->stat_rm_h_cppr;
+		t_rm_h_eoi += xc->stat_rm_h_eoi;
+		t_rm_h_ipi += xc->stat_rm_h_ipi;
+		t_vm_h_xirr += xc->stat_vm_h_xirr;
+		t_vm_h_ipoll += xc->stat_vm_h_ipoll;
+		t_vm_h_cppr += xc->stat_vm_h_cppr;
+		t_vm_h_eoi += xc->stat_vm_h_eoi;
+		t_vm_h_ipi += xc->stat_vm_h_ipi;
+	}
+
+	seq_printf(m, "Hcalls totals\n");
+	seq_printf(m, " H_XIRR  R=%10lld V=%10lld\n", t_rm_h_xirr, t_vm_h_xirr);
+	seq_printf(m, " H_IPOLL R=%10lld V=%10lld\n", t_rm_h_ipoll, t_vm_h_ipoll);
+	seq_printf(m, " H_CPPR  R=%10lld V=%10lld\n", t_rm_h_cppr, t_vm_h_cppr);
+	seq_printf(m, " H_EOI   R=%10lld V=%10lld\n", t_rm_h_eoi, t_vm_h_eoi);
+	seq_printf(m, " H_IPI   R=%10lld V=%10lld\n", t_rm_h_ipi, t_vm_h_ipi);
+
+	return 0;
+}
+
+static int xive_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xive_debug_show, inode->i_private);
+}
+
+static const struct file_operations xive_debug_fops = {
+	.open = xive_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void xive_debugfs_init(struct kvmppc_xive *xive)
+{
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
+	if (!name) {
+		pr_err("%s: no memory for name\n", __func__);
+		return;
+	}
+
+	xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+					   xive, &xive_debug_fops);
+
+	pr_debug("%s: created %s\n", __func__, name);
+	kfree(name);
+}
+
+static void kvmppc_xive_init(struct kvm_device *dev)
+{
+	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
+
+	/* Register some debug interfaces */
+	xive_debugfs_init(xive);
+}
+
+struct kvm_device_ops kvm_xive_ops = {
+	.name = "kvm-xive",
+	.create = kvmppc_xive_create,
+	.init = kvmppc_xive_init,
+	.destroy = kvmppc_xive_free,
+	.set_attr = xive_set_attr,
+	.get_attr = xive_get_attr,
+	.has_attr = xive_has_attr,
+};
+
+void kvmppc_xive_init_module(void)
+{
+	__xive_vm_h_xirr = xive_vm_h_xirr;
+	__xive_vm_h_ipoll = xive_vm_h_ipoll;
+	__xive_vm_h_ipi = xive_vm_h_ipi;
+	__xive_vm_h_cppr = xive_vm_h_cppr;
+	__xive_vm_h_eoi = xive_vm_h_eoi;
+}
+
+void kvmppc_xive_exit_module(void)
+{
+	__xive_vm_h_xirr = NULL;
+	__xive_vm_h_ipoll = NULL;
+	__xive_vm_h_ipi = NULL;
+	__xive_vm_h_cppr = NULL;
+	__xive_vm_h_eoi = NULL;
+}
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
new file mode 100644
index 000000000000..5938f7644dc1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _KVM_PPC_BOOK3S_XIVE_H
+#define _KVM_PPC_BOOK3S_XIVE_H
+
+#ifdef CONFIG_KVM_XICS
+#include "book3s_xics.h"
+
+/*
+ * State for one guest irq source.
+ *
+ * For each guest source we allocate a HW interrupt in the XIVE
+ * which we use for all SW triggers. It will be unused for
+ * pass-through but it's easier to keep around as the same
+ * guest interrupt can alternatively be emulated or pass-through
+ * if a physical device is hot unplugged and replaced with an
+ * emulated one.
+ *
+ * This state structure is very similar to the XICS one with
+ * additional XIVE specific tracking.
+ */
+struct kvmppc_xive_irq_state {
+	bool valid;			/* Interrupt entry is valid */
+
+	u32 number;			/* Guest IRQ number */
+	u32 ipi_number;			/* XIVE IPI HW number */
+	struct xive_irq_data ipi_data;	/* XIVE IPI associated data */
+	u32 pt_number;			/* XIVE Pass-through number if any */
+	struct xive_irq_data *pt_data;	/* XIVE Pass-through associated data */
+
+	/* Targetting as set by guest */
+	u32 guest_server;		/* Current guest selected target */
+	u8 guest_priority;		/* Guest set priority */
+	u8 saved_priority;		/* Saved priority when masking */
+
+	/* Actual targetting */
+	u32 act_server;			/* Actual server */
+	u8 act_priority;		/* Actual priority */
+
+	/* Various state bits */
+	bool in_eoi;			/* Synchronize with H_EOI */
+	bool old_p;			/* P bit state when masking */
+	bool old_q;			/* Q bit state when masking */
+	bool lsi;			/* level-sensitive interrupt */
+	bool asserted;			/* Only for emulated LSI: current state */
+
+	/* Saved for migration state */
+	bool in_queue;
+	bool saved_p;
+	bool saved_q;
+	u8 saved_scan_prio;
+};
+
+/* Select the "right" interrupt (IPI vs. passthrough) */
+static inline void kvmppc_xive_select_irq(struct kvmppc_xive_irq_state *state,
+					  u32 *out_hw_irq,
+					  struct xive_irq_data **out_xd)
+{
+	if (state->pt_number) {
+		if (out_hw_irq)
+			*out_hw_irq = state->pt_number;
+		if (out_xd)
+			*out_xd = state->pt_data;
+	} else {
+		if (out_hw_irq)
+			*out_hw_irq = state->ipi_number;
+		if (out_xd)
+			*out_xd = &state->ipi_data;
+	}
+}
+
+/*
+ * This corresponds to an "ICS" in XICS terminology, we use it
+ * as a mean to break up source information into multiple structures.
+ */
+struct kvmppc_xive_src_block {
+	arch_spinlock_t lock;
+	u16 id;
+	struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
+};
+
+
+struct kvmppc_xive {
+	struct kvm *kvm;
+	struct kvm_device *dev;
+	struct dentry *dentry;
+
+	/* VP block associated with the VM */
+	u32	vp_base;
+
+	/* Blocks of sources */
+	struct kvmppc_xive_src_block *src_blocks[KVMPPC_XICS_MAX_ICS_ID + 1];
+	u32	max_sbid;
+
+	/*
+	 * For state save, we lazily scan the queues on the first interrupt
+	 * being migrated. We don't have a clean way to reset that flags
+	 * so we keep track of the number of valid sources and how many of
+	 * them were migrated so we can reset when all of them have been
+	 * processed.
+	 */
+	u32	src_count;
+	u32	saved_src_count;
+
+	/*
+	 * Some irqs are delayed on restore until the source is created,
+	 * keep track here of how many of them
+	 */
+	u32	delayed_irqs;
+
+	/* Which queues (priorities) are in use by the guest */
+	u8	qmap;
+
+	/* Queue orders */
+	u32	q_order;
+	u32	q_page_order;
+
+};
+
+#define KVMPPC_XIVE_Q_COUNT	8
+
+struct kvmppc_xive_vcpu {
+	struct kvmppc_xive	*xive;
+	struct kvm_vcpu		*vcpu;
+	bool			valid;
+
+	/* Server number. This is the HW CPU ID from a guest perspective */
+	u32			server_num;
+
+	/*
+	 * HW VP corresponding to this VCPU. This is the base of the VP
+	 * block plus the server number.
+	 */
+	u32			vp_id;
+	u32			vp_chip_id;
+	u32			vp_cam;
+
+	/* IPI used for sending ... IPIs */
+	u32			vp_ipi;
+	struct xive_irq_data	vp_ipi_data;
+
+	/* Local emulation state */
+	uint8_t			cppr;	/* guest CPPR */
+	uint8_t			hw_cppr;/* Hardware CPPR */
+	uint8_t			mfrr;
+	uint8_t			pending;
+
+	/* Each VP has 8 queues though we only provision some */
+	struct xive_q		queues[KVMPPC_XIVE_Q_COUNT];
+	u32			esc_virq[KVMPPC_XIVE_Q_COUNT];
+	char			*esc_virq_names[KVMPPC_XIVE_Q_COUNT];
+
+	/* Stash a delayed irq on restore from migration (see set_icp) */
+	u32			delayed_irq;
+
+	/* Stats */
+	u64			stat_rm_h_xirr;
+	u64			stat_rm_h_ipoll;
+	u64			stat_rm_h_cppr;
+	u64			stat_rm_h_eoi;
+	u64			stat_rm_h_ipi;
+	u64			stat_vm_h_xirr;
+	u64			stat_vm_h_ipoll;
+	u64			stat_vm_h_cppr;
+	u64			stat_vm_h_eoi;
+	u64			stat_vm_h_ipi;
+};
+
+static inline struct kvm_vcpu *kvmppc_xive_find_server(struct kvm *kvm, u32 nr)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu->arch.xive_vcpu && nr == vcpu->arch.xive_vcpu->server_num)
+			return vcpu;
+	}
+	return NULL;
+}
+
+static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmppc_xive *xive,
+		u32 irq, u16 *source)
+{
+	u32 bid = irq >> KVMPPC_XICS_ICS_SHIFT;
+	u16 src = irq & KVMPPC_XICS_SRC_MASK;
+
+	if (source)
+		*source = src;
+	if (bid > KVMPPC_XICS_MAX_ICS_ID)
+		return NULL;
+	return xive->src_blocks[bid];
+}
+
+/*
+ * Mapping between guest priorities and host priorities
+ * is as follow.
+ *
+ * Guest request for 0...6 are honored. Guest request for anything
+ * higher results in a priority of 7 being applied.
+ *
+ * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
+ * in order to match AIX expectations
+ *
+ * Similar mapping is done for CPPR values
+ */
+static inline u8 xive_prio_from_guest(u8 prio)
+{
+	if (prio == 0xff || prio < 8)
+		return prio;
+	return 7;
+}
+
+static inline u8 xive_prio_to_guest(u8 prio)
+{
+	if (prio == 0xff || prio < 7)
+		return prio;
+	return 0xb;
+}
+
+static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
+{
+	u32 cur;
+
+	if (!qpage)
+		return 0;
+	cur = be32_to_cpup(qpage + *idx);
+	if ((cur >> 31) == *toggle)
+		return 0;
+	*idx = (*idx + 1) & msk;
+	if (*idx == 0)
+		(*toggle) ^= 1;
+	return cur & 0x7fffffff;
+}
+
+extern unsigned long xive_rm_h_xirr(struct kvm_vcpu *vcpu);
+extern unsigned long xive_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
+extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+			 unsigned long mfrr);
+extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
+extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
+
+extern unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
+extern unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
+extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
+			      unsigned long mfrr);
+extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
+extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
+
+#endif /* CONFIG_KVM_XICS */
+#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
new file mode 100644
index 000000000000..023a31133c37
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -0,0 +1,503 @@
+/*
+ * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+/* File to be included by other .c files */
+
+#define XGLUE(a,b) a##b
+#define GLUE(a,b) XGLUE(a,b)
+
+static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
+{
+	u8 cppr;
+	u16 ack;
+
+	/* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */
+
+	/* Perform the acknowledge OS to register cycle. */
+	ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
+
+	/* Synchronize subsequent queue accesses */
+	mb();
+
+	/* XXX Check grouping level */
+
+	/* Anything ? */
+	if (!((ack >> 8) & TM_QW1_NSR_EO))
+		return;
+
+	/* Grab CPPR of the most favored pending interrupt */
+	cppr = ack & 0xff;
+	if (cppr < 8)
+		xc->pending |= 1 << cppr;
+
+#ifdef XIVE_RUNTIME_CHECKS
+	/* Check consistency */
+	if (cppr >= xc->hw_cppr)
+		pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
+			smp_processor_id(), cppr, xc->hw_cppr);
+#endif
+
+	/*
+	 * Update our image of the HW CPPR. We don't yet modify
+	 * xc->cppr, this will be done as we scan for interrupts
+	 * in the queues.
+	 */
+	xc->hw_cppr = cppr;
+}
+
+static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
+{
+	u64 val;
+
+	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
+		offset |= offset << 4;
+
+	val =__x_readq(__x_eoi_page(xd) + offset);
+#ifdef __LITTLE_ENDIAN__
+	val >>= 64-8;
+#endif
+	return (u8)val;
+}
+
+
+static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
+{
+	/* If the XIVE supports the new "store EOI facility, use it */
+	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+		__x_writeq(0, __x_eoi_page(xd));
+	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
+		opal_int_eoi(hw_irq);
+	} else {
+		uint64_t eoi_val;
+
+		/*
+		 * Otherwise for EOI, we use the special MMIO that does
+		 * a clear of both P and Q and returns the old Q,
+		 * except for LSIs where we use the "EOI cycle" special
+		 * load.
+		 *
+		 * This allows us to then do a re-trigger if Q was set
+		 * rather than synthetizing an interrupt in software
+		 *
+		 * For LSIs, using the HW EOI cycle works around a problem
+		 * on P9 DD1 PHBs where the other ESB accesses don't work
+		 * properly.
+		 */
+		if (xd->flags & XIVE_IRQ_FLAG_LSI)
+			__x_readq(__x_eoi_page(xd));
+		else {
+			eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
+
+			/* Re-trigger if needed */
+			if ((eoi_val & 1) && __x_trig_page(xd))
+				__x_writeq(0, __x_trig_page(xd));
+		}
+	}
+}
+
+enum {
+	scan_fetch,
+	scan_poll,
+	scan_eoi,
+};
+
+static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
+				       u8 pending, int scan_type)
+{
+	u32 hirq = 0;
+	u8 prio = 0xff;
+
+	/* Find highest pending priority */
+	while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
+		struct xive_q *q;
+		u32 idx, toggle;
+		__be32 *qpage;
+
+		/*
+		 * If pending is 0 this will return 0xff which is what
+		 * we want
+		 */
+		prio = ffs(pending) - 1;
+
+		/*
+		 * If the most favoured prio we found pending is less
+		 * favored (or equal) than a pending IPI, we return
+		 * the IPI instead.
+		 *
+		 * Note: If pending was 0 and mfrr is 0xff, we will
+		 * not spurriously take an IPI because mfrr cannot
+		 * then be smaller than cppr.
+		 */
+		if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
+			prio = xc->mfrr;
+			hirq = XICS_IPI;
+			break;
+		}
+
+		/* Don't scan past the guest cppr */
+		if (prio >= xc->cppr || prio > 7)
+			break;
+
+		/* Grab queue and pointers */
+		q = &xc->queues[prio];
+		idx = q->idx;
+		toggle = q->toggle;
+
+		/*
+		 * Snapshot the queue page. The test further down for EOI
+		 * must use the same "copy" that was used by __xive_read_eq
+		 * since qpage can be set concurrently and we don't want
+		 * to miss an EOI.
+		 */
+		qpage = READ_ONCE(q->qpage);
+
+skip_ipi:
+		/*
+		 * Try to fetch from the queue. Will return 0 for a
+		 * non-queueing priority (ie, qpage = 0).
+		 */
+		hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
+
+		/*
+		 * If this was a signal for an MFFR change done by
+		 * H_IPI we skip it. Additionally, if we were fetching
+		 * we EOI it now, thus re-enabling reception of a new
+		 * such signal.
+		 *
+		 * We also need to do that if prio is 0 and we had no
+		 * page for the queue. In this case, we have non-queued
+		 * IPI that needs to be EOId.
+		 *
+		 * This is safe because if we have another pending MFRR
+		 * change that wasn't observed above, the Q bit will have
+		 * been set and another occurrence of the IPI will trigger.
+		 */
+		if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
+			if (scan_type == scan_fetch)
+				GLUE(X_PFX,source_eoi)(xc->vp_ipi,
+						       &xc->vp_ipi_data);
+			/* Loop back on same queue with updated idx/toggle */
+#ifdef XIVE_RUNTIME_CHECKS
+			WARN_ON(hirq && hirq != XICS_IPI);
+#endif
+			if (hirq)
+				goto skip_ipi;
+		}
+
+		/* If fetching, update queue pointers */
+		if (scan_type == scan_fetch) {
+			q->idx = idx;
+			q->toggle = toggle;
+		}
+
+		/* Something found, stop searching */
+		if (hirq)
+			break;
+
+		/* Clear the pending bit on the now empty queue */
+		pending &= ~(1 << prio);
+
+		/*
+		 * Check if the queue count needs adjusting due to
+		 * interrupts being moved away.
+		 */
+		if (atomic_read(&q->pending_count)) {
+			int p = atomic_xchg(&q->pending_count, 0);
+			if (p) {
+#ifdef XIVE_RUNTIME_CHECKS
+				WARN_ON(p > atomic_read(&q->count));
+#endif
+				atomic_sub(p, &q->count);
+			}
+		}
+	}
+
+	/* If we are just taking a "peek", do nothing else */
+	if (scan_type == scan_poll)
+		return hirq;
+
+	/* Update the pending bits */
+	xc->pending = pending;
+
+	/*
+	 * If this is an EOI that's it, no CPPR adjustment done here,
+	 * all we needed was cleanup the stale pending bits and check
+	 * if there's anything left.
+	 */
+	if (scan_type == scan_eoi)
+		return hirq;
+
+	/*
+	 * If we found an interrupt, adjust what the guest CPPR should
+	 * be as if we had just fetched that interrupt from HW.
+	 */
+	if (hirq)
+		xc->cppr = prio;
+	/*
+	 * If it was an IPI the HW CPPR might have been lowered too much
+	 * as the HW interrupt we use for IPIs is routed to priority 0.
+	 *
+	 * We re-sync it here.
+	 */
+	if (xc->cppr != xc->hw_cppr) {
+		xc->hw_cppr = xc->cppr;
+		__x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
+	}
+
+	return hirq;
+}
+
+X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u8 old_cppr;
+	u32 hirq;
+
+	pr_devel("H_XIRR\n");
+
+	xc->GLUE(X_STAT_PFX,h_xirr)++;
+
+	/* First collect pending bits from HW */
+	GLUE(X_PFX,ack_pending)(xc);
+
+	/*
+	 * Cleanup the old-style bits if needed (they may have been
+	 * set by pull or an escalation interrupts).
+	 */
+	if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
+		clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
+			  &vcpu->arch.pending_exceptions);
+
+	pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
+		 xc->pending, xc->hw_cppr, xc->cppr);
+
+	/* Grab previous CPPR and reverse map it */
+	old_cppr = xive_prio_to_guest(xc->cppr);
+
+	/* Scan for actual interrupts */
+	hirq = GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_fetch);
+
+	pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
+		 hirq, xc->hw_cppr, xc->cppr);
+
+#ifdef XIVE_RUNTIME_CHECKS
+	/* That should never hit */
+	if (hirq & 0xff000000)
+		pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
+#endif
+
+	/*
+	 * XXX We could check if the interrupt is masked here and
+	 * filter it. If we chose to do so, we would need to do:
+	 *
+	 *    if (masked) {
+	 *        lock();
+	 *        if (masked) {
+	 *            old_Q = true;
+	 *            hirq = 0;
+	 *        }
+	 *        unlock();
+	 *    }
+	 */
+
+	/* Return interrupt and old CPPR in GPR4 */
+	vcpu->arch.gpr[4] = hirq | (old_cppr << 24);
+
+	return H_SUCCESS;
+}
+
+X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u8 pending = xc->pending;
+	u32 hirq;
+	u8 pipr;
+
+	pr_devel("H_IPOLL(server=%ld)\n", server);
+
+	xc->GLUE(X_STAT_PFX,h_ipoll)++;
+
+	/* Grab the target VCPU if not the current one */
+	if (xc->server_num != server) {
+		vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+		if (!vcpu)
+			return H_PARAMETER;
+		xc = vcpu->arch.xive_vcpu;
+
+		/* Scan all priorities */
+		pending = 0xff;
+	} else {
+		/* Grab pending interrupt if any */
+		pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
+		if (pipr < 8)
+			pending |= 1 << pipr;
+	}
+
+	hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
+
+	/* Return interrupt and old CPPR in GPR4 */
+	vcpu->arch.gpr[4] = hirq | (xc->cppr << 24);
+
+	return H_SUCCESS;
+}
+
+static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
+{
+	u8 pending, prio;
+
+	pending = xc->pending;
+	if (xc->mfrr != 0xff) {
+		if (xc->mfrr < 8)
+			pending |= 1 << xc->mfrr;
+		else
+			pending |= 0x80;
+	}
+	if (!pending)
+		return;
+	prio = ffs(pending) - 1;
+
+	__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
+}
+
+X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u8 old_cppr;
+
+	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
+
+	xc->GLUE(X_STAT_PFX,h_cppr)++;
+
+	/* Map CPPR */
+	cppr = xive_prio_from_guest(cppr);
+
+	/* Remember old and update SW state */
+	old_cppr = xc->cppr;
+	xc->cppr = cppr;
+
+	/*
+	 * We are masking less, we need to look for pending things
+	 * to deliver and set VP pending bits accordingly to trigger
+	 * a new interrupt otherwise we might miss MFRR changes for
+	 * which we have optimized out sending an IPI signal.
+	 */
+	if (cppr > old_cppr)
+		GLUE(X_PFX,push_pending_to_hw)(xc);
+
+	/* Apply new CPPR */
+	xc->hw_cppr = cppr;
+	__x_writeb(cppr, __x_tima + TM_QW1_OS + TM_CPPR);
+
+	return H_SUCCESS;
+}
+
+X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_irq_data *xd;
+	u8 new_cppr = xirr >> 24;
+	u32 irq = xirr & 0x00ffffff, hw_num;
+	u16 src;
+	int rc = 0;
+
+	pr_devel("H_EOI(xirr=%08lx)\n", xirr);
+
+	xc->GLUE(X_STAT_PFX,h_eoi)++;
+
+	xc->cppr = xive_prio_from_guest(new_cppr);
+
+	/*
+	 * IPIs are synthetized from MFRR and thus don't need
+	 * any special EOI handling. The underlying interrupt
+	 * used to signal MFRR changes is EOId when fetched from
+	 * the queue.
+	 */
+	if (irq == XICS_IPI || irq == 0)
+		goto bail;
+
+	/* Find interrupt source */
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb) {
+		pr_devel(" source not found !\n");
+		rc = H_PARAMETER;
+		goto bail;
+	}
+	state = &sb->irq_state[src];
+	kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+	state->in_eoi = true;
+	mb();
+
+again:
+	if (state->guest_priority == MASKED) {
+		arch_spin_lock(&sb->lock);
+		if (state->guest_priority != MASKED) {
+			arch_spin_unlock(&sb->lock);
+			goto again;
+		}
+		pr_devel(" EOI on saved P...\n");
+
+		/* Clear old_p, that will cause unmask to perform an EOI */
+		state->old_p = false;
+
+		arch_spin_unlock(&sb->lock);
+	} else {
+		pr_devel(" EOI on source...\n");
+
+		/* Perform EOI on the source */
+		GLUE(X_PFX,source_eoi)(hw_num, xd);
+
+		/* If it's an emulated LSI, check level and resend */
+		if (state->lsi && state->asserted)
+			__x_writeq(0, __x_trig_page(xd));
+
+	}
+
+	mb();
+	state->in_eoi = false;
+bail:
+
+	/* Re-evaluate pending IRQs and update HW */
+	GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_eoi);
+	GLUE(X_PFX,push_pending_to_hw)(xc);
+	pr_devel(" after scan pending=%02x\n", xc->pending);
+
+	/* Apply new CPPR */
+	xc->hw_cppr = xc->cppr;
+	__x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
+
+	return rc;
+}
+
+X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
+			       unsigned long mfrr)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+	pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
+
+	xc->GLUE(X_STAT_PFX,h_ipi)++;
+
+	/* Find target */
+	vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+	if (!vcpu)
+		return H_PARAMETER;
+	xc = vcpu->arch.xive_vcpu;
+
+	/* Locklessly write over MFRR */
+	xc->mfrr = mfrr;
+
+	/* Shoot the IPI if most favored than target cppr */
+	if (mfrr < xc->cppr)
+		__x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
+
+	return H_SUCCESS;
+}
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
index 5a9a10b90762..3f1be85a83bc 100644
--- a/arch/powerpc/kvm/irq.h
+++ b/arch/powerpc/kvm/irq.h
@@ -12,6 +12,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 #endif
 #ifdef CONFIG_KVM_XICS
 	ret = ret || (kvm->arch.xics != NULL);
+	ret = ret || (kvm->arch.xive != NULL);
 #endif
 	smp_rmb();
 	return ret;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 95c91a9de351..de79bd721ec7 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -37,6 +37,8 @@
 #include <asm/cputhreads.h>
 #include <asm/irqflags.h>
 #include <asm/iommu.h>
+#include <asm/xive.h>
+
 #include "timing.h"
 #include "irq.h"
 #include "../mm/mmu_decl.h"
@@ -699,7 +701,10 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 		kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
 		break;
 	case KVMPPC_IRQ_XICS:
-		kvmppc_xics_free_icp(vcpu);
+		if (xive_enabled())
+			kvmppc_xive_cleanup_vcpu(vcpu);
+		else
+			kvmppc_xics_free_icp(vcpu);
 		break;
 	}
 
@@ -1219,8 +1224,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 
 		r = -EPERM;
 		dev = kvm_device_from_filp(f.file);
-		if (dev)
-			r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
+		if (dev) {
+			if (xive_enabled())
+				r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]);
+			else
+				r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
+		}
 
 		fdput(f);
 		break;
@@ -1244,7 +1253,7 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
 		return true;
 #endif
 #ifdef CONFIG_KVM_XICS
-	if (kvm->arch.xics)
+	if (kvm->arch.xics || kvm->arch.xive)
 		return true;
 #endif
 	return false;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index e0f856bfbfe8..d71cd773d870 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -890,3 +890,4 @@ EXPORT_SYMBOL_GPL(opal_leds_set_ind);
 EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
 /* Export this for KVM */
 EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
+EXPORT_SYMBOL_GPL(opal_int_eoi);
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index d9cd7f705f21..496036c93531 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -46,13 +46,15 @@
 #endif
 
 bool __xive_enabled;
+EXPORT_SYMBOL_GPL(__xive_enabled);
 bool xive_cmdline_disabled;
 
 /* We use only one priority for now */
 static u8 xive_irq_priority;
 
-/* TIMA */
+/* TIMA exported to KVM */
 void __iomem *xive_tima;
+EXPORT_SYMBOL_GPL(xive_tima);
 u32 xive_tima_offset;
 
 /* Backend ops */
@@ -345,8 +347,11 @@ static void xive_irq_eoi(struct irq_data *d)
 	DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n",
 		    d->irq, irqd_to_hwirq(d), xc->pending_prio);
 
-	/* EOI the source if it hasn't been disabled */
-	if (!irqd_irq_disabled(d))
+	/*
+	 * EOI the source if it hasn't been disabled and hasn't
+	 * been passed-through to a KVM guest
+	 */
+	if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
 		xive_do_source_eoi(irqd_to_hwirq(d), xd);
 
 	/*
@@ -689,9 +694,14 @@ static int xive_irq_set_affinity(struct irq_data *d,
 
 	old_target = xd->target;
 
-	rc = xive_ops->configure_irq(hw_irq,
-				     get_hard_smp_processor_id(target),
-				     xive_irq_priority, d->irq);
+	/*
+	 * Only configure the irq if it's not currently passed-through to
+	 * a KVM guest
+	 */
+	if (!irqd_is_forwarded_to_vcpu(d))
+		rc = xive_ops->configure_irq(hw_irq,
+					     get_hard_smp_processor_id(target),
+					     xive_irq_priority, d->irq);
 	if (rc < 0) {
 		pr_err("Error %d reconfiguring irq %d\n", rc, d->irq);
 		return rc;
@@ -771,6 +781,123 @@ static int xive_irq_retrigger(struct irq_data *d)
 	return 1;
 }
 
+static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int rc;
+	u8 pq;
+
+	/*
+	 * We only support this on interrupts that do not require
+	 * firmware calls for masking and unmasking
+	 */
+	if (xd->flags & XIVE_IRQ_FLAG_MASK_FW)
+		return -EIO;
+
+	/*
+	 * This is called by KVM with state non-NULL for enabling
+	 * pass-through or NULL for disabling it
+	 */
+	if (state) {
+		irqd_set_forwarded_to_vcpu(d);
+
+		/* Set it to PQ=10 state to prevent further sends */
+		pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_10);
+
+		/* No target ? nothing to do */
+		if (xd->target == XIVE_INVALID_TARGET) {
+			/*
+			 * An untargetted interrupt should have been
+			 * also masked at the source
+			 */
+			WARN_ON(pq & 2);
+
+			return 0;
+		}
+
+		/*
+		 * If P was set, adjust state to PQ=11 to indicate
+		 * that a resend is needed for the interrupt to reach
+		 * the guest. Also remember the value of P.
+		 *
+		 * This also tells us that it's in flight to a host queue
+		 * or has already been fetched but hasn't been EOIed yet
+		 * by the host. This it's potentially using up a host
+		 * queue slot. This is important to know because as long
+		 * as this is the case, we must not hard-unmask it when
+		 * "returning" that interrupt to the host.
+		 *
+		 * This saved_p is cleared by the host EOI, when we know
+		 * for sure the queue slot is no longer in use.
+		 */
+		if (pq & 2) {
+			pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_11);
+			xd->saved_p = true;
+
+			/*
+			 * Sync the XIVE source HW to ensure the interrupt
+			 * has gone through the EAS before we change its
+			 * target to the guest. That should guarantee us
+			 * that we *will* eventually get an EOI for it on
+			 * the host. Otherwise there would be a small window
+			 * for P to be seen here but the interrupt going
+			 * to the guest queue.
+			 */
+			if (xive_ops->sync_source)
+				xive_ops->sync_source(hw_irq);
+		} else
+			xd->saved_p = false;
+	} else {
+		irqd_clr_forwarded_to_vcpu(d);
+
+		/* No host target ? hard mask and return */
+		if (xd->target == XIVE_INVALID_TARGET) {
+			xive_do_source_set_mask(xd, true);
+			return 0;
+		}
+
+		/*
+		 * Sync the XIVE source HW to ensure the interrupt
+		 * has gone through the EAS before we change its
+		 * target to the host.
+		 */
+		if (xive_ops->sync_source)
+			xive_ops->sync_source(hw_irq);
+
+		/*
+		 * By convention we are called with the interrupt in
+		 * a PQ=10 or PQ=11 state, ie, it won't fire and will
+		 * have latched in Q whether there's a pending HW
+		 * interrupt or not.
+		 *
+		 * First reconfigure the target.
+		 */
+		rc = xive_ops->configure_irq(hw_irq,
+					     get_hard_smp_processor_id(xd->target),
+					     xive_irq_priority, d->irq);
+		if (rc)
+			return rc;
+
+		/*
+		 * Then if saved_p is not set, effectively re-enable the
+		 * interrupt with an EOI. If it is set, we know there is
+		 * still a message in a host queue somewhere that will be
+		 * EOId eventually.
+		 *
+		 * Note: We don't check irqd_irq_disabled(). Effectively,
+		 * we *will* let the irq get through even if masked if the
+		 * HW is still firing it in order to deal with the whole
+		 * saved_p business properly. If the interrupt triggers
+		 * while masked, the generic code will re-mask it anyway.
+		 */
+		if (!xd->saved_p)
+			xive_do_source_eoi(hw_irq, xd);
+
+	}
+	return 0;
+}
+
 static struct irq_chip xive_irq_chip = {
 	.name = "XIVE-IRQ",
 	.irq_startup = xive_irq_startup,
@@ -781,12 +908,14 @@ static struct irq_chip xive_irq_chip = {
 	.irq_set_affinity = xive_irq_set_affinity,
 	.irq_set_type = xive_irq_set_type,
 	.irq_retrigger = xive_irq_retrigger,
+	.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
 };
 
 bool is_xive_irq(struct irq_chip *chip)
 {
 	return chip == &xive_irq_chip;
 }
+EXPORT_SYMBOL_GPL(is_xive_irq);
 
 void xive_cleanup_irq_data(struct xive_irq_data *xd)
 {
@@ -801,6 +930,7 @@ void xive_cleanup_irq_data(struct xive_irq_data *xd)
 		xd->trig_mmio = NULL;
 	}
 }
+EXPORT_SYMBOL_GPL(xive_cleanup_irq_data);
 
 static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
 {
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 5fae59186cb2..6feac0a758e1 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -31,6 +31,7 @@
 #include <asm/xive.h>
 #include <asm/xive-regs.h>
 #include <asm/opal.h>
+#include <asm/kvm_ppc.h>
 
 #include "xive-internal.h"
 
@@ -95,6 +96,7 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(xive_native_populate_irq_data);
 
 int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
 {
@@ -108,6 +110,8 @@ int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
 	}
 	return rc == 0 ? 0 : -ENXIO;
 }
+EXPORT_SYMBOL_GPL(xive_native_configure_irq);
+
 
 /* This can be called multiple time to change a queue configuration */
 int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
@@ -172,6 +176,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
 fail:
 	return rc;
 }
+EXPORT_SYMBOL_GPL(xive_native_configure_queue);
 
 static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
 {
@@ -191,6 +196,7 @@ void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
 {
 	__xive_native_disable_queue(vp_id, q, prio);
 }
+EXPORT_SYMBOL_GPL(xive_native_disable_queue);
 
 static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
 {
@@ -261,6 +267,7 @@ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
 	}
 	return 0;
 }
+#endif /* CONFIG_SMP */
 
 u32 xive_native_alloc_irq(void)
 {
@@ -276,6 +283,7 @@ u32 xive_native_alloc_irq(void)
 		return 0;
 	return rc;
 }
+EXPORT_SYMBOL_GPL(xive_native_alloc_irq);
 
 void xive_native_free_irq(u32 irq)
 {
@@ -286,7 +294,9 @@ void xive_native_free_irq(u32 irq)
 		msleep(1);
 	}
 }
+EXPORT_SYMBOL_GPL(xive_native_free_irq);
 
+#ifdef CONFIG_SMP
 static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc)
 {
 	s64 rc;
@@ -382,7 +392,7 @@ static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
 		return;
 
 	/* Enable the pool VP */
-	vp = xive_pool_vps + get_hard_smp_processor_id(cpu);
+	vp = xive_pool_vps + cpu;
 	pr_debug("CPU %d setting up pool VP 0x%x\n", cpu, vp);
 	for (;;) {
 		rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0);
@@ -427,7 +437,7 @@ static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
 	in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
 
 	/* Disable it */
-	vp = xive_pool_vps + get_hard_smp_processor_id(cpu);
+	vp = xive_pool_vps + cpu;
 	for (;;) {
 		rc = opal_xive_set_vp_info(vp, 0, 0);
 		if (rc != OPAL_BUSY)
@@ -436,10 +446,11 @@ static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
 	}
 }
 
-static void xive_native_sync_source(u32 hw_irq)
+void xive_native_sync_source(u32 hw_irq)
 {
 	opal_xive_sync(XIVE_SYNC_EAS, hw_irq);
 }
+EXPORT_SYMBOL_GPL(xive_native_sync_source);
 
 static const struct xive_ops xive_native_ops = {
 	.populate_irq_data	= xive_native_populate_irq_data,
@@ -500,10 +511,24 @@ static bool xive_parse_provisioning(struct device_node *np)
 	return true;
 }
 
+static void xive_native_setup_pools(void)
+{
+	/* Allocate a pool big enough */
+	pr_debug("XIVE: Allocating VP block for pool size %d\n", nr_cpu_ids);
+
+	xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids);
+	if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP))
+		pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n");
+
+	pr_debug("XIVE: Pool VPs allocated at 0x%x for %d max CPUs\n",
+		 xive_pool_vps, nr_cpu_ids);
+}
+
 u32 xive_native_default_eq_shift(void)
 {
 	return xive_queue_shift;
 }
+EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
 
 bool xive_native_init(void)
 {
@@ -513,7 +538,7 @@ bool xive_native_init(void)
 	struct property *prop;
 	u8 max_prio = 7;
 	const __be32 *p;
-	u32 val;
+	u32 val, cpu;
 	s64 rc;
 
 	if (xive_cmdline_disabled)
@@ -549,7 +574,11 @@ bool xive_native_init(void)
 			break;
 	}
 
-	/* Grab size of provisioning pages */
+	/* Configure Thread Management areas for KVM */
+	for_each_possible_cpu(cpu)
+		kvmppc_set_xive_tima(cpu, r.start, tima);
+
+	/* Grab size of provisionning pages */
 	xive_parse_provisioning(np);
 
 	/* Switch the XIVE to exploitation mode */
@@ -559,6 +588,9 @@ bool xive_native_init(void)
 		return false;
 	}
 
+	/* Setup some dummy HV pool VPs */
+	xive_native_setup_pools();
+
 	/* Initialize XIVE core with our backend */
 	if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS,
 			    max_prio)) {
@@ -637,3 +669,47 @@ void xive_native_free_vp_block(u32 vp_base)
 		pr_warn("OPAL error %lld freeing VP block\n", rc);
 }
 EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
+
+int xive_native_enable_vp(u32 vp_id)
+{
+	s64 rc;
+
+	for (;;) {
+		rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(1);
+	}
+	return rc ? -EIO : 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_enable_vp);
+
+int xive_native_disable_vp(u32 vp_id)
+{
+	s64 rc;
+
+	for (;;) {
+		rc = opal_xive_set_vp_info(vp_id, 0, 0);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(1);
+	}
+	return rc ? -EIO : 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_disable_vp);
+
+int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
+{
+	__be64 vp_cam_be;
+	__be32 vp_chip_id_be;
+	s64 rc;
+
+	rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be);
+	if (rc)
+		return -EIO;
+	*out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu;
+	*out_chip_id = be32_to_cpu(vp_chip_id_be);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c14ad9809da..d1a6e554ee68 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1165,7 +1165,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 void kvm_unregister_device_ops(u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
-extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a17d78759727..1b0da5771f71 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2839,10 +2839,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 	[KVM_DEV_TYPE_FSL_MPIC_20]	= &kvm_mpic_ops,
 	[KVM_DEV_TYPE_FSL_MPIC_42]	= &kvm_mpic_ops,
 #endif
-
-#ifdef CONFIG_KVM_XICS
-	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
-#endif
 };
 
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
-- 
cgit v1.2.3


From b5fe223a4bd0217a657ff084e48752c367a8a55f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 27 Apr 2017 07:52:35 -0700
Subject: srcu: Adjust default auto-expediting holdoff

The default value for the kernel boot parameter srcutree.exp_holdoff
is 50 microseconds, which is too long for good Tree SRCU performance
(compared to Classic SRCU) on the workloads tested by Mike Galbraith.
This commit therefore sets the default value to 25 microseconds, which
shows excellent results in Mike's testing.

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Mike Galbraith <efault@gmx.de>
---
 kernel/rcu/srcutree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 74c283f9d15e..87b070de6371 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -39,7 +39,7 @@
 
 #include "rcu.h"
 
-ulong exp_holdoff = 50 * 1000; /* Holdoff (ns) for auto-expediting. */
+ulong exp_holdoff = 25 * 1000; /* Holdoff (ns) for auto-expediting. */
 module_param(exp_holdoff, ulong, 0444);
 
 static void srcu_invoke_callbacks(struct work_struct *work);
-- 
cgit v1.2.3


From c8581984ffd917736ab3630c0f07bb5b218411e3 Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Wed, 22 Feb 2017 18:19:34 +0000
Subject: arc: axs10x: Add DT bindings for I2S audio playback

This patch adds the necessary DT bindings to get HDMI audio
output in ARC AXS10x SDP. The bindings for I2S controller were
added as well as the bindings for simple audio card.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: Carlos Palminha <palminha@synopsys.com>
Cc: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: devicetree@vger.kernel.org
Cc: linux-snps-arc@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/boot/dts/axs10x_mb.dtsi | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index d6c1bbc98ac3..9d882b1a3e1c 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -149,12 +149,13 @@
 			interrupts = <14>;
 		};
 
-		i2c@0x1e000 {
-			compatible = "snps,designware-i2c";
+		i2s: i2s@1e000 {
+			compatible = "snps,designware-i2s";
 			reg = <0x1e000 0x100>;
-			clock-frequency = <400000>;
-			clocks = <&i2cclk>;
+			clocks = <&i2sclk 0>;
+			clock-names = "i2sclk";
 			interrupts = <15>;
+			#sound-dai-cells = <0>;
 		};
 
 		i2c@0x1f000 {
@@ -174,6 +175,7 @@
 				adi,input-colorspace = "rgb";
 				adi,input-clock = "1x";
 				adi,clock-delay = <0x03>;
+				#sound-dai-cells = <0>;
 
 				ports {
 					#address-cells = <1>;
@@ -295,5 +297,17 @@
 				};
 			};
 		};
+
+		sound_playback {
+			compatible = "simple-audio-card";
+			simple-audio-card,name = "AXS10x HDMI Audio";
+			simple-audio-card,format = "i2s";
+			simple-audio-card,cpu {
+				sound-dai = <&i2s>;
+			};
+			simple-audio-card,codec {
+				sound-dai = <&adv7511>;
+			};
+		};
 	};
 };
-- 
cgit v1.2.3


From 36b5a5152119cd08760067ca31d8577de993c490 Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Wed, 22 Feb 2017 18:19:35 +0000
Subject: arc: axs10x: Fix ARC PGU default clock frequency

Default clock frequency for ARC PGU does not match any
existing HDMI mode, instead the default value matches a
DVI mode. Change the clock frequency to 74.25MHz so that
it matches HDMI mode 1280x720@60Hz

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: Carlos Palminha <palminha@synopsys.com>
Cc: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: devicetree@vger.kernel.org
Cc: linux-snps-arc@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Acked-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/boot/dts/axs10x_mb.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index 9d882b1a3e1c..41cfb29b62c1 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -51,7 +51,7 @@
 			pguclk: pguclk {
 				#clock-cells = <0>;
 				compatible = "fixed-clock";
-				clock-frequency = <74440000>;
+				clock-frequency = <74250000>;
 			};
 		};
 
-- 
cgit v1.2.3


From df807fffaabde625fa9adb82e3e5b88cdaa5709a Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Thu, 27 Apr 2017 11:13:38 +0800
Subject: NFSv4.x/callback: Create the callback service through
 svc_create_pooled

As the comments for svc_set_num_threads() said,
" Destroying threads relies on the service threads filling in
rqstp->rq_task, which only the nfs ones do.  Assumes the serv
has been created using svc_create_pooled()."

If creating service through svc_create(), the svc_pool_map_put()
will be called in svc_destroy(), but the pool map isn't used.
So that, the reference of pool map will be drop, the next using
of pool map will get a zero npools.

[  137.992130] divide error: 0000 [#1] SMP
[  137.992148] Modules linked in: nfsd(E) nfsv4 nfs fscache fuse tun bridge stp llc ip_set nfnetlink vmw_vsock_vmci_transport vsock snd_seq_midi snd_seq_midi_event vmw_balloon coretemp crct10dif_pclmul crc32_pclmul ppdev ghash_clmulni_intel intel_rapl_perf joydev snd_ens1371 gameport snd_ac97_codec ac97_bus snd_seq snd_pcm snd_rawmidi snd_timer snd_seq_device snd soundcore parport_pc parport nfit acpi_cpufreq tpm_tis tpm_tis_core tpm vmw_vmci i2c_piix4 shpchp auth_rpcgss nfs_acl lockd(E) grace sunrpc(E) xfs libcrc32c vmwgfx drm_kms_helper ttm crc32c_intel drm e1000 mptspi scsi_transport_spi serio_raw mptscsih mptbase ata_generic pata_acpi [last unloaded: nfsd]
[  137.992336] CPU: 0 PID: 4514 Comm: rpc.nfsd Tainted: G            E   4.11.0-rc8+ #536
[  137.992777] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015
[  137.993757] task: ffff955984101d00 task.stack: ffff9873c2604000
[  137.994231] RIP: 0010:svc_pool_for_cpu+0x2b/0x80 [sunrpc]
[  137.994768] RSP: 0018:ffff9873c2607c18 EFLAGS: 00010246
[  137.995227] RAX: 0000000000000000 RBX: ffff95598376f000 RCX: 0000000000000002
[  137.995673] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9559944aec00
[  137.996156] RBP: ffff9873c2607c18 R08: ffff9559944aec28 R09: 0000000000000000
[  137.996609] R10: 0000000001080002 R11: 0000000000000000 R12: ffff95598376f010
[  137.997063] R13: ffff95598376f018 R14: ffff9559944aec28 R15: ffff9559944aec00
[  137.997584] FS:  00007f755529eb40(0000) GS:ffff9559bb600000(0000) knlGS:0000000000000000
[  137.998048] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  137.998548] CR2: 000055f3aecd9660 CR3: 0000000084290000 CR4: 00000000001406f0
[  137.999052] Call Trace:
[  137.999517]  svc_xprt_do_enqueue+0xef/0x260 [sunrpc]
[  138.000028]  svc_xprt_received+0x47/0x90 [sunrpc]
[  138.000487]  svc_add_new_perm_xprt+0x76/0x90 [sunrpc]
[  138.000981]  svc_addsock+0x14b/0x200 [sunrpc]
[  138.001424]  ? recalc_sigpending+0x1b/0x50
[  138.001860]  ? __getnstimeofday64+0x41/0xd0
[  138.002346]  ? do_gettimeofday+0x29/0x90
[  138.002779]  write_ports+0x255/0x2c0 [nfsd]
[  138.003202]  ? _copy_from_user+0x4e/0x80
[  138.003676]  ? write_recoverydir+0x100/0x100 [nfsd]
[  138.004098]  nfsctl_transaction_write+0x48/0x80 [nfsd]
[  138.004544]  __vfs_write+0x37/0x160
[  138.004982]  ? selinux_file_permission+0xd7/0x110
[  138.005401]  ? security_file_permission+0x3b/0xc0
[  138.005865]  vfs_write+0xb5/0x1a0
[  138.006267]  SyS_write+0x55/0xc0
[  138.006654]  entry_SYSCALL_64_fastpath+0x1a/0xa9
[  138.007071] RIP: 0033:0x7f7554b9dc30
[  138.007437] RSP: 002b:00007ffc9f92c788 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[  138.007807] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f7554b9dc30
[  138.008168] RDX: 0000000000000002 RSI: 00005640cd536640 RDI: 0000000000000003
[  138.008573] RBP: 00007ffc9f92c780 R08: 0000000000000001 R09: 0000000000000002
[  138.008918] R10: 0000000000000064 R11: 0000000000000246 R12: 0000000000000004
[  138.009254] R13: 00005640cdbf77a0 R14: 00005640cdbf7720 R15: 00007ffc9f92c238
[  138.009610] Code: 0f 1f 44 00 00 48 8b 87 98 00 00 00 55 48 89 e5 48 83 78 08 00 74 10 8b 05 07 42 02 00 83 f8 01 74 40 83 f8 02 74 19 31 c0 31 d2 <f7> b7 88 00 00 00 5d 89 d0 48 c1 e0 07 48 03 87 90 00 00 00 c3
[  138.010664] RIP: svc_pool_for_cpu+0x2b/0x80 [sunrpc] RSP: ffff9873c2607c18
[  138.011061] ---[ end trace b3468224cafa7d11 ]---

Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfs/callback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 773774531aff..c5e27ebd8da8 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -280,7 +280,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
 		printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
 			cb_info->users);
 
-	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
+	serv = svc_create_pooled(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
 	if (!serv) {
 		printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
 		return ERR_PTR(-ENOMEM);
-- 
cgit v1.2.3


From 9e0d87680d689f1758185851c3da6eafb16e71e1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 26 Apr 2017 11:55:26 -0400
Subject: SUNRPC: Refactor svc_set_num_threads()

Refactor to separate out the functions of starting and stopping threads
so that they can be used in other helpers.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Tested-and-reviewed-by: Kinglong Mee <kinglongmee@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc.c | 96 ++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 58 insertions(+), 38 deletions(-)

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a08aeb56b8e4..98dc33ae738b 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -702,59 +702,32 @@ found_pool:
 	return task;
 }
 
-/*
- * Create or destroy enough new threads to make the number
- * of threads the given number.  If `pool' is non-NULL, applies
- * only to threads in that pool, otherwise round-robins between
- * all pools.  Caller must ensure that mutual exclusion between this and
- * server startup or shutdown.
- *
- * Destroying threads relies on the service threads filling in
- * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
- * has been created using svc_create_pooled().
- *
- * Based on code that used to be in nfsd_svc() but tweaked
- * to be pool-aware.
- */
-int
-svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+/* create new threads */
+static int
+svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 {
 	struct svc_rqst	*rqstp;
 	struct task_struct *task;
 	struct svc_pool *chosen_pool;
-	int error = 0;
 	unsigned int state = serv->sv_nrthreads-1;
 	int node;
 
-	if (pool == NULL) {
-		/* The -1 assumes caller has done a svc_get() */
-		nrservs -= (serv->sv_nrthreads-1);
-	} else {
-		spin_lock_bh(&pool->sp_lock);
-		nrservs -= pool->sp_nrthreads;
-		spin_unlock_bh(&pool->sp_lock);
-	}
-
-	/* create new threads */
-	while (nrservs > 0) {
+	do {
 		nrservs--;
 		chosen_pool = choose_pool(serv, pool, &state);
 
 		node = svc_pool_map_get_node(chosen_pool->sp_id);
 		rqstp = svc_prepare_thread(serv, chosen_pool, node);
-		if (IS_ERR(rqstp)) {
-			error = PTR_ERR(rqstp);
-			break;
-		}
+		if (IS_ERR(rqstp))
+			return PTR_ERR(rqstp);
 
 		__module_get(serv->sv_ops->svo_module);
 		task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
 					      node, "%s", serv->sv_name);
 		if (IS_ERR(task)) {
-			error = PTR_ERR(task);
 			module_put(serv->sv_ops->svo_module);
 			svc_exit_thread(rqstp);
-			break;
+			return PTR_ERR(task);
 		}
 
 		rqstp->rq_task = task;
@@ -763,15 +736,62 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 
 		svc_sock_update_bufs(serv);
 		wake_up_process(task);
-	}
+	} while (nrservs > 0);
+
+	return 0;
+}
+
+
+/* destroy old threads */
+static int
+svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+	struct task_struct *task;
+	unsigned int state = serv->sv_nrthreads-1;
+
 	/* destroy old threads */
-	while (nrservs < 0 &&
-	       (task = choose_victim(serv, pool, &state)) != NULL) {
+	do {
+		task = choose_victim(serv, pool, &state);
+		if (task == NULL)
+			break;
 		send_sig(SIGINT, task, 1);
 		nrservs++;
+	} while (nrservs < 0);
+
+	return 0;
+}
+
+/*
+ * Create or destroy enough new threads to make the number
+ * of threads the given number.  If `pool' is non-NULL, applies
+ * only to threads in that pool, otherwise round-robins between
+ * all pools.  Caller must ensure that mutual exclusion between this and
+ * server startup or shutdown.
+ *
+ * Destroying threads relies on the service threads filling in
+ * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
+ * has been created using svc_create_pooled().
+ *
+ * Based on code that used to be in nfsd_svc() but tweaked
+ * to be pool-aware.
+ */
+int
+svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+	if (pool == NULL) {
+		/* The -1 assumes caller has done a svc_get() */
+		nrservs -= (serv->sv_nrthreads-1);
+	} else {
+		spin_lock_bh(&pool->sp_lock);
+		nrservs -= pool->sp_nrthreads;
+		spin_unlock_bh(&pool->sp_lock);
 	}
 
-	return error;
+	if (nrservs > 0)
+		return svc_start_kthreads(serv, pool, nrservs);
+	if (nrservs < 0)
+		return svc_signal_kthreads(serv, pool, nrservs);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(svc_set_num_threads);
 
-- 
cgit v1.2.3


From ed6473ddc704a2005b9900ca08e236ebb2d8540a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 26 Apr 2017 11:55:27 -0400
Subject: NFSv4: Fix callback server shutdown

We want to use kthread_stop() in order to ensure the threads are
shut down before we tear down the nfs_callback_info in nfs_callback_down.

Tested-and-reviewed-by: Kinglong Mee <kinglongmee@gmail.com>
Reported-by: Kinglong Mee <kinglongmee@gmail.com>
Fixes: bb6aeba736ba9 ("NFSv4.x: Switch to using svc_set_num_threads()...")
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfs/callback.c          | 24 ++++++++++++++++--------
 include/linux/sunrpc/svc.h |  1 +
 net/sunrpc/svc.c           | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index c5e27ebd8da8..73a1f928226c 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -76,7 +76,10 @@ nfs4_callback_svc(void *vrqstp)
 
 	set_freezable();
 
-	while (!kthread_should_stop()) {
+	while (!kthread_freezable_should_stop(NULL)) {
+
+		if (signal_pending(current))
+			flush_signals(current);
 		/*
 		 * Listen for a request on the socket
 		 */
@@ -85,6 +88,8 @@ nfs4_callback_svc(void *vrqstp)
 			continue;
 		svc_process(rqstp);
 	}
+	svc_exit_thread(rqstp);
+	module_put_and_exit(0);
 	return 0;
 }
 
@@ -103,9 +108,10 @@ nfs41_callback_svc(void *vrqstp)
 
 	set_freezable();
 
-	while (!kthread_should_stop()) {
-		if (try_to_freeze())
-			continue;
+	while (!kthread_freezable_should_stop(NULL)) {
+
+		if (signal_pending(current))
+			flush_signals(current);
 
 		prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
 		spin_lock_bh(&serv->sv_cb_lock);
@@ -121,11 +127,13 @@ nfs41_callback_svc(void *vrqstp)
 				error);
 		} else {
 			spin_unlock_bh(&serv->sv_cb_lock);
-			schedule();
+			if (!kthread_should_stop())
+				schedule();
 			finish_wait(&serv->sv_cb_waitq, &wq);
 		}
-		flush_signals(current);
 	}
+	svc_exit_thread(rqstp);
+	module_put_and_exit(0);
 	return 0;
 }
 
@@ -221,14 +229,14 @@ err_bind:
 static struct svc_serv_ops nfs40_cb_sv_ops = {
 	.svo_function		= nfs4_callback_svc,
 	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
-	.svo_setup		= svc_set_num_threads,
+	.svo_setup		= svc_set_num_threads_sync,
 	.svo_module		= THIS_MODULE,
 };
 #if defined(CONFIG_NFS_V4_1)
 static struct svc_serv_ops nfs41_cb_sv_ops = {
 	.svo_function		= nfs41_callback_svc,
 	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
-	.svo_setup		= svc_set_num_threads,
+	.svo_setup		= svc_set_num_threads_sync,
 	.svo_module		= THIS_MODULE,
 };
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 6ef19cf658b4..94631026f79c 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -473,6 +473,7 @@ void		   svc_pool_map_put(void);
 struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
 			struct svc_serv_ops *);
 int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+int		   svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int);
 int		   svc_pool_stats_open(struct svc_serv *serv, struct file *file);
 void		   svc_destroy(struct svc_serv *);
 void		   svc_shutdown_net(struct svc_serv *, struct net *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 98dc33ae738b..bc0f5a0ecbdc 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -795,6 +795,44 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 }
 EXPORT_SYMBOL_GPL(svc_set_num_threads);
 
+/* destroy old threads */
+static int
+svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+	struct task_struct *task;
+	unsigned int state = serv->sv_nrthreads-1;
+
+	/* destroy old threads */
+	do {
+		task = choose_victim(serv, pool, &state);
+		if (task == NULL)
+			break;
+		kthread_stop(task);
+		nrservs++;
+	} while (nrservs < 0);
+	return 0;
+}
+
+int
+svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+	if (pool == NULL) {
+		/* The -1 assumes caller has done a svc_get() */
+		nrservs -= (serv->sv_nrthreads-1);
+	} else {
+		spin_lock_bh(&pool->sp_lock);
+		nrservs -= pool->sp_nrthreads;
+		spin_unlock_bh(&pool->sp_lock);
+	}
+
+	if (nrservs > 0)
+		return svc_start_kthreads(serv, pool, nrservs);
+	if (nrservs < 0)
+		return svc_stop_kthreads(serv, pool, nrservs);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
+
 /*
  * Called from a server thread as it's exiting. Caller must hold the "service
  * mutex" for the service.
-- 
cgit v1.2.3


From db4b0dfab7b016f5514442046d86a9727ee87f12 Mon Sep 17 00:00:00 2001
From: Denis Kirjanov <kda@linux-powerpc.org>
Date: Wed, 29 Mar 2017 06:55:37 -0400
Subject: KVM: PPC: Book3S HV: Avoid preemptibility warning in module
 initialization

With CONFIG_DEBUG_PREEMPT, get_paca() produces the following warning
in kvmppc_book3s_init_hv() since it calls debug_smp_processor_id().

There is no real issue with the xics_phys field.
If paca->kvm_hstate.xics_phys is non-zero on one cpu, it will be
non-zero on them all.  Therefore this is not fixing any actual
problem, just the warning.

[  138.521188] BUG: using smp_processor_id() in preemptible [00000000] code: modprobe/5596
[  138.521308] caller is .kvmppc_book3s_init_hv+0x184/0x350 [kvm_hv]
[  138.521404] CPU: 5 PID: 5596 Comm: modprobe Not tainted 4.11.0-rc3-00022-gc7e790c #1
[  138.521509] Call Trace:
[  138.521563] [c0000007d018b810] [c0000000023eef10] .dump_stack+0xe4/0x150 (unreliable)
[  138.521694] [c0000007d018b8a0] [c000000001f6ec04] .check_preemption_disabled+0x134/0x150
[  138.521829] [c0000007d018b940] [d00000000a010274] .kvmppc_book3s_init_hv+0x184/0x350 [kvm_hv]
[  138.521963] [c0000007d018ba00] [c00000000191d5cc] .do_one_initcall+0x5c/0x1c0
[  138.522082] [c0000007d018bad0] [c0000000023e9494] .do_init_module+0x84/0x240
[  138.522201] [c0000007d018bb70] [c000000001aade18] .load_module+0x1f68/0x2a10
[  138.522319] [c0000007d018bd20] [c000000001aaeb30] .SyS_finit_module+0xc0/0xf0
[  138.522439] [c0000007d018be30] [c00000000191baec] system_call+0x38/0xfc

Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 06b7d8ae27e5..d42182eb0c26 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3928,7 +3928,7 @@ static int kvmppc_book3s_init_hv(void)
 	 * indirectly, via OPAL.
 	 */
 #ifdef CONFIG_SMP
-	if (!get_paca()->kvm_hstate.xics_phys) {
+	if (!local_paca->kvm_hstate.xics_phys) {
 		struct device_node *np;
 
 		np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
-- 
cgit v1.2.3


From a37c45cd82e62a361706b9688a984a3a63957321 Mon Sep 17 00:00:00 2001
From: Michael Davidson <md@google.com>
Date: Tue, 25 Apr 2017 15:47:35 -0700
Subject: kbuild: clang: add -no-integrated-as to KBUILD_[AC]FLAGS

The Linux Kernel relies on GCC's acceptance of inline assembly as an
opaque object which will not have any validation performed on the content.
The current behaviour in LLVM is to perform validation of the contents by
means of parsing the input if the MC layer can handle it.

Disable clangs integrated assembler and use the GNU assembler instead.

Wording-mostly-from: Saleem Abdulrasool <compnerd@compnerd.org>
Signed-off-by: Michael Davidson <md@google.com>
Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Makefile b/Makefile
index 439443176161..10d6cd348abb 100644
--- a/Makefile
+++ b/Makefile
@@ -708,6 +708,8 @@ KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
 # See modpost pattern 2
 KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
 KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
+KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
+KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
 else
 
 # These warnings generated too much noise in a regular build.
-- 
cgit v1.2.3


From 209aa2308365387bc03905b7b4bb36c52ea1e696 Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Fri, 21 Apr 2017 21:35:51 +0200
Subject: nfs/filelayout: fix NULL pointer dereference in
 fl_pnfs_update_layout()

Calling pnfs_put_lset on an IS_ERR pointer results in a NULL pointer
dereference like the one below. At the same time the check of retvalue
of filelayout_check_deviceid() sets lseg to error, but does not free it
before that.

[ 3000.636161] BUG: unable to handle kernel NULL pointer dereference at 000000000000003c
[ 3000.636970] IP: pnfs_put_lseg+0x29/0x100 [nfsv4]
[ 3000.637420] PGD 4f23b067
[ 3000.637421] PUD 4a0f4067
[ 3000.637679] PMD 0
[ 3000.637937]
[ 3000.638287] Oops: 0000 [#1] SMP
[ 3000.638591] Modules linked in: nfs_layout_nfsv41_files nfsv3 nfnetlink_queue nfnetlink_log nfnetlink bluetooth rfkill rpcsec_gss_krb5 nfsv4 nfs fscache binfmt_misc arc4 md4 nls_utf8 cifs ccm dns_resolver rpcrdma ib_isert iscsi_target_mod ib_iser rdma_cm iw_cm libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib ib_ucm ib_uverbs ib_umad ib_cm ib_core nls_koi8_u nls_cp932 ts_kmp nf_conntrack_ipv4 nf_defrag_ipv4 nf_conntrack crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcspkr virtio_balloon ppdev virtio_rng parport_pc i2c_piix4 parport acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c ata_generic pata_acpi virtio_blk virtio_net cirrus drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops crc32c_intel ata_piix ttm libata drm serio_raw
[ 3000.645245]  i2c_core virtio_pci virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: xt_u32]
[ 3000.646360] CPU: 1 PID: 26402 Comm: date Not tainted 4.11.0-rc7.1.el7.test.x86_64 #1
[ 3000.647092] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 3000.647638] task: ffff8800415ada00 task.stack: ffffc90000ff0000
[ 3000.648207] RIP: 0010:pnfs_put_lseg+0x29/0x100 [nfsv4]
[ 3000.648696] RSP: 0018:ffffc90000ff39b8 EFLAGS: 00010246
[ 3000.649193] RAX: 0000000000000000 RBX: fffffffffffffff4 RCX: 00000000000d43be
[ 3000.649859] RDX: 00000000000d43bd RSI: 0000000000000000 RDI: fffffffffffffff4
[ 3000.650530] RBP: ffffc90000ff39d8 R08: 000000000001e320 R09: ffffffffa05c35ce
[ 3000.651203] R10: ffff88007fd1e320 R11: ffffea0001283d80 R12: 0000000001400040
[ 3000.651875] R13: ffff88004f77d9f0 R14: ffffc90000ff3cd8 R15: ffff8800417ade00
[ 3000.652546] FS:  00007fac4d5cd740(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000
[ 3000.653304] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3000.653849] CR2: 000000000000003c CR3: 000000004f080000 CR4: 00000000000406e0
[ 3000.654527] Call Trace:
[ 3000.654771]  fl_pnfs_update_layout.constprop.20+0x10c/0x150 [nfs_layout_nfsv41_files]
[ 3000.655505]  filelayout_pg_init_write+0x21d/0x270 [nfs_layout_nfsv41_files]
[ 3000.656195]  __nfs_pageio_add_request+0x11c/0x490 [nfs]
[ 3000.656698]  nfs_pageio_add_request+0xac/0x260 [nfs]
[ 3000.657180]  nfs_do_writepage+0x109/0x2e0 [nfs]
[ 3000.657616]  nfs_writepages_callback+0x16/0x30 [nfs]
[ 3000.658096]  write_cache_pages+0x26f/0x510
[ 3000.658495]  ? nfs_do_writepage+0x2e0/0x2e0 [nfs]
[ 3000.658946]  ? _raw_spin_unlock_bh+0x1e/0x20
[ 3000.659357]  ? wb_wakeup_delayed+0x5f/0x70
[ 3000.659748]  ? __mark_inode_dirty+0x2eb/0x360
[ 3000.660170]  nfs_writepages+0x84/0xd0 [nfs]
[ 3000.660575]  ? nfs_updatepage+0x571/0xb70 [nfs]
[ 3000.661012]  do_writepages+0x1e/0x30
[ 3000.661358]  __filemap_fdatawrite_range+0xc6/0x100
[ 3000.661819]  filemap_write_and_wait_range+0x41/0x90
[ 3000.662292]  nfs_file_fsync+0x34/0x1f0 [nfs]
[ 3000.662704]  vfs_fsync_range+0x3d/0xb0
[ 3000.663065]  vfs_fsync+0x1c/0x20
[ 3000.663385]  nfs4_file_flush+0x57/0x80 [nfsv4]
[ 3000.663813]  filp_close+0x2f/0x70
[ 3000.664132]  __close_fd+0x9a/0xc0
[ 3000.664453]  SyS_close+0x23/0x50
[ 3000.664785]  do_syscall_64+0x67/0x180
[ 3000.665162]  entry_SYSCALL64_slow_path+0x25/0x25
[ 3000.665600] RIP: 0033:0x7fac4d0e1e90
[ 3000.665946] RSP: 002b:00007ffd54e90c88 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
[ 3000.666679] RAX: ffffffffffffffda RBX: 00007fac4d3b5400 RCX: 00007fac4d0e1e90
[ 3000.667349] RDX: 0000000000000000 RSI: 00007fac4d5d9000 RDI: 0000000000000001
[ 3000.668031] RBP: 0000000000000000 R08: 00007fac4d3b6a00 R09: 00007fac4d5cd740
[ 3000.668709] R10: 00007ffd54e909e0 R11: 0000000000000246 R12: 0000000000000000
[ 3000.669385] R13: 00007fac4d3b5e80 R14: 0000000000000000 R15: 0000000000000000
[ 3000.670061] Code: 00 00 66 66 66 66 90 55 48 85 ff 48 89 e5 41 56 41 55 41 54 53 48 89 fb 0f 84 97 00 00 00 f6 05 16 8f bc ff 10 0f 85 a6 00 00 00 <4c> 8b 63 48 48 8d 7b 38 49 8b 84 24 90 00 00 00 4c 8d a8 88 00
[ 3000.671831] RIP: pnfs_put_lseg+0x29/0x100 [nfsv4] RSP: ffffc90000ff39b8
[ 3000.672462] CR2: 000000000000003c

Signed-off-by: Artem Savkov <asavkov@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/filelayout/filelayout.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index d4174da89302..1cf85d65b748 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -921,11 +921,11 @@ fl_pnfs_update_layout(struct inode *ino,
 	fl = FILELAYOUT_LSEG(lseg);
 
 	status = filelayout_check_deviceid(lo, fl, gfp_flags);
-	if (status)
+	if (status) {
+		pnfs_put_lseg(lseg);
 		lseg = ERR_PTR(status);
+	}
 out:
-	if (IS_ERR(lseg))
-		pnfs_put_lseg(lseg);
 	return lseg;
 }
 
-- 
cgit v1.2.3


From 4edabfd7d0f7d39eeda8ffac76d9e884c22951d9 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sun, 23 Apr 2017 14:49:41 +0800
Subject: NFSv4: check return value of xdr_inline_decode

Function xdr_inline_decode() will return a NULL pointer if the input
buffer does not have long enough buffer to decode nbytes of data.
However, in function decode_op_map(), the return value of
xdr_inline_decode() is not validated before it is used. This patch adds
a check to the return value of xdr_inline_decode().

Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4xdr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 125212588115..dbfe48ac3529 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5570,6 +5570,8 @@ static int decode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
 	unsigned int i;
 
 	p = xdr_inline_decode(xdr, 4);
+	if (!p)
+		return -EIO;
 	bitmap_words = be32_to_cpup(p++);
 	if (bitmap_words > NFS4_OP_MAP_NUM_WORDS)
 		return -EIO;
-- 
cgit v1.2.3


From 88bd4f862943205183c2fc20bbba103c71e9990b Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Wed, 26 Apr 2017 14:21:22 -0400
Subject: NFS4.1 handle interrupted slot reuse from ERR_DELAY

If the RPC slot was interrupted and server replied to the next
operation on the "reused" slot with ERR_DELAY, don't clear out
the "interrupted" flag until we properly recover.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index dbfc7574fab0..c2b82caa9068 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -698,7 +698,8 @@ static int nfs41_sequence_process(struct rpc_task *task,
 	session = slot->table->session;
 
 	if (slot->interrupted) {
-		slot->interrupted = 0;
+		if (res->sr_status != -NFS4ERR_DELAY)
+			slot->interrupted = 0;
 		interrupted = true;
 	}
 
-- 
cgit v1.2.3


From bdebfccd0ea6fc00c5ea8c832401111958bd24cf Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 27 Apr 2017 15:30:00 -0400
Subject: pNFS: Ensure we check layout validity before marking it for return

pnfs_error_mark_layout_for_return needs to check that the layout is
valid before calling pnfs_set_plh_return_info().

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index eff266ea813c..e45b3ffeda08 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2049,6 +2049,10 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
 	bool return_now = false;
 
 	spin_lock(&inode->i_lock);
+	if (!pnfs_layout_is_valid(lo)) {
+		spin_unlock(&inode->i_lock);
+		return;
+	}
 	pnfs_set_plh_return_info(lo, range.iomode, 0);
 	/* Block LAYOUTGET */
 	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
-- 
cgit v1.2.3


From 3417f3528a2c7f8824cad7f0c35425961e98886b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Apr 2017 19:44:55 +0200
Subject: clk: ti: divider: try to fix ti_clk_register_divider

Commit 6c0afb503937 ("clk: ti: convert to use proper register
definition for all accesses") converted all register accesses in
the TI clk driver to use a proper struct instead of a void
pointer casted struct that fits into a u32. Unfortunately, it
missed a conversion here in the didivder code, leading to a
compiler warning like so:

drivers/clk/ti/divider.c: In function 'ti_clk_register_divider':
drivers/clk/ti/divider.c:460:8: error: 'reg' may be used uninitialized in this function [-Werror=maybe-uninitialized]

Treating a 'u32' variable as a structure leads to a stack
overflow here, and the register address we pass down is never
initialized. Convert this part of the code as well so things
work properly.

Fixes: 6c0afb503937 ("clk: ti: convert to use proper register definition for all accesses")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
[sboyd@codeaurora.org: Fixed fixes tag, rewrote commit message,
s/reg_setup/reg/]
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/ti/divider.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c
index d6dcb283b72b..88f04a4cb890 100644
--- a/drivers/clk/ti/divider.c
+++ b/drivers/clk/ti/divider.c
@@ -428,22 +428,17 @@ struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup)
 
 struct clk *ti_clk_register_divider(struct ti_clk *setup)
 {
-	struct ti_clk_divider *div;
-	struct clk_omap_reg *reg_setup;
-	u32 reg;
+	struct ti_clk_divider *div = setup->data;
+	struct clk_omap_reg reg = {
+		.index = div->module,
+		.offset = div->reg,
+	};
 	u8 width;
 	u32 flags = 0;
 	u8 div_flags = 0;
 	const struct clk_div_table *table;
 	struct clk *clk;
 
-	div = setup->data;
-
-	reg_setup = (struct clk_omap_reg *)&reg;
-
-	reg_setup->index = div->module;
-	reg_setup->offset = div->reg;
-
 	if (div->flags & CLKF_INDEX_STARTS_AT_ONE)
 		div_flags |= CLK_DIVIDER_ONE_BASED;
 
@@ -458,7 +453,7 @@ struct clk *ti_clk_register_divider(struct ti_clk *setup)
 		return (struct clk *)table;
 
 	clk = _register_divider(NULL, setup->name, div->parent,
-				flags, (void __iomem *)reg, div->bit_shift,
+				flags, &reg, div->bit_shift,
 				width, div_flags, table);
 
 	if (IS_ERR(clk))
-- 
cgit v1.2.3


From 9cf7adeca1f307b578021f227119495eecb1a510 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 28 Apr 2017 08:53:22 +0800
Subject: ACPICA: iasl: add ASL conversion tool

ACPICA commit c04d310039d3e0ed1cb62876fe7e596fbc75ab01
ACPICA commit a65c1df7e6b4bad8e37df822018c40c6c446add9

The key feature of this utility is that the original comments within
the input ASL files are preserved during the conversion process, and
included within the converted ASL+ file -- thus creating a transparent
conversion of existing ASL files to ASL+ (ASL 2.0)

This patch is an automatic generation of the ASL converter commit,
Linux kernel isn't affected by the functionality provided in this
commit, but requires the linuxized changes to support future ACPICA
release automation.

Link: https://github.com/acpica/acpica/commit/c04d3100
Link: https://github.com/acpica/acpica/commit/a65c1df7
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Jung-uk Kim <jkim@FreeBSD.org>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/acconvert.h | 144 ++++++++++++++++++++++++++++++++++++++++
 drivers/acpi/acpica/acglobal.h  |  53 +++++++++++++++
 drivers/acpi/acpica/aclocal.h   |  99 ++++++++++++++++++++++-----
 drivers/acpi/acpica/acmacros.h  |  35 ++++++++++
 drivers/acpi/acpica/acopcode.h  |   2 +
 drivers/acpi/acpica/amlcode.h   |   4 +-
 drivers/acpi/acpica/psargs.c    |  23 +++++++
 drivers/acpi/acpica/psloop.c    |  32 +++++++++
 drivers/acpi/acpica/psobject.c  |  38 +++++++++++
 drivers/acpi/acpica/psopcode.c  |   5 +-
 drivers/acpi/acpica/psopinfo.c  |   2 +-
 drivers/acpi/acpica/pstree.c    |   7 ++
 drivers/acpi/acpica/psutils.c   |  11 +++
 drivers/acpi/acpica/utalloc.c   |  50 ++++++++++++++
 drivers/acpi/acpica/utdebug.c   |   1 +
 include/acpi/acconfig.h         |   1 +
 include/acpi/actbl2.h           |   1 +
 17 files changed, 490 insertions(+), 18 deletions(-)
 create mode 100644 drivers/acpi/acpica/acconvert.h

diff --git a/drivers/acpi/acpica/acconvert.h b/drivers/acpi/acpica/acconvert.h
new file mode 100644
index 000000000000..c84223b60b35
--- /dev/null
+++ b/drivers/acpi/acpica/acconvert.h
@@ -0,0 +1,144 @@
+/******************************************************************************
+ *
+ * Module Name: acapps - common include for ACPI applications/tools
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2017, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#ifndef _ACCONVERT
+#define _ACCONVERT
+
+/* Definitions for comment state */
+
+#define ASL_COMMENT_STANDARD    1
+#define ASLCOMMENT_INLINE       2
+#define ASL_COMMENT_OPEN_PAREN  3
+#define ASL_COMMENT_CLOSE_PAREN 4
+#define ASL_COMMENT_CLOSE_BRACE 5
+
+/* Definitions for comment print function*/
+
+#define AML_COMMENT_STANDARD    1
+#define AMLCOMMENT_INLINE       2
+#define AML_COMMENT_END_NODE    3
+#define AML_NAMECOMMENT         4
+#define AML_COMMENT_CLOSE_BRACE 5
+#define AML_COMMENT_ENDBLK      6
+#define AML_COMMENT_INCLUDE     7
+
+#ifdef ACPI_ASL_COMPILER
+/*
+ * cvcompiler
+ */
+void
+cv_process_comment(struct asl_comment_state current_state,
+		   char *string_buffer, int c1);
+
+void
+cv_process_comment_type2(struct asl_comment_state current_state,
+			 char *string_buffer);
+
+u32 cv_calculate_comment_lengths(union acpi_parse_object *op);
+
+void cv_process_comment_state(char input);
+
+char *cv_append_inline_comment(char *inline_comment, char *to_add);
+
+void cv_add_to_comment_list(char *to_add);
+
+void cv_place_comment(u8 type, char *comment_string);
+
+u32 cv_parse_op_block_type(union acpi_parse_object *op);
+
+struct acpi_comment_node *cv_comment_node_calloc(void);
+
+void cg_write_aml_def_block_comment(union acpi_parse_object *op);
+
+void
+cg_write_one_aml_comment(union acpi_parse_object *op,
+			 char *comment_to_print, u8 input_option);
+
+void cg_write_aml_comment(union acpi_parse_object *op);
+
+/*
+ * cvparser
+ */
+void
+cv_init_file_tree(struct acpi_table_header *table,
+		  u8 *aml_start, u32 aml_length);
+
+void cv_clear_op_comments(union acpi_parse_object *op);
+
+struct acpi_file_node *cv_filename_exists(char *filename,
+					  struct acpi_file_node *head);
+
+void cv_label_file_node(union acpi_parse_object *op);
+
+void
+cv_capture_list_comments(struct acpi_parse_state *parser_state,
+			 struct acpi_comment_node *list_head,
+			 struct acpi_comment_node *list_tail);
+
+void cv_capture_comments_only(struct acpi_parse_state *parser_state);
+
+void cv_capture_comments(struct acpi_walk_state *walk_state);
+
+void cv_transfer_comments(union acpi_parse_object *op);
+
+/*
+ * cvdisasm
+ */
+void cv_switch_files(u32 level, union acpi_parse_object *op);
+
+u8 cv_file_has_switched(union acpi_parse_object *op);
+
+void cv_close_paren_write_comment(union acpi_parse_object *op, u32 level);
+
+void cv_close_brace_write_comment(union acpi_parse_object *op, u32 level);
+
+void
+cv_print_one_comment_list(struct acpi_comment_node *comment_list, u32 level);
+
+void
+cv_print_one_comment_type(union acpi_parse_object *op,
+			  u8 comment_type, char *end_str, u32 level);
+
+#endif
+
+#endif				/* _ACCONVERT */
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 1d955fe216c4..abe8c316908c 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -370,6 +370,59 @@ ACPI_GLOBAL(const char, *acpi_gbl_pld_shape_list[]);
 
 #endif
 
+/*
+ * Meant for the -ca option.
+ */
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_inline_comment, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_end_node_comment, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_open_brace_comment, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_close_brace_comment, NULL);
+
+ACPI_INIT_GLOBAL(char *, acpi_gbl_root_filename, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_filename, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_parent_filename, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_include_filename, NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_last_list_head, NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_def_blk_comment_list_head,
+		 NULL);
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_def_blk_comment_list_tail,
+		 NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_reg_comment_list_head,
+		 NULL);
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_reg_comment_list_tail,
+		 NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_inc_comment_list_head,
+		 NULL);
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_inc_comment_list_tail,
+		 NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_end_blk_comment_list_head,
+		 NULL);
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_end_blk_comment_list_tail,
+		 NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_addr_node,
+		 *acpi_gbl_comment_addr_list_head, NULL);
+
+ACPI_INIT_GLOBAL(union acpi_parse_object, *acpi_gbl_current_scope, NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_file_node, *acpi_gbl_file_tree_root, NULL);
+
+ACPI_GLOBAL(acpi_cache_t *, acpi_gbl_reg_comment_cache);
+ACPI_GLOBAL(acpi_cache_t *, acpi_gbl_comment_addr_cache);
+ACPI_GLOBAL(acpi_cache_t *, acpi_gbl_file_cache);
+
+ACPI_INIT_GLOBAL(u8, gbl_capture_comments, FALSE);
+
+ACPI_INIT_GLOBAL(u8, acpi_gbl_debug_asl_conversion, FALSE);
+ACPI_INIT_GLOBAL(ACPI_FILE, acpi_gbl_conv_debug_file, NULL);
+
+ACPI_GLOBAL(char, acpi_gbl_table_sig[4]);
+
 /*****************************************************************************
  *
  * Application globals
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index ca984261acbb..f9b3f7fef462 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -53,7 +53,7 @@ typedef u32 acpi_mutex_handle;
 
 /* Total number of aml opcodes defined */
 
-#define AML_NUM_OPCODES                 0x82
+#define AML_NUM_OPCODES                 0x83
 
 /* Forward declarations */
 
@@ -754,21 +754,52 @@ union acpi_parse_value {
 #define ACPI_DISASM_ONLY_MEMBERS(a)
 #endif
 
+#if defined(ACPI_ASL_COMPILER)
+#define ACPI_CONVERTER_ONLY_MEMBERS(a)  a;
+#else
+#define ACPI_CONVERTER_ONLY_MEMBERS(a)
+#endif
+
 #define ACPI_PARSE_COMMON \
-	union acpi_parse_object         *parent;        /* Parent op */\
-	u8                              descriptor_type; /* To differentiate various internal objs */\
-	u8                              flags;          /* Type of Op */\
-	u16                             aml_opcode;     /* AML opcode */\
-	u8                              *aml;           /* Address of declaration in AML */\
-	union acpi_parse_object         *next;          /* Next op */\
-	struct acpi_namespace_node      *node;          /* For use by interpreter */\
-	union acpi_parse_value          value;          /* Value or args associated with the opcode */\
-	u8                              arg_list_length; /* Number of elements in the arg list */\
-	ACPI_DISASM_ONLY_MEMBERS (\
-	u16                             disasm_flags;   /* Used during AML disassembly */\
-	u8                              disasm_opcode;  /* Subtype used for disassembly */\
-	char                            *operator_symbol;/* Used for C-style operator name strings */\
-	char                            aml_op_name[16])	/* Op name (debug only) */
+	union acpi_parse_object         *parent;            /* Parent op */\
+	u8                              descriptor_type;    /* To differentiate various internal objs */\
+	u8                              flags;              /* Type of Op */\
+	u16                             aml_opcode;         /* AML opcode */\
+	u8                              *aml;               /* Address of declaration in AML */\
+	union acpi_parse_object         *next;              /* Next op */\
+	struct acpi_namespace_node      *node;              /* For use by interpreter */\
+	union acpi_parse_value          value;              /* Value or args associated with the opcode */\
+	u8                              arg_list_length;    /* Number of elements in the arg list */\
+	 ACPI_DISASM_ONLY_MEMBERS (\
+	u16                             disasm_flags;       /* Used during AML disassembly */\
+	u8                              disasm_opcode;      /* Subtype used for disassembly */\
+	char                            *operator_symbol;   /* Used for C-style operator name strings */\
+	char                            aml_op_name[16])    /* Op name (debug only) */\
+	 ACPI_CONVERTER_ONLY_MEMBERS (\
+	char                            *inline_comment;    /* Inline comment */\
+	char                            *end_node_comment;  /* End of node comment */\
+	char                            *name_comment;      /* Comment associated with the first parameter of the name node */\
+	char                            *close_brace_comment; /* Comments that come after } on the same as } */\
+	struct acpi_comment_node        *comment_list;      /* comments that appears before this node */\
+	struct acpi_comment_node        *end_blk_comment;   /* comments that at the end of a block but before ) or } */\
+	char                            *cv_filename;       /* Filename associated with this node. Used for ASL/ASL+ converter */\
+	char                            *cv_parent_filename)	/* Parent filename associated with this node. Used for ASL/ASL+ converter */
+
+/* categories of comments */
+
+typedef enum {
+	STANDARD_COMMENT = 1,
+	INLINE_COMMENT,
+	ENDNODE_COMMENT,
+	OPENBRACE_COMMENT,
+	CLOSE_BRACE_COMMENT,
+	STD_DEFBLK_COMMENT,
+	END_DEFBLK_COMMENT,
+	FILENAME_COMMENT,
+	PARENTFILENAME_COMMENT,
+	ENDBLK_COMMENT,
+	INCLUDE_COMMENT
+} asl_comment_types;
 
 /* Internal opcodes for disasm_opcode field above */
 
@@ -789,6 +820,34 @@ union acpi_parse_value {
 #define ACPI_DASM_CASE                  0x0E	/* If/Else is a Case in a Switch/Case block */
 #define ACPI_DASM_DEFAULT               0x0F	/* Else is a Default in a Switch/Case block */
 
+/*
+ * List struct used in the -ca option
+ */
+struct acpi_comment_node {
+	char *comment;
+	struct acpi_comment_node *next;
+};
+
+struct acpi_comment_addr_node {
+	u8 *addr;
+	struct acpi_comment_addr_node *next;
+};
+
+/*
+ * File node - used for "Include" operator file stack and
+ * depdendency tree for the -ca option
+ */
+struct acpi_file_node {
+	void *file;
+	char *filename;
+	char *file_start;	/* Points to AML and indicates when the AML for this particular file starts. */
+	char *file_end;		/* Points to AML and indicates when the AML for this particular file ends. */
+	struct acpi_file_node *next;
+	struct acpi_file_node *parent;
+	u8 include_written;
+	struct acpi_comment_node *include_comment;
+};
+
 /*
  * Generic operation (for example:  If, While, Store)
  */
@@ -814,6 +873,8 @@ struct acpi_parse_obj_asl {
 	ACPI_PARSE_COMMON union acpi_parse_object *child;
 	union acpi_parse_object *parent_method;
 	char *filename;
+	u8 file_changed;
+	char *parent_filename;
 	char *external_name;
 	char *namepath;
 	char name_seg[4];
@@ -843,6 +904,14 @@ union acpi_parse_object {
 	struct acpi_parse_obj_asl asl;
 };
 
+struct asl_comment_state {
+	u8 comment_type;
+	u32 spaces_before;
+	union acpi_parse_object *latest_parse_node;
+	union acpi_parse_object *parsing_paren_brace_node;
+	u8 capture_comments;
+};
+
 /*
  * Parse state - one state per parser invocation and each control
  * method.
diff --git a/drivers/acpi/acpica/acmacros.h b/drivers/acpi/acpica/acmacros.h
index c3337514e0ed..c7f0c96cc00f 100644
--- a/drivers/acpi/acpica/acmacros.h
+++ b/drivers/acpi/acpica/acmacros.h
@@ -493,4 +493,39 @@
 
 #define ACPI_IS_OCTAL_DIGIT(d)              (((char)(d) >= '0') && ((char)(d) <= '7'))
 
+/*
+ * Macors used for the ASL-/ASL+ converter utility
+ */
+#ifdef ACPI_ASL_COMPILER
+
+#define ASL_CV_LABEL_FILENODE(a)         cv_label_file_node(a);
+#define ASL_CV_CAPTURE_COMMENTS_ONLY(a)   cv_capture_comments_only (a);
+#define ASL_CV_CAPTURE_COMMENTS(a)       cv_capture_comments (a);
+#define ASL_CV_TRANSFER_COMMENTS(a)      cv_transfer_comments (a);
+#define ASL_CV_CLOSE_PAREN(a,b)          cv_close_paren_write_comment(a,b);
+#define ASL_CV_CLOSE_BRACE(a,b)          cv_close_brace_write_comment(a,b);
+#define ASL_CV_SWITCH_FILES(a,b)         cv_switch_files(a,b);
+#define ASL_CV_CLEAR_OP_COMMENTS(a)       cv_clear_op_comments(a);
+#define ASL_CV_PRINT_ONE_COMMENT(a,b,c,d) cv_print_one_comment_type (a,b,c,d);
+#define ASL_CV_PRINT_ONE_COMMENT_LIST(a,b) cv_print_one_comment_list (a,b);
+#define ASL_CV_FILE_HAS_SWITCHED(a)       cv_file_has_switched(a)
+#define ASL_CV_INIT_FILETREE(a,b,c)      cv_init_file_tree(a,b,c);
+
+#else
+
+#define ASL_CV_LABEL_FILENODE(a)
+#define ASL_CV_CAPTURE_COMMENTS_ONLY(a)
+#define ASL_CV_CAPTURE_COMMENTS(a)
+#define ASL_CV_TRANSFER_COMMENTS(a)
+#define ASL_CV_CLOSE_PAREN(a,b)          acpi_os_printf (")");
+#define ASL_CV_CLOSE_BRACE(a,b)          acpi_os_printf ("}");
+#define ASL_CV_SWITCH_FILES(a,b)
+#define ASL_CV_CLEAR_OP_COMMENTS(a)
+#define ASL_CV_PRINT_ONE_COMMENT(a,b,c,d)
+#define ASL_CV_PRINT_ONE_COMMENT_LIST(a,b)
+#define ASL_CV_FILE_HAS_SWITCHED(a)       0
+#define ASL_CV_INIT_FILETREE(a,b,c)
+
+#endif
+
 #endif				/* ACMACROS_H */
diff --git a/drivers/acpi/acpica/acopcode.h b/drivers/acpi/acpica/acopcode.h
index e758f098ff4b..a5d9af758c52 100644
--- a/drivers/acpi/acpica/acopcode.h
+++ b/drivers/acpi/acpica/acopcode.h
@@ -90,6 +90,7 @@
 #define ARGP_BUFFER_OP                  ARGP_LIST3 (ARGP_PKGLENGTH,  ARGP_TERMARG,       ARGP_BYTELIST)
 #define ARGP_BYTE_OP                    ARGP_LIST1 (ARGP_BYTEDATA)
 #define ARGP_BYTELIST_OP                ARGP_LIST1 (ARGP_NAMESTRING)
+#define ARGP_COMMENT_OP                 ARGP_LIST2 (ARGP_BYTEDATA,   ARGP_COMMENT)
 #define ARGP_CONCAT_OP                  ARGP_LIST3 (ARGP_TERMARG,    ARGP_TERMARG,       ARGP_TARGET)
 #define ARGP_CONCAT_RES_OP              ARGP_LIST3 (ARGP_TERMARG,    ARGP_TERMARG,       ARGP_TARGET)
 #define ARGP_COND_REF_OF_OP             ARGP_LIST2 (ARGP_SIMPLENAME, ARGP_TARGET)
@@ -223,6 +224,7 @@
 #define ARGI_BUFFER_OP                  ARGI_LIST1 (ARGI_INTEGER)
 #define ARGI_BYTE_OP                    ARGI_INVALID_OPCODE
 #define ARGI_BYTELIST_OP                ARGI_INVALID_OPCODE
+#define ARGI_COMMENT_OP                 ARGI_INVALID_OPCODE
 #define ARGI_CONCAT_OP                  ARGI_LIST3 (ARGI_ANYTYPE,    ARGI_ANYTYPE,       ARGI_TARGETREF)
 #define ARGI_CONCAT_RES_OP              ARGI_LIST3 (ARGI_BUFFER,     ARGI_BUFFER,        ARGI_TARGETREF)
 #define ARGI_COND_REF_OF_OP             ARGI_LIST2 (ARGI_OBJECT_REF, ARGI_TARGETREF)
diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h
index 7e2b369487ae..176f7e9b4d0e 100644
--- a/drivers/acpi/acpica/amlcode.h
+++ b/drivers/acpi/acpica/amlcode.h
@@ -137,7 +137,8 @@
 #define AML_NOOP_OP                 (u16) 0xa3
 #define AML_RETURN_OP               (u16) 0xa4
 #define AML_BREAK_OP                (u16) 0xa5
-#define AML_BREAKPOINT_OP           (u16) 0xcc
+#define AML_COMMENT_OP              (u16) 0xa9
+#define AML_BREAKPOINT_OP          (u16) 0xcc
 #define AML_ONES_OP                 (u16) 0xff
 
 /*
@@ -236,6 +237,7 @@
 #define ARGP_SIMPLENAME             0x12	/* name_string | local_term | arg_term */
 #define ARGP_NAME_OR_REF            0x13	/* For object_type only */
 #define ARGP_MAX                    0x13
+#define ARGP_COMMENT                0x14
 
 /*
  * Resolved argument types for the AML Interpreter
diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c
index cda1e151dbc7..eb9dfaca555f 100644
--- a/drivers/acpi/acpica/psargs.c
+++ b/drivers/acpi/acpica/psargs.c
@@ -47,6 +47,7 @@
 #include "amlcode.h"
 #include "acnamesp.h"
 #include "acdispat.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("psargs")
@@ -502,6 +503,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 
 	ACPI_FUNCTION_TRACE(ps_get_next_field);
 
+	ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 	aml = parser_state->aml;
 
 	/* Determine field type */
@@ -546,6 +548,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 
 	/* Decode the field type */
 
+	ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 	switch (opcode) {
 	case AML_INT_NAMEDFIELD_OP:
 
@@ -555,6 +558,22 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 		acpi_ps_set_name(field, name);
 		parser_state->aml += ACPI_NAME_SIZE;
 
+		ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
+
+#ifdef ACPI_ASL_COMPILER
+		/*
+		 * Because the package length isn't represented as a parse tree object,
+		 * take comments surrounding this and add to the previously created
+		 * parse node.
+		 */
+		if (field->common.inline_comment) {
+			field->common.name_comment =
+			    field->common.inline_comment;
+		}
+		field->common.inline_comment = acpi_gbl_current_inline_comment;
+		acpi_gbl_current_inline_comment = NULL;
+#endif
+
 		/* Get the length which is encoded as a package length */
 
 		field->common.value.size =
@@ -609,11 +628,13 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 		if (ACPI_GET8(parser_state->aml) == AML_BUFFER_OP) {
 			parser_state->aml++;
 
+			ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 			pkg_end = parser_state->aml;
 			pkg_length =
 			    acpi_ps_get_next_package_length(parser_state);
 			pkg_end += pkg_length;
 
+			ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 			if (parser_state->aml < pkg_end) {
 
 				/* Non-empty list */
@@ -630,6 +651,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 				opcode = ACPI_GET8(parser_state->aml);
 				parser_state->aml++;
 
+				ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 				switch (opcode) {
 				case AML_BYTE_OP:	/* AML_BYTEDATA_ARG */
 
@@ -660,6 +682,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 
 				/* Fill in bytelist data */
 
+				ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 				arg->named.value.size = buffer_length;
 				arg->named.data = parser_state->aml;
 			}
diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index b7da881b66da..b4224005783c 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -55,6 +55,7 @@
 #include "acparser.h"
 #include "acdispat.h"
 #include "amlcode.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("psloop")
@@ -132,6 +133,21 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 		       !walk_state->arg_count) {
 			walk_state->aml = walk_state->parser_state.aml;
 
+			switch (op->common.aml_opcode) {
+			case AML_METHOD_OP:
+			case AML_BUFFER_OP:
+			case AML_PACKAGE_OP:
+			case AML_VARIABLE_PACKAGE_OP:
+			case AML_WHILE_OP:
+
+				break;
+
+			default:
+
+				ASL_CV_CAPTURE_COMMENTS(walk_state);
+				break;
+			}
+
 			status =
 			    acpi_ps_get_next_arg(walk_state,
 						 &(walk_state->parser_state),
@@ -480,6 +496,8 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
 	/* Iterative parsing loop, while there is more AML to process: */
 
 	while ((parser_state->aml < parser_state->aml_end) || (op)) {
+		ASL_CV_CAPTURE_COMMENTS(walk_state);
+
 		aml_op_start = parser_state->aml;
 		if (!op) {
 			status =
@@ -516,6 +534,20 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
 		 */
 		walk_state->arg_count = 0;
 
+		switch (op->common.aml_opcode) {
+		case AML_BYTE_OP:
+		case AML_WORD_OP:
+		case AML_DWORD_OP:
+		case AML_QWORD_OP:
+
+			break;
+
+		default:
+
+			ASL_CV_CAPTURE_COMMENTS(walk_state);
+			break;
+		}
+
 		/* Are there any arguments that must be processed? */
 
 		if (walk_state->arg_types) {
diff --git a/drivers/acpi/acpica/psobject.c b/drivers/acpi/acpica/psobject.c
index 5c4aff0f4f26..5bcb61831706 100644
--- a/drivers/acpi/acpica/psobject.c
+++ b/drivers/acpi/acpica/psobject.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "acparser.h"
 #include "amlcode.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("psobject")
@@ -190,6 +191,7 @@ acpi_ps_build_named_op(struct acpi_walk_state *walk_state,
 	 */
 	while (GET_CURRENT_ARG_TYPE(walk_state->arg_types) &&
 	       (GET_CURRENT_ARG_TYPE(walk_state->arg_types) != ARGP_NAME)) {
+		ASL_CV_CAPTURE_COMMENTS(walk_state);
 		status =
 		    acpi_ps_get_next_arg(walk_state,
 					 &(walk_state->parser_state),
@@ -203,6 +205,18 @@ acpi_ps_build_named_op(struct acpi_walk_state *walk_state,
 		INCREMENT_ARG_LIST(walk_state->arg_types);
 	}
 
+	/* are there any inline comments associated with the name_seg?? If so, save this. */
+
+	ASL_CV_CAPTURE_COMMENTS(walk_state);
+
+#ifdef ACPI_ASL_COMPILER
+	if (acpi_gbl_current_inline_comment != NULL) {
+		unnamed_op->common.name_comment =
+		    acpi_gbl_current_inline_comment;
+		acpi_gbl_current_inline_comment = NULL;
+	}
+#endif
+
 	/*
 	 * Make sure that we found a NAME and didn't run out of arguments
 	 */
@@ -243,6 +257,30 @@ acpi_ps_build_named_op(struct acpi_walk_state *walk_state,
 
 	acpi_ps_append_arg(*op, unnamed_op->common.value.arg);
 
+#ifdef ACPI_ASL_COMPILER
+
+	/* save any comments that might be associated with unnamed_op. */
+
+	(*op)->common.inline_comment = unnamed_op->common.inline_comment;
+	(*op)->common.end_node_comment = unnamed_op->common.end_node_comment;
+	(*op)->common.close_brace_comment =
+	    unnamed_op->common.close_brace_comment;
+	(*op)->common.name_comment = unnamed_op->common.name_comment;
+	(*op)->common.comment_list = unnamed_op->common.comment_list;
+	(*op)->common.end_blk_comment = unnamed_op->common.end_blk_comment;
+	(*op)->common.cv_filename = unnamed_op->common.cv_filename;
+	(*op)->common.cv_parent_filename =
+	    unnamed_op->common.cv_parent_filename;
+	(*op)->named.aml = unnamed_op->common.aml;
+
+	unnamed_op->common.inline_comment = NULL;
+	unnamed_op->common.end_node_comment = NULL;
+	unnamed_op->common.close_brace_comment = NULL;
+	unnamed_op->common.name_comment = NULL;
+	unnamed_op->common.comment_list = NULL;
+	unnamed_op->common.end_blk_comment = NULL;
+#endif
+
 	if ((*op)->common.aml_opcode == AML_REGION_OP ||
 	    (*op)->common.aml_opcode == AML_DATA_REGION_OP) {
 		/*
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index b8f0617fd421..c343a0d5a3d2 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -652,7 +652,10 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
 
 	/* 81 */ ACPI_OP("External", ARGP_EXTERNAL_OP, ARGI_EXTERNAL_OP,
 			 ACPI_TYPE_ANY, AML_CLASS_EXECUTE, /* ? */
-			 AML_TYPE_EXEC_3A_0T_0R, AML_FLAGS_EXEC_3A_0T_0R)
+			 AML_TYPE_EXEC_3A_0T_0R, AML_FLAGS_EXEC_3A_0T_0R),
+/* 82 */ ACPI_OP("Comment", ARGP_COMMENT_OP, ARGI_COMMENT_OP,
+			 ACPI_TYPE_STRING, AML_CLASS_ARGUMENT,
+			 AML_TYPE_LITERAL, AML_CONSTANT)
 
 /*! [End] no source code translation !*/
 };
diff --git a/drivers/acpi/acpica/psopinfo.c b/drivers/acpi/acpica/psopinfo.c
index 89f95b7f26e9..eff22950232b 100644
--- a/drivers/acpi/acpica/psopinfo.c
+++ b/drivers/acpi/acpica/psopinfo.c
@@ -226,7 +226,7 @@ const u8 acpi_gbl_short_op_index[256] = {
 /* 0x90 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x73, 0x74,
 /* 0x98 */ 0x75, 0x76, _UNK, _UNK, 0x77, 0x78, 0x79, 0x7A,
 /* 0xA0 */ 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x60, 0x61,
-/* 0xA8 */ 0x62, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
+/* 0xA8 */ 0x62, 0x82, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
 /* 0xB0 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
 /* 0xB8 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
 /* 0xC0 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
diff --git a/drivers/acpi/acpica/pstree.c b/drivers/acpi/acpica/pstree.c
index a11d475b79cd..c06d6e2fc7a5 100644
--- a/drivers/acpi/acpica/pstree.c
+++ b/drivers/acpi/acpica/pstree.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "acparser.h"
 #include "amlcode.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("pstree")
@@ -216,6 +217,7 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 
 	next = acpi_ps_get_arg(op, 0);
 	if (next) {
+		ASL_CV_LABEL_FILENODE(next);
 		return (next);
 	}
 
@@ -223,6 +225,7 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 
 	next = op->common.next;
 	if (next) {
+		ASL_CV_LABEL_FILENODE(next);
 		return (next);
 	}
 
@@ -233,6 +236,8 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 	while (parent) {
 		arg = acpi_ps_get_arg(parent, 0);
 		while (arg && (arg != origin) && (arg != op)) {
+
+			ASL_CV_LABEL_FILENODE(arg);
 			arg = arg->common.next;
 		}
 
@@ -247,6 +252,7 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 
 			/* Found sibling of parent */
 
+			ASL_CV_LABEL_FILENODE(parent->common.next);
 			return (parent->common.next);
 		}
 
@@ -254,6 +260,7 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 		parent = parent->common.parent;
 	}
 
+	ASL_CV_LABEL_FILENODE(next);
 	return (next);
 }
 
diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c
index 2fa38bb76a55..02642760cb93 100644
--- a/drivers/acpi/acpica/psutils.c
+++ b/drivers/acpi/acpica/psutils.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "acparser.h"
 #include "amlcode.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("psutils")
@@ -152,6 +153,15 @@ union acpi_parse_object *acpi_ps_alloc_op(u16 opcode, u8 *aml)
 		acpi_ps_init_op(op, opcode);
 		op->common.aml = aml;
 		op->common.flags = flags;
+		ASL_CV_CLEAR_OP_COMMENTS(op);
+
+		if (opcode == AML_SCOPE_OP) {
+			acpi_gbl_current_scope = op;
+		}
+	}
+
+	if (gbl_capture_comments) {
+		ASL_CV_TRANSFER_COMMENTS(op);
 	}
 
 	return (op);
@@ -174,6 +184,7 @@ void acpi_ps_free_op(union acpi_parse_object *op)
 {
 	ACPI_FUNCTION_NAME(ps_free_op);
 
+	ASL_CV_CLEAR_OP_COMMENTS(op);
 	if (op->common.aml_opcode == AML_INT_RETURN_VALUE_OP) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS,
 				  "Free retval op: %p\n", op));
diff --git a/drivers/acpi/acpica/utalloc.c b/drivers/acpi/acpica/utalloc.c
index a3401bd29413..5594a359dbf1 100644
--- a/drivers/acpi/acpica/utalloc.c
+++ b/drivers/acpi/acpica/utalloc.c
@@ -142,6 +142,45 @@ acpi_status acpi_ut_create_caches(void)
 	if (ACPI_FAILURE(status)) {
 		return (status);
 	}
+#ifdef ACPI_ASL_COMPILER
+	/*
+	 * For use with the ASL-/ASL+ option. This cache keeps track of regular
+	 * 0xA9 0x01 comments.
+	 */
+	status =
+	    acpi_os_create_cache("Acpi-Comment",
+				 sizeof(struct acpi_comment_node),
+				 ACPI_MAX_COMMENT_CACHE_DEPTH,
+				 &acpi_gbl_reg_comment_cache);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+
+	/*
+	 * This cache keeps track of the starting addresses of where the comments
+	 * lie. This helps prevent duplication of comments.
+	 */
+	status =
+	    acpi_os_create_cache("Acpi-Comment-Addr",
+				 sizeof(struct acpi_comment_addr_node),
+				 ACPI_MAX_COMMENT_CACHE_DEPTH,
+				 &acpi_gbl_comment_addr_cache);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+
+	/*
+	 * This cache will be used for nodes that represent files.
+	 */
+	status =
+	    acpi_os_create_cache("Acpi-File", sizeof(struct acpi_file_node),
+				 ACPI_MAX_COMMENT_CACHE_DEPTH,
+				 &acpi_gbl_file_cache);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+#endif
+
 #ifdef ACPI_DBG_TRACK_ALLOCATIONS
 
 	/* Memory allocation lists */
@@ -201,6 +240,17 @@ acpi_status acpi_ut_delete_caches(void)
 	(void)acpi_os_delete_cache(acpi_gbl_ps_node_ext_cache);
 	acpi_gbl_ps_node_ext_cache = NULL;
 
+#ifdef ACPI_ASL_COMPILER
+	(void)acpi_os_delete_cache(acpi_gbl_reg_comment_cache);
+	acpi_gbl_reg_comment_cache = NULL;
+
+	(void)acpi_os_delete_cache(acpi_gbl_comment_addr_cache);
+	acpi_gbl_comment_addr_cache = NULL;
+
+	(void)acpi_os_delete_cache(acpi_gbl_file_cache);
+	acpi_gbl_file_cache = NULL;
+#endif
+
 #ifdef ACPI_DBG_TRACK_ALLOCATIONS
 
 	/* Debug only - display leftover memory allocation, if any */
diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c
index bd5ea3101eb7..615a885e2ca3 100644
--- a/drivers/acpi/acpica/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -627,4 +627,5 @@ acpi_trace_point(acpi_trace_event_type type, u8 begin, u8 *aml, char *pathname)
 }
 
 ACPI_EXPORT_SYMBOL(acpi_trace_point)
+
 #endif
diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h
index 07740072da55..6db3b4668b1a 100644
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -78,6 +78,7 @@
 #define ACPI_MAX_EXTPARSE_CACHE_DEPTH   96	/* Parse tree objects */
 #define ACPI_MAX_OBJECT_CACHE_DEPTH     96	/* Interpreter operand objects */
 #define ACPI_MAX_NAMESPACE_CACHE_DEPTH  96	/* Namespace objects */
+#define ACPI_MAX_COMMENT_CACHE_DEPTH    96	/* Comments for the -ca option */
 
 /*
  * Should the subsystem abort the loading of an ACPI table if the
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 83666cb3ec2c..eb0eadcf9918 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -87,6 +87,7 @@
 #define ACPI_SIG_WDAT           "WDAT"	/* Watchdog Action Table */
 #define ACPI_SIG_WDDT           "WDDT"	/* Watchdog Timer Description Table */
 #define ACPI_SIG_WDRT           "WDRT"	/* Watchdog Resource Table */
+#define ACPI_SIG_XXXX           "XXXX"	/* Intermediate AML header for ASL/ASL+ converter */
 
 #ifdef ACPI_UNDEFINED_TABLES
 /*
-- 
cgit v1.2.3


From bc5150ef3331167f08adc9fd5c1f947bd5fdd1ff Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 26 Apr 2017 16:20:17 +0800
Subject: ACPICA: Update version to 20170303

ACPICA commit db13f9ffbdf16c0b4fb92d051c14c7b96f379f3f
ACPICA commit c55bb526ac9233fe4abc5ea923e136354ce7779c

Version 20170224.
Version 20170303.

Link: https://github.com/acpica/acpica/commit/db13f9ff
Link: https://github.com/acpica/acpica/commit/c55bb526
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 3795386ea706..15c86ce4df53 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -46,7 +46,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20170119
+#define ACPI_CA_VERSION                 0x20170303
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
-- 
cgit v1.2.3


From 6524e494a8acf1a281f349e6f726918d41b5a8de Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 7 Apr 2017 14:56:58 +0800
Subject: drm/amd/powerplay: align with VBIOS to support new AVFS structure

Align the driver with the latest vbios structures.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c | 21 ++---
 drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h | 24 +++---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 91 ++++++++++++----------
 3 files changed, 68 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
index b71525f838e6..de3d8f3f052b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
@@ -314,52 +314,45 @@ int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
 			le32_to_cpu(profile->gb_vdroop_table_ckson_a2);
 	param->ulGbFuseTableCksoffM1 =
 			le32_to_cpu(profile->avfsgb_fuse_table_cksoff_m1);
-	param->usGbFuseTableCksoffM2 =
+	param->ulGbFuseTableCksoffM2 =
 			le16_to_cpu(profile->avfsgb_fuse_table_cksoff_m2);
 	param->ulGbFuseTableCksoffB =
 			le32_to_cpu(profile->avfsgb_fuse_table_cksoff_b);
 	param->ulGbFuseTableCksonM1 =
 			le32_to_cpu(profile->avfsgb_fuse_table_ckson_m1);
-	param->usGbFuseTableCksonM2 =
+	param->ulGbFuseTableCksonM2 =
 			le16_to_cpu(profile->avfsgb_fuse_table_ckson_m2);
 	param->ulGbFuseTableCksonB =
 			le32_to_cpu(profile->avfsgb_fuse_table_ckson_b);
-	param->usMaxVoltage025mv =
-			le16_to_cpu(profile->max_voltage_0_25mv);
-	param->ucEnableGbVdroopTableCksoff =
-			profile->enable_gb_vdroop_table_cksoff;
+
 	param->ucEnableGbVdroopTableCkson =
 			profile->enable_gb_vdroop_table_ckson;
-	param->ucEnableGbFuseTableCksoff =
-			profile->enable_gb_fuse_table_cksoff;
 	param->ucEnableGbFuseTableCkson =
 			profile->enable_gb_fuse_table_ckson;
 	param->usPsmAgeComfactor =
 			le16_to_cpu(profile->psm_age_comfactor);
-	param->ucEnableApplyAvfsCksoffVoltage =
-			profile->enable_apply_avfs_cksoff_voltage;
 
 	param->ulDispclk2GfxclkM1 =
 			le32_to_cpu(profile->dispclk2gfxclk_a);
-	param->usDispclk2GfxclkM2 =
+	param->ulDispclk2GfxclkM2 =
 			le16_to_cpu(profile->dispclk2gfxclk_b);
 	param->ulDispclk2GfxclkB =
 			le32_to_cpu(profile->dispclk2gfxclk_c);
 	param->ulDcefclk2GfxclkM1 =
 			le32_to_cpu(profile->dcefclk2gfxclk_a);
-	param->usDcefclk2GfxclkM2 =
+	param->ulDcefclk2GfxclkM2 =
 			le16_to_cpu(profile->dcefclk2gfxclk_b);
 	param->ulDcefclk2GfxclkB =
 			le32_to_cpu(profile->dcefclk2gfxclk_c);
 	param->ulPixelclk2GfxclkM1 =
 			le32_to_cpu(profile->pixclk2gfxclk_a);
-	param->usPixelclk2GfxclkM2 =
+	param->ulPixelclk2GfxclkM2 =
 			le16_to_cpu(profile->pixclk2gfxclk_b);
 	param->ulPixelclk2GfxclkB =
 			le32_to_cpu(profile->pixclk2gfxclk_c);
 	param->ulPhyclk2GfxclkM1 =
 			le32_to_cpu(profile->phyclk2gfxclk_a);
-	param->usPhyclk2GfxclkM2 =
+	param->ulPhyclk2GfxclkM2 =
 			le16_to_cpu(profile->phyclk2gfxclk_b);
 	param->ulPhyclk2GfxclkB =
 			le32_to_cpu(profile->phyclk2gfxclk_c);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
index 7efe9b96cb33..be1579e87caf 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
@@ -69,7 +69,7 @@ struct pp_atomfwctrl_clock_dividers_soc15 {
 struct pp_atomfwctrl_avfs_parameters {
 	uint32_t   ulMaxVddc;
 	uint32_t   ulMinVddc;
-	uint8_t    ucMaxVidStep;
+
 	uint32_t   ulMeanNsigmaAcontant0;
 	uint32_t   ulMeanNsigmaAcontant1;
 	uint32_t   ulMeanNsigmaAcontant2;
@@ -82,30 +82,30 @@ struct pp_atomfwctrl_avfs_parameters {
 	uint32_t   ulGbVdroopTableCksonA0;
 	uint32_t   ulGbVdroopTableCksonA1;
 	uint32_t   ulGbVdroopTableCksonA2;
+
 	uint32_t   ulGbFuseTableCksoffM1;
-	uint16_t   usGbFuseTableCksoffM2;
-	uint32_t   ulGbFuseTableCksoffB;\
+	uint32_t   ulGbFuseTableCksoffM2;
+	uint32_t   ulGbFuseTableCksoffB;
+
 	uint32_t   ulGbFuseTableCksonM1;
-	uint16_t   usGbFuseTableCksonM2;
+	uint32_t   ulGbFuseTableCksonM2;
 	uint32_t   ulGbFuseTableCksonB;
-	uint16_t   usMaxVoltage025mv;
-	uint8_t    ucEnableGbVdroopTableCksoff;
+
 	uint8_t    ucEnableGbVdroopTableCkson;
-	uint8_t    ucEnableGbFuseTableCksoff;
 	uint8_t    ucEnableGbFuseTableCkson;
 	uint16_t   usPsmAgeComfactor;
-	uint8_t    ucEnableApplyAvfsCksoffVoltage;
+
 	uint32_t   ulDispclk2GfxclkM1;
-	uint16_t   usDispclk2GfxclkM2;
+	uint32_t   ulDispclk2GfxclkM2;
 	uint32_t   ulDispclk2GfxclkB;
 	uint32_t   ulDcefclk2GfxclkM1;
-	uint16_t   usDcefclk2GfxclkM2;
+	uint32_t   ulDcefclk2GfxclkM2;
 	uint32_t   ulDcefclk2GfxclkB;
 	uint32_t   ulPixelclk2GfxclkM1;
-	uint16_t   usPixelclk2GfxclkM2;
+	uint32_t   ulPixelclk2GfxclkM2;
 	uint32_t   ulPixelclk2GfxclkB;
 	uint32_t   ulPhyclk2GfxclkM1;
-	uint16_t   usPhyclk2GfxclkM2;
+	uint32_t   ulPhyclk2GfxclkM2;
 	uint32_t   ulPhyclk2GfxclkB;
 };
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 83949550edac..561b837b09be 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -2073,66 +2073,70 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 		result = pp_atomfwctrl_get_avfs_information(hwmgr, &avfs_params);
 		if (!result) {
 			pp_table->MinVoltageVid = (uint8_t)
-					convert_to_vid((uint16_t)(avfs_params.ulMaxVddc));
-			pp_table->MaxVoltageVid = (uint8_t)
 					convert_to_vid((uint16_t)(avfs_params.ulMinVddc));
-			pp_table->BtcGbVdroopTableCksOn.a0 =
-					cpu_to_le32(avfs_params.ulGbVdroopTableCksonA0);
-			pp_table->BtcGbVdroopTableCksOn.a1 =
-					cpu_to_le32(avfs_params.ulGbVdroopTableCksonA1);
-			pp_table->BtcGbVdroopTableCksOn.a2 =
-					cpu_to_le32(avfs_params.ulGbVdroopTableCksonA2);
+			pp_table->MaxVoltageVid = (uint8_t)
+					convert_to_vid((uint16_t)(avfs_params.ulMaxVddc));
+
+			pp_table->AConstant[0] = cpu_to_le32(avfs_params.ulMeanNsigmaAcontant0);
+			pp_table->AConstant[1] = cpu_to_le32(avfs_params.ulMeanNsigmaAcontant1);
+			pp_table->AConstant[2] = cpu_to_le32(avfs_params.ulMeanNsigmaAcontant2);
+			pp_table->DC_tol_sigma = cpu_to_le16(avfs_params.usMeanNsigmaDcTolSigma);
+			pp_table->Platform_mean = cpu_to_le16(avfs_params.usMeanNsigmaPlatformMean);
+			pp_table->Platform_sigma = cpu_to_le16(avfs_params.usMeanNsigmaDcTolSigma);
+			pp_table->PSM_Age_CompFactor = cpu_to_le16(avfs_params.usPsmAgeComfactor);
 
 			pp_table->BtcGbVdroopTableCksOff.a0 =
 					cpu_to_le32(avfs_params.ulGbVdroopTableCksoffA0);
+			pp_table->BtcGbVdroopTableCksOff.a0_shift = 20;
 			pp_table->BtcGbVdroopTableCksOff.a1 =
 					cpu_to_le32(avfs_params.ulGbVdroopTableCksoffA1);
+			pp_table->BtcGbVdroopTableCksOff.a1_shift = 20;
 			pp_table->BtcGbVdroopTableCksOff.a2 =
 					cpu_to_le32(avfs_params.ulGbVdroopTableCksoffA2);
+			pp_table->BtcGbVdroopTableCksOff.a2_shift = 20;
+
+			pp_table->OverrideBtcGbCksOn = avfs_params.ucEnableGbVdroopTableCkson;
+			pp_table->BtcGbVdroopTableCksOn.a0 =
+					cpu_to_le32(avfs_params.ulGbVdroopTableCksonA0);
+			pp_table->BtcGbVdroopTableCksOn.a0_shift = 20;
+			pp_table->BtcGbVdroopTableCksOn.a1 =
+					cpu_to_le32(avfs_params.ulGbVdroopTableCksonA1);
+			pp_table->BtcGbVdroopTableCksOn.a1_shift = 20;
+			pp_table->BtcGbVdroopTableCksOn.a2 =
+					cpu_to_le32(avfs_params.ulGbVdroopTableCksonA2);
+			pp_table->BtcGbVdroopTableCksOn.a2_shift = 20;
 
 			pp_table->AvfsGbCksOn.m1 =
 					cpu_to_le32(avfs_params.ulGbFuseTableCksonM1);
 			pp_table->AvfsGbCksOn.m2 =
-					cpu_to_le16(avfs_params.usGbFuseTableCksonM2);
+					cpu_to_le16(avfs_params.ulGbFuseTableCksonM2);
 			pp_table->AvfsGbCksOn.b =
 					cpu_to_le32(avfs_params.ulGbFuseTableCksonB);
 			pp_table->AvfsGbCksOn.m1_shift = 24;
 			pp_table->AvfsGbCksOn.m2_shift = 12;
+			pp_table->AvfsGbCksOn.b_shift = 0;
 
+			pp_table->OverrideAvfsGbCksOn =
+					avfs_params.ucEnableGbFuseTableCkson;
 			pp_table->AvfsGbCksOff.m1 =
 					cpu_to_le32(avfs_params.ulGbFuseTableCksoffM1);
 			pp_table->AvfsGbCksOff.m2 =
-					cpu_to_le16(avfs_params.usGbFuseTableCksoffM2);
+					cpu_to_le16(avfs_params.ulGbFuseTableCksoffM2);
 			pp_table->AvfsGbCksOff.b =
 					cpu_to_le32(avfs_params.ulGbFuseTableCksoffB);
 			pp_table->AvfsGbCksOff.m1_shift = 24;
 			pp_table->AvfsGbCksOff.m2_shift = 12;
-
-			pp_table->AConstant[0] =
-					cpu_to_le32(avfs_params.ulMeanNsigmaAcontant0);
-			pp_table->AConstant[1] =
-					cpu_to_le32(avfs_params.ulMeanNsigmaAcontant1);
-			pp_table->AConstant[2] =
-					cpu_to_le32(avfs_params.ulMeanNsigmaAcontant2);
-			pp_table->DC_tol_sigma =
-					cpu_to_le16(avfs_params.usMeanNsigmaDcTolSigma);
-			pp_table->Platform_mean =
-					cpu_to_le16(avfs_params.usMeanNsigmaPlatformMean);
-			pp_table->PSM_Age_CompFactor =
-					cpu_to_le16(avfs_params.usPsmAgeComfactor);
-			pp_table->Platform_sigma =
-					cpu_to_le16(avfs_params.usMeanNsigmaDcTolSigma);
-
-			for (i = 0; i < dep_table->count; i++)
-				pp_table->StaticVoltageOffsetVid[i] = (uint8_t)
-						(dep_table->entries[i].sclk_offset *
+			pp_table->AvfsGbCksOff.b_shift = 0;
+
+			for (i = 0; i < dep_table->count; i++) {
+				if (dep_table->entries[i].sclk_offset == 0)
+					pp_table->StaticVoltageOffsetVid[i] = 248;
+				else
+					pp_table->StaticVoltageOffsetVid[i] =
+						(uint8_t)(dep_table->entries[i].sclk_offset *
 								VOLTAGE_VID_OFFSET_SCALE2 /
 								VOLTAGE_VID_OFFSET_SCALE1);
-
-			pp_table->OverrideBtcGbCksOn =
-					avfs_params.ucEnableGbVdroopTableCkson;
-			pp_table->OverrideAvfsGbCksOn =
-					avfs_params.ucEnableGbFuseTableCkson;
+			}
 
 			if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT !=
 					data->disp_clk_quad_eqn_a) &&
@@ -2141,20 +2145,21 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m1 =
 						(int32_t)data->disp_clk_quad_eqn_a;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m2 =
-						(int16_t)data->disp_clk_quad_eqn_b;
+						(int32_t)data->disp_clk_quad_eqn_b;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].b =
 						(int32_t)data->disp_clk_quad_eqn_c;
 			} else {
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m1 =
 						(int32_t)avfs_params.ulDispclk2GfxclkM1;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m2 =
-						(int16_t)avfs_params.usDispclk2GfxclkM2;
+						(int32_t)avfs_params.ulDispclk2GfxclkM2;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].b =
 						(int32_t)avfs_params.ulDispclk2GfxclkB;
 			}
 
 			pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m1_shift = 24;
 			pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m2_shift = 12;
+			pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].b_shift = 0;
 
 			if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT !=
 					data->dcef_clk_quad_eqn_a) &&
@@ -2163,20 +2168,21 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m1 =
 						(int32_t)data->dcef_clk_quad_eqn_a;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m2 =
-						(int16_t)data->dcef_clk_quad_eqn_b;
+						(int32_t)data->dcef_clk_quad_eqn_b;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].b =
 						(int32_t)data->dcef_clk_quad_eqn_c;
 			} else {
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m1 =
 						(int32_t)avfs_params.ulDcefclk2GfxclkM1;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m2 =
-						(int16_t)avfs_params.usDcefclk2GfxclkM2;
+						(int32_t)avfs_params.ulDcefclk2GfxclkM2;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].b =
 						(int32_t)avfs_params.ulDcefclk2GfxclkB;
 			}
 
 			pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m1_shift = 24;
 			pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m2_shift = 12;
+			pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].b_shift = 0;
 
 			if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT !=
 					data->pixel_clk_quad_eqn_a) &&
@@ -2185,14 +2191,14 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m1 =
 						(int32_t)data->pixel_clk_quad_eqn_a;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m2 =
-						(int16_t)data->pixel_clk_quad_eqn_b;
+						(int32_t)data->pixel_clk_quad_eqn_b;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].b =
 						(int32_t)data->pixel_clk_quad_eqn_c;
 			} else {
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m1 =
 						(int32_t)avfs_params.ulPixelclk2GfxclkM1;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m2 =
-						(int16_t)avfs_params.usPixelclk2GfxclkM2;
+						(int32_t)avfs_params.ulPixelclk2GfxclkM2;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].b =
 						(int32_t)avfs_params.ulPixelclk2GfxclkB;
 			}
@@ -2207,20 +2213,21 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m1 =
 						(int32_t)data->phy_clk_quad_eqn_a;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m2 =
-						(int16_t)data->phy_clk_quad_eqn_b;
+						(int32_t)data->phy_clk_quad_eqn_b;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].b =
 						(int32_t)data->phy_clk_quad_eqn_c;
 			} else {
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m1 =
 						(int32_t)avfs_params.ulPhyclk2GfxclkM1;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m2 =
-						(int16_t)avfs_params.usPhyclk2GfxclkM2;
+						(int32_t)avfs_params.ulPhyclk2GfxclkM2;
 				pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].b =
 						(int32_t)avfs_params.ulPhyclk2GfxclkB;
 			}
 
 			pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m1_shift = 24;
 			pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m2_shift = 12;
+			pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].b_shift = 0;
 		} else {
 			data->smu_features[GNLD_AVFS].supported = false;
 		}
-- 
cgit v1.2.3


From 6f2b1fcccb6dad1d68c0955144af3ad56bacb25c Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 21 Mar 2017 16:18:11 +0800
Subject: drm/amdgpu: split psp tmr init function

Rework in order to properly support suspend.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index ed6e5799016e..6aba417b7c95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -152,11 +152,6 @@ static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd,
 static int psp_tmr_init(struct psp_context *psp)
 {
 	int ret;
-	struct psp_gfx_cmd_resp *cmd;
-
-	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
-	if (!cmd)
-		return -ENOMEM;
 
 	/*
 	 * Allocate 3M memory aligned to 1M from Frame Buffer (local
@@ -168,22 +163,30 @@ static int psp_tmr_init(struct psp_context *psp)
 	ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000,
 				      AMDGPU_GEM_DOMAIN_VRAM,
 				      &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
-	if (ret)
-		goto failed;
+
+	return ret;
+}
+
+static int psp_tmr_load(struct psp_context *psp)
+{
+	int ret;
+	struct psp_gfx_cmd_resp *cmd;
+
+	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
 
 	psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000);
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 				 psp->fence_buf_mc_addr, 1);
 	if (ret)
-		goto failed_mem;
+		goto failed;
 
 	kfree(cmd);
 
 	return 0;
 
-failed_mem:
-	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
 failed:
 	kfree(cmd);
 	return ret;
@@ -298,6 +301,10 @@ static int psp_load_fw(struct amdgpu_device *adev)
 	if (ret)
 		goto failed_mem;
 
+	ret = psp_tmr_load(psp);
+	if (ret)
+		goto failed_mem;
+
 	ret = psp_asd_load(psp);
 	if (ret)
 		goto failed_mem;
-- 
cgit v1.2.3


From 53a5cf57d819f12b2de365aa3137c9175a032608 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 21 Mar 2017 16:51:00 +0800
Subject: drm/amdgpu: add psp firmware private memory

Needed for proper suspend support.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 23 +++++++++++++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  6 ++++++
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 6aba417b7c95..f70ab550934c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -275,17 +275,25 @@ static int psp_load_fw(struct amdgpu_device *adev)
 	if (!cmd)
 		return -ENOMEM;
 
-	ret = psp_bootloader_load_sysdrv(psp);
+	ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
+				      AMDGPU_GEM_DOMAIN_GTT,
+				      &psp->fw_pri_bo,
+				      &psp->fw_pri_mc_addr,
+				      &psp->fw_pri_buf);
 	if (ret)
 		goto failed;
 
+	ret = psp_bootloader_load_sysdrv(psp);
+	if (ret)
+		goto failed_mem1;
+
 	ret = psp_bootloader_load_sos(psp);
 	if (ret)
-		goto failed;
+		goto failed_mem1;
 
 	ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
 	if (ret)
-		goto failed;
+		goto failed_mem1;
 
 	ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
 				      AMDGPU_GEM_DOMAIN_VRAM,
@@ -293,7 +301,7 @@ static int psp_load_fw(struct amdgpu_device *adev)
 				      &psp->fence_buf_mc_addr,
 				      &psp->fence_buf);
 	if (ret)
-		goto failed;
+		goto failed_mem1;
 
 	memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
 
@@ -343,6 +351,9 @@ static int psp_load_fw(struct amdgpu_device *adev)
 failed_mem:
 	amdgpu_bo_free_kernel(&psp->fence_buf_bo,
 			      &psp->fence_buf_mc_addr, &psp->fence_buf);
+failed_mem1:
+	amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+			      &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
 failed:
 	kfree(cmd);
 	return ret;
@@ -392,6 +403,10 @@ static int psp_hw_fini(void *handle)
 	if (psp->tmr_buf)
 		amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
 
+	if (psp->fw_pri_buf)
+		amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+				      &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index e9f35e025b59..b309b6a62c65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -32,6 +32,7 @@
 #define PSP_CMD_BUFFER_SIZE	0x1000
 #define PSP_ASD_BIN_SIZE	0x40000
 #define PSP_ASD_SHARED_MEM_SIZE	0x4000
+#define PSP_1_MEG		0x100000
 
 enum psp_ring_type
 {
@@ -71,6 +72,11 @@ struct psp_context
 				  enum AMDGPU_UCODE_ID ucode_type);
 	bool (*smu_reload_quirk)(struct psp_context *psp);
 
+	/* fence buffer */
+	struct amdgpu_bo 		*fw_pri_bo;
+	uint64_t 			fw_pri_mc_addr;
+	void				*fw_pri_buf;
+
 	/* sos firmware */
 	const struct firmware		*sos_fw;
 	uint32_t			sos_fw_version;
-- 
cgit v1.2.3


From 2b0c3aee2172451e9f982b25f3fdf59a1b687dc3 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Wed, 22 Mar 2017 10:16:05 +0800
Subject: drm/amdgpu: use private memory to store psp firmware data

Rework in order to properly support suspend.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 28 ++++++-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  1 -
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c   | 48 ++++++---------------------------
 3 files changed, 16 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index f70ab550934c..ed9c04b7a286 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -209,9 +209,9 @@ static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd,
 static int psp_asd_load(struct psp_context *psp)
 {
 	int ret;
-	struct amdgpu_bo *asd_bo, *asd_shared_bo;
-	uint64_t asd_mc_addr, asd_shared_mc_addr;
-	void *asd_buf, *asd_shared_buf;
+	struct amdgpu_bo *asd_shared_bo;
+	uint64_t asd_shared_mc_addr;
+	void *asd_shared_buf;
 	struct psp_gfx_cmd_resp *cmd;
 
 	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
@@ -224,38 +224,26 @@ static int psp_asd_load(struct psp_context *psp)
 	 */
 	ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, PAGE_SIZE,
 				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &asd_shared_bo, &asd_shared_mc_addr, &asd_buf);
+				      &asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf);
 	if (ret)
 		goto failed;
 
-	/*
-	 * Allocate 256k memory aligned to 4k from Frame Buffer (local
-	 * physical) for ASD firmware
-	 */
-	ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_BIN_SIZE, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &asd_bo, &asd_mc_addr, &asd_buf);
-	if (ret)
-		goto failed_mem;
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+	memcpy(psp->fw_pri_buf, psp->asd_start_addr, psp->asd_ucode_size);
 
-	memcpy(asd_buf, psp->asd_start_addr, psp->asd_ucode_size);
-
-	psp_prep_asd_cmd_buf(cmd, asd_mc_addr, asd_shared_mc_addr,
+	psp_prep_asd_cmd_buf(cmd, psp->fw_pri_mc_addr, asd_shared_mc_addr,
 			     psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 				 psp->fence_buf_mc_addr, 2);
 	if (ret)
-		goto failed_mem1;
+		goto failed_mem;
 
-	amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf);
 	amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf);
 	kfree(cmd);
 
 	return 0;
 
-failed_mem1:
-	amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf);
 failed_mem:
 	amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf);
 failed:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index b309b6a62c65..125a5dc0c0e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -30,7 +30,6 @@
 
 #define PSP_FENCE_BUFFER_SIZE	0x1000
 #define PSP_CMD_BUFFER_SIZE	0x1000
-#define PSP_ASD_BIN_SIZE	0x40000
 #define PSP_ASD_SHARED_MEM_SIZE	0x4000
 #define PSP_1_MEG		0x100000
 
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index c3588d1c7cb0..aae6b6540161 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -166,11 +166,8 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
 {
 	int ret;
 	uint32_t psp_gfxdrv_command_reg = 0;
-	struct amdgpu_bo *psp_sysdrv;
-	void *psp_sysdrv_virt = NULL;
-	uint64_t psp_sysdrv_mem;
 	struct amdgpu_device *adev = psp->adev;
-	uint32_t size, sol_reg;
+	uint32_t sol_reg;
 
 	/* Check sOS sign of life register to confirm sys driver and sOS
 	 * are already been loaded.
@@ -185,27 +182,14 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
 	if (ret)
 		return ret;
 
-	/*
-	 * Create a 1 meg GART memory to store the psp sys driver
-	 * binary with a 1 meg aligned address
-	 */
-	size = (psp->sys_bin_size + (PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)) &
-		(~(PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1));
-
-	ret = amdgpu_bo_create_kernel(adev, size, PSP_BOOTLOADER_1_MEG_ALIGNMENT,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &psp_sysdrv,
-				      &psp_sysdrv_mem,
-				      &psp_sysdrv_virt);
-	if (ret)
-		return ret;
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
 
 	/* Copy PSP System Driver binary to memory */
-	memcpy(psp_sysdrv_virt, psp->sys_start_addr, psp->sys_bin_size);
+	memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
 
 	/* Provide the sys driver to bootrom */
 	WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36),
-	       (uint32_t)(psp_sysdrv_mem >> 20));
+	       (uint32_t)(psp->fw_pri_mc_addr >> 20));
 	psp_gfxdrv_command_reg = 1 << 16;
 	WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
 	       psp_gfxdrv_command_reg);
@@ -216,8 +200,6 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
 	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
 			   0x80000000, 0x80000000, false);
 
-	amdgpu_bo_free_kernel(&psp_sysdrv, &psp_sysdrv_mem, &psp_sysdrv_virt);
-
 	return ret;
 }
 
@@ -225,11 +207,8 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
 {
 	int ret;
 	unsigned int psp_gfxdrv_command_reg = 0;
-	struct amdgpu_bo *psp_sos;
-	void *psp_sos_virt = NULL;
-	uint64_t psp_sos_mem;
 	struct amdgpu_device *adev = psp->adev;
-	uint32_t size, sol_reg;
+	uint32_t sol_reg;
 
 	/* Check sOS sign of life register to confirm sys driver and sOS
 	 * are already been loaded.
@@ -244,23 +223,14 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
 	if (ret)
 		return ret;
 
-	size = (psp->sos_bin_size + (PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)) &
-		(~((uint64_t)PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1));
-
-	ret = amdgpu_bo_create_kernel(adev, size, PSP_BOOTLOADER_1_MEG_ALIGNMENT,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &psp_sos,
-				      &psp_sos_mem,
-				      &psp_sos_virt);
-	if (ret)
-		return ret;
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
 
 	/* Copy Secure OS binary to PSP memory */
-	memcpy(psp_sos_virt, psp->sos_start_addr, psp->sos_bin_size);
+	memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
 
 	/* Provide the PSP secure OS to bootrom */
 	WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36),
-	       (uint32_t)(psp_sos_mem >> 20));
+	       (uint32_t)(psp->fw_pri_mc_addr >> 20));
 	psp_gfxdrv_command_reg = 2 << 16;
 	WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
 	       psp_gfxdrv_command_reg);
@@ -273,8 +243,6 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
 			   0, true);
 #endif
 
-	amdgpu_bo_free_kernel(&psp_sos, &psp_sos_mem, &psp_sos_virt);
-
 	return ret;
 }
 
-- 
cgit v1.2.3


From f5cfef98f736f9aa42e9ad41e67b5abd96b77835 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 21 Mar 2017 18:02:04 +0800
Subject: drm/amdgpu: split psp asd function

Rework in order to properly support suspend.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 45 ++++++++++++++++-----------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  5 +++-
 2 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index ed9c04b7a286..ea5616036bf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -206,48 +206,43 @@ static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd,
 	cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
 }
 
+static int psp_asd_init(struct psp_context *psp)
+{
+	int ret;
+
+	/*
+	 * Allocate 16k memory aligned to 4k from Frame Buffer (local
+	 * physical) for shared ASD <-> Driver
+	 */
+	ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE,
+				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+				      &psp->asd_shared_bo,
+				      &psp->asd_shared_mc_addr,
+				      &psp->asd_shared_buf);
+
+	return ret;
+}
+
 static int psp_asd_load(struct psp_context *psp)
 {
 	int ret;
-	struct amdgpu_bo *asd_shared_bo;
-	uint64_t asd_shared_mc_addr;
-	void *asd_shared_buf;
 	struct psp_gfx_cmd_resp *cmd;
 
 	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
 	if (!cmd)
 		return -ENOMEM;
 
-	/*
-	 * Allocate 16k memory aligned to 4k from Frame Buffer (local
-	 * physical) for shared ASD <-> Driver
-	 */
-	ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf);
-	if (ret)
-		goto failed;
-
 	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
 	memcpy(psp->fw_pri_buf, psp->asd_start_addr, psp->asd_ucode_size);
 
-	psp_prep_asd_cmd_buf(cmd, psp->fw_pri_mc_addr, asd_shared_mc_addr,
+	psp_prep_asd_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->asd_shared_mc_addr,
 			     psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 				 psp->fence_buf_mc_addr, 2);
-	if (ret)
-		goto failed_mem;
 
-	amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf);
 	kfree(cmd);
 
-	return 0;
-
-failed_mem:
-	amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf);
-failed:
-	kfree(cmd);
 	return ret;
 }
 
@@ -301,6 +296,10 @@ static int psp_load_fw(struct amdgpu_device *adev)
 	if (ret)
 		goto failed_mem;
 
+	ret = psp_asd_init(psp);
+	if (ret)
+		goto failed_mem;
+
 	ret = psp_asd_load(psp);
 	if (ret)
 		goto failed_mem;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 125a5dc0c0e1..1f1f057c7c42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -90,12 +90,15 @@ struct psp_context
 	uint64_t 			tmr_mc_addr;
 	void				*tmr_buf;
 
-	/* asd firmware */
+	/* asd firmware and buffer */
 	const struct firmware		*asd_fw;
 	uint32_t			asd_fw_version;
 	uint32_t			asd_feature_version;
 	uint32_t			asd_ucode_size;
 	uint8_t				*asd_start_addr;
+	struct amdgpu_bo 		*asd_shared_bo;
+	uint64_t 			asd_shared_mc_addr;
+	void				*asd_shared_buf;
 
 	/* fence buffer */
 	struct amdgpu_bo 		*fence_buf_bo;
-- 
cgit v1.2.3


From be70bbda3fb9a1b876ed80e2ebc292203eb0ffec Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 21 Mar 2017 18:36:57 +0800
Subject: drm/amdgpu: split psp ring init function

Rework in order to properly support suspend.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 115 ++++++++++++++++++++------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |   3 +
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c   |  11 ++-
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.h   |   2 +
 4 files changed, 87 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index ea5616036bf6..0d20dc3ea84e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -55,6 +55,7 @@ static int psp_sw_init(void *handle)
 		psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos;
 		psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf;
 		psp->ring_init = psp_v3_1_ring_init;
+		psp->ring_create = psp_v3_1_ring_create;
 		psp->cmd_submit = psp_v3_1_cmd_submit;
 		psp->compare_sram_data = psp_v3_1_compare_sram_data;
 		psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
@@ -246,18 +247,79 @@ static int psp_asd_load(struct psp_context *psp)
 	return ret;
 }
 
-static int psp_load_fw(struct amdgpu_device *adev)
+static int psp_hw_start(struct psp_context *psp)
 {
 	int ret;
-	struct psp_gfx_cmd_resp *cmd;
-	int i;
+
+	ret = psp_bootloader_load_sysdrv(psp);
+	if (ret)
+		return ret;
+
+	ret = psp_bootloader_load_sos(psp);
+	if (ret)
+		return ret;
+
+	ret = psp_ring_create(psp, PSP_RING_TYPE__KM);
+	if (ret)
+		return ret;
+
+	ret = psp_tmr_load(psp);
+	if (ret)
+		return ret;
+
+	ret = psp_asd_load(psp);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int psp_np_fw_load(struct psp_context *psp)
+{
+	int i, ret;
 	struct amdgpu_firmware_info *ucode;
+	struct amdgpu_device* adev = psp->adev;
+
+	for (i = 0; i < adev->firmware.max_ucodes; i++) {
+		ucode = &adev->firmware.ucode[i];
+		if (!ucode->fw)
+			continue;
+
+		if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
+		    psp_smu_reload_quirk(psp))
+			continue;
+
+		ret = psp_prep_cmd_buf(ucode, psp->cmd);
+		if (ret)
+			return ret;
+
+		ret = psp_cmd_submit_buf(psp, ucode, psp->cmd,
+					 psp->fence_buf_mc_addr, i + 3);
+		if (ret)
+			return ret;
+
+#if 0
+		/* check if firmware loaded sucessfully */
+		if (!amdgpu_psp_check_fw_loading_status(adev, i))
+			return -EINVAL;
+#endif
+	}
+
+	return 0;
+}
+
+static int psp_load_fw(struct amdgpu_device *adev)
+{
+	int ret;
 	struct psp_context *psp = &adev->psp;
+	struct psp_gfx_cmd_resp *cmd;
 
 	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
 	if (!cmd)
 		return -ENOMEM;
 
+	psp->cmd = cmd;
+
 	ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
 				      AMDGPU_GEM_DOMAIN_GTT,
 				      &psp->fw_pri_bo,
@@ -266,18 +328,6 @@ static int psp_load_fw(struct amdgpu_device *adev)
 	if (ret)
 		goto failed;
 
-	ret = psp_bootloader_load_sysdrv(psp);
-	if (ret)
-		goto failed_mem1;
-
-	ret = psp_bootloader_load_sos(psp);
-	if (ret)
-		goto failed_mem1;
-
-	ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
-	if (ret)
-		goto failed_mem1;
-
 	ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
 				      AMDGPU_GEM_DOMAIN_VRAM,
 				      &psp->fence_buf_bo,
@@ -288,11 +338,11 @@ static int psp_load_fw(struct amdgpu_device *adev)
 
 	memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
 
-	ret = psp_tmr_init(psp);
+	ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
 	if (ret)
-		goto failed_mem;
+		goto failed_mem1;
 
-	ret = psp_tmr_load(psp);
+	ret = psp_tmr_init(psp);
 	if (ret)
 		goto failed_mem;
 
@@ -300,34 +350,13 @@ static int psp_load_fw(struct amdgpu_device *adev)
 	if (ret)
 		goto failed_mem;
 
-	ret = psp_asd_load(psp);
+	ret = psp_hw_start(psp);
 	if (ret)
 		goto failed_mem;
 
-	for (i = 0; i < adev->firmware.max_ucodes; i++) {
-		ucode = &adev->firmware.ucode[i];
-		if (!ucode->fw)
-			continue;
-
-		if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
-		    psp_smu_reload_quirk(psp))
-			continue;
-
-		ret = psp_prep_cmd_buf(ucode, cmd);
-		if (ret)
-			goto failed_mem;
-
-		ret = psp_cmd_submit_buf(psp, ucode, cmd,
-					 psp->fence_buf_mc_addr, i + 3);
-		if (ret)
-			goto failed_mem;
-
-#if 0
-		/* check if firmware loaded sucessfully */
-		if (!amdgpu_psp_check_fw_loading_status(adev, i))
-			return -EINVAL;
-#endif
-	}
+	ret = psp_np_fw_load(psp);
+	if (ret)
+		goto failed_mem;
 
 	amdgpu_bo_free_kernel(&psp->fence_buf_bo,
 			      &psp->fence_buf_mc_addr, &psp->fence_buf);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 1f1f057c7c42..28faba5b7dd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -57,6 +57,7 @@ struct psp_context
 {
 	struct amdgpu_device            *adev;
 	struct psp_ring                 km_ring;
+	struct psp_gfx_cmd_resp		*cmd;
 
 	int (*init_microcode)(struct psp_context *psp);
 	int (*bootloader_load_sysdrv)(struct psp_context *psp);
@@ -64,6 +65,7 @@ struct psp_context
 	int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode,
 			    struct psp_gfx_cmd_resp *cmd);
 	int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
+	int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type);
 	int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode,
 			  uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index);
 	bool (*compare_sram_data)(struct psp_context *psp,
@@ -113,6 +115,7 @@ struct amdgpu_psp_funcs {
 
 #define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type))
 #define psp_ring_init(psp, type) (psp)->ring_init((psp), (type))
+#define psp_ring_create(psp, type) (psp)->ring_create((psp), (type))
 #define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \
 		(psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
 #define psp_compare_sram_data(psp, ucode, type) \
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index aae6b6540161..d351583785e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -268,7 +268,6 @@ int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd
 int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
 {
 	int ret = 0;
-	unsigned int psp_ring_reg = 0;
 	struct psp_ring *ring;
 	struct amdgpu_device *adev = psp->adev;
 
@@ -288,6 +287,16 @@ int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
 		return ret;
 	}
 
+	return 0;
+}
+
+int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
+{
+	int ret = 0;
+	unsigned int psp_ring_reg = 0;
+	struct psp_ring *ring = &psp->km_ring;
+	struct amdgpu_device *adev = psp->adev;
+
 	/* Write low address of the ring to C2PMSG_69 */
 	psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
 	WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_69), psp_ring_reg);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
index e82eff741a08..5230d27867c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
@@ -39,6 +39,8 @@ extern int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
 				 struct psp_gfx_cmd_resp *cmd);
 extern int psp_v3_1_ring_init(struct psp_context *psp,
 			      enum psp_ring_type ring_type);
+extern int psp_v3_1_ring_create(struct psp_context *psp,
+				enum psp_ring_type ring_type);
 extern int psp_v3_1_cmd_submit(struct psp_context *psp,
 			       struct amdgpu_firmware_info *ucode,
 			       uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
-- 
cgit v1.2.3


From 93ea9b9f7cec596adac13b944a122e4c69de18a4 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Thu, 23 Mar 2017 11:20:25 +0800
Subject: drm/amdgpu: add hw_start and non-psp firmware loading into resume

Rework in order to properly support suspend.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 0d20dc3ea84e..68ccaedad0f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -435,18 +435,30 @@ static int psp_resume(void *handle)
 {
 	int ret;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct psp_context *psp = &adev->psp;
 
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 		return 0;
 
+	DRM_INFO("PSP is resuming...\n");
+
 	mutex_lock(&adev->firmware.mutex);
 
-	ret = psp_load_fw(adev);
+	ret = psp_hw_start(psp);
 	if (ret)
-		DRM_ERROR("PSP resume failed\n");
+		goto failed;
+
+	ret = psp_np_fw_load(psp);
+	if (ret)
+		goto failed;
 
 	mutex_unlock(&adev->firmware.mutex);
 
+	return 0;
+
+failed:
+	DRM_ERROR("PSP resume failed\n");
+	mutex_unlock(&adev->firmware.mutex);
 	return ret;
 }
 
-- 
cgit v1.2.3


From b1bb8c0118b3b9d44a6a31ea5242bdd9ed040a9b Mon Sep 17 00:00:00 2001
From: Tom St Denis <tom.stdenis@amd.com>
Date: Fri, 7 Apr 2017 07:53:31 -0400
Subject: drm/amd/amdgpu: Introduce new read/write macros for SOC15
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ---
 drivers/gpu/drm/amd/amdgpu/soc15_common.h | 20 +++++++++++++++++++-
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6a8129949333..c4f6211640da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1704,9 +1704,6 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
 #define WREG32_FIELD_OFFSET(reg, offset, field, val)	\
 	WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
 
-#define WREG32_FIELD15(ip, idx, reg, field, val)	\
-	WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
-
 /*
  * BIOS helpers.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 2b96c806baa1..e8df6d820dbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -45,13 +45,31 @@ struct nbio_pcie_index_data {
 	u32 index_offset;
 	u32 data_offset;
 };
-// Register Access Macro
+
+/* Register Access Macros */
 #define SOC15_REG_OFFSET(ip, inst, reg)       (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
                                                 (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
                                                     (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
                                                         (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
                                                             (ip##_BASE__INST##inst##_SEG4 + reg)))))
 
+#define WREG32_FIELD15(ip, idx, reg, field, val)	\
+	WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+
+#define RREG32_SOC15(ip, inst, reg) \
+	RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
+		(1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
+		(2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
+		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
+		(ip##_BASE__INST##inst##_SEG4 + reg))))))
+
+#define WREG32_SOC15(ip, inst, reg, value) \
+	WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
+		(1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
+		(2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
+		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
+		(ip##_BASE__INST##inst##_SEG4 + reg))))), value)
+
 #endif
 
 
-- 
cgit v1.2.3


From 5e78835abdabc303ba980f50e9f71039e7689cc9 Mon Sep 17 00:00:00 2001
From: Tom St Denis <tom.stdenis@amd.com>
Date: Fri, 7 Apr 2017 07:53:53 -0400
Subject: drm/amd/amdgpu: Port gfx9 driver over to new read/write macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 406 +++++++++++++++++-----------------
 1 file changed, 203 insertions(+), 203 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a447b70841c9..e58fb05bc904 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -705,19 +705,19 @@ static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
 
 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
 {
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX),
+	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
 		(SQ_IND_INDEX__FORCE_READ_MASK));
-	return RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA));
+	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
 }
 
 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
 			   uint32_t wave, uint32_t thread,
 			   uint32_t regno, uint32_t num, uint32_t *out)
 {
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX),
+	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
@@ -725,7 +725,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
 		(SQ_IND_INDEX__FORCE_READ_MASK) |
 		(SQ_IND_INDEX__AUTO_INCR_MASK));
 	while (num--)
-		*(out++) = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA));
+		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
 }
 
 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
@@ -953,7 +953,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
 	size = adev->gfx.ngg.buf[POS].size / 256;
 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_1), data);
+	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
 
 	data = 0;
 	size = adev->gfx.ngg.buf[CNTL].size / 256;
@@ -962,32 +962,32 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
 	size = adev->gfx.ngg.buf[PARAM].size / 1024;
 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_2), data);
+	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
 
 	/* Program buffer base address */
 	base = lower_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr);
 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE), data);
+	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
 
 	base = upper_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr);
 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE_HI), data);
+	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
 
 	base = lower_32_bits(adev->gfx.ngg.buf[POS].gpu_addr);
 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE), data);
+	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
 
 	base = upper_32_bits(adev->gfx.ngg.buf[POS].gpu_addr);
 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE_HI), data);
+	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
 
 	base = lower_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr);
 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE), data);
+	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
 
 	base = upper_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr);
 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI), data);
+	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
 
 	/* Clear GDS reserved memory */
 	r = amdgpu_ring_alloc(ring, 17);
@@ -1203,7 +1203,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh
 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
 	}
-	WREG32( SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
 }
 
 static u32 gfx_v9_0_create_bitmask(u32 bit_width)
@@ -1215,8 +1215,8 @@ static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
 
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_RB_BACKEND_DISABLE));
-	data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_RB_BACKEND_DISABLE));
+	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
+	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
 
 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
@@ -1276,8 +1276,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
 		soc15_grbm_select(adev, 0, 0, 0, i);
 		/* CP and shaders */
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
 	}
 	soc15_grbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
@@ -1304,8 +1304,8 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
 		tmp = 0;
 		tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
 				    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), tmp);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), 0);
+		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
+		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
 	}
 	soc15_grbm_select(adev, 0, 0, 0, 0);
 
@@ -1320,7 +1320,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
 	 */
 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE),
+	WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
@@ -1343,7 +1343,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
 			for (k = 0; k < adev->usec_timeout; k++) {
-				if (RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY)) == 0)
+				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
 					break;
 				udelay(1);
 			}
@@ -1357,7 +1357,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
 	for (k = 0; k < adev->usec_timeout; k++) {
-		if ((RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY)) & mask) == 0)
+		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
 			break;
 		udelay(1);
 	}
@@ -1366,7 +1366,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
 					       bool enable)
 {
-	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
+	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
 
 	if (enable)
 		return;
@@ -1376,15 +1376,15 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), tmp);
+	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
 }
 
 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
 {
-	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
+	u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
 
 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp);
+	WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);
 
 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
 
@@ -1415,17 +1415,17 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
 
 #ifdef AMDGPU_RLC_DEBUG_RETRY
 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
-	rlc_ucode_ver = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6));
+	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
 	if(rlc_ucode_ver == 0x108) {
 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
 		 * default is 0x9C4 to create a 100us interval */
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3), 0x9C4);
+		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
 		 * to disable the page fault retry interrupts, default is 
 		 * 0x100 (256) */
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12), 0x100);
+		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
 	}
 #endif
 }
@@ -1446,11 +1446,11 @@ static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR),
+	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
 			RLCG_UCODE_LOADING_START_ADDRESS);
 	for (i = 0; i < fw_size; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA), le32_to_cpup(fw_data++));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), adev->gfx.rlc_fw_version);
+		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
+	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
 
 	return 0;
 }
@@ -1465,10 +1465,10 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
 	gfx_v9_0_rlc_stop(adev);
 
 	/* disable CG */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), 0);
+	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
 
 	/* disable PG */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), 0);
+	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);
 
 	gfx_v9_0_rlc_reset(adev);
 
@@ -1487,7 +1487,7 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 {
 	int i;
-	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL));
+	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
 
 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
@@ -1496,7 +1496,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 			adev->gfx.gfx_ring[i].ready = false;
 	}
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
 	udelay(50);
 }
 
@@ -1529,30 +1529,30 @@ static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
 		(adev->gfx.pfp_fw->data +
 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), 0);
+	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
 	for (i = 0; i < fw_size; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA), le32_to_cpup(fw_data++));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), adev->gfx.pfp_fw_version);
+		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
+	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
 
 	/* CE */
 	fw_data = (const __le32 *)
 		(adev->gfx.ce_fw->data +
 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), 0);
+	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
 	for (i = 0; i < fw_size; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA), le32_to_cpup(fw_data++));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), adev->gfx.ce_fw_version);
+		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
+	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
 
 	/* ME */
 	fw_data = (const __le32 *)
 		(adev->gfx.me_fw->data +
 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), 0);
+	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
 	for (i = 0; i < fw_size; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_DATA), le32_to_cpup(fw_data++));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), adev->gfx.me_fw_version);
+		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
+	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
 
 	return 0;
 }
@@ -1594,8 +1594,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
 	int r, i;
 
 	/* init the CP */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT), adev->gfx.config.max_hw_contexts - 1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID), 1);
+	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
+	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
 
 	gfx_v9_0_cp_gfx_enable(adev, true);
 
@@ -1650,10 +1650,10 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
 
 	/* Set the write pointer delay */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY), 0);
+	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
 
 	/* set the RB to use vmid 0 */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID), 0);
+	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
 
 	/* Set ring buffer size */
 	ring = &adev->gfx.gfx_ring[0];
@@ -1663,30 +1663,30 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
 #ifdef __BIG_ENDIAN
 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
 #endif
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
 
 	/* Initialize the ring buffer's write pointers */
 	ring->wptr = 0;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr));
+	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
 
 	/* set the wb address wether it's enabled or not */
 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR), lower_32_bits(rptr_addr));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI), upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
 
 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr));
+	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
+	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
 
 	mdelay(1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
 
 	rb_addr = ring->gpu_addr >> 8;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE), rb_addr);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI), upper_32_bits(rb_addr));
+	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
+	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
 
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
 	if (ring->use_doorbell) {
 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
 				    DOORBELL_OFFSET, ring->doorbell_index);
@@ -1695,13 +1695,13 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
 	} else {
 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
 	}
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
 
 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER), tmp);
+	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER),
+	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
 
 
@@ -1717,9 +1717,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
 	int i;
 
 	if (enable) {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), 0);
+		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
 	} else {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL),
+		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
 			adev->gfx.compute_ring[i].ready = false;
@@ -1756,21 +1756,21 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 	tmp = 0;
 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_CNTL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_LO),
+	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_HI),
+	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
  
 	/* MEC1 */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
 			 mec_hdr->jt_offset);
 	for (i = 0; i < mec_hdr->jt_size; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA),
+		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
 			adev->gfx.mec_fw_version);
 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
 
@@ -1823,12 +1823,12 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 	struct amdgpu_device *adev = ring->adev;
 
 	/* tell RLC which is KIQ queue */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
+	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
 	tmp &= 0xffffff00;
 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp);
+	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 	tmp |= 0x80;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp);
+	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 }
 
 static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring)
@@ -1898,14 +1898,14 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
 			(order_base_2(MEC_HPD_SIZE / 4) - 1));
 
 	mqd->cp_hqd_eop_control = tmp;
 
 	/* enable doorbell? */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
 
 	if (ring->use_doorbell) {
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@@ -1935,7 +1935,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
 
 	/* set MQD vmid to 0 */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
 	mqd->cp_mqd_control = tmp;
 
@@ -1945,7 +1945,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
 
 	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
 			    (order_base_2(ring->ring_size / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
@@ -1973,7 +1973,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	tmp = 0;
 	/* enable the doorbell if requested */
 	if (ring->use_doorbell) {
-		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
+		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 				DOORBELL_OFFSET, ring->doorbell_index);
 
@@ -2013,94 +2013,94 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 	/* disable wptr polling */
 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
 	       mqd->cp_hqd_eop_base_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
 	       mqd->cp_hqd_eop_base_addr_hi);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL),
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
 	       mqd->cp_hqd_eop_control);
 
 	/* enable doorbell? */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
 	       mqd->cp_hqd_pq_doorbell_control);
 
 	/* disable the queue if it's active */
-	if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1);
+	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
+		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
 		for (j = 0; j < adev->usec_timeout; j++) {
-			if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1))
+			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
 				break;
 			udelay(1);
 		}
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
+		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
 		       mqd->cp_hqd_dequeue_request);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR),
+		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
 		       mqd->cp_hqd_pq_rptr);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
 		       mqd->cp_hqd_pq_wptr_lo);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
 		       mqd->cp_hqd_pq_wptr_hi);
 	}
 
 	/* set the pointer to the MQD */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
 	       mqd->cp_mqd_base_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI),
+	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
 	       mqd->cp_mqd_base_addr_hi);
 
 	/* set MQD vmid to 0 */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL),
+	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
 	       mqd->cp_mqd_control);
 
 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
 	       mqd->cp_hqd_pq_base_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
 	       mqd->cp_hqd_pq_base_hi);
 
 	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
 	       mqd->cp_hqd_pq_control);
 
 	/* set the wb address whether it's enabled or not */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
 				mqd->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
 				mqd->cp_hqd_pq_rptr_report_addr_hi);
 
 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
 
 	/* enable the doorbell if requested */
 	if (ring->use_doorbell) {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER),
+		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
 					(AMDGPU_DOORBELL64_KIQ *2) << 2);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER),
+		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
 					(AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2);
 	}
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
 	       mqd->cp_hqd_pq_doorbell_control);
 
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
 	       mqd->cp_hqd_pq_wptr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
 	       mqd->cp_hqd_pq_wptr_hi);
 
 	/* set the vmid for the queue */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid);
+	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
 	       mqd->cp_hqd_persistent_state);
 
 	/* activate the queue */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE),
+	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
 	       mqd->cp_hqd_active);
 
 	if (ring->use_doorbell)
@@ -2323,7 +2323,7 @@ static bool gfx_v9_0_is_idle(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (REG_GET_FIELD(RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)), 
+	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
 				GRBM_STATUS, GUI_ACTIVE))
 		return false;
 	else
@@ -2338,7 +2338,7 @@ static int gfx_v9_0_wait_for_idle(void *handle)
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		/* read MC_STATUS */
-		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)) & 
+		tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
 			GRBM_STATUS__GUI_ACTIVE_MASK;
 
 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
@@ -2355,7 +2355,7 @@ static int gfx_v9_0_soft_reset(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* GRBM_STATUS */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS));
+	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
@@ -2374,7 +2374,7 @@ static int gfx_v9_0_soft_reset(void *handle)
 	}
 
 	/* GRBM_STATUS2 */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2));
+	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
@@ -2391,17 +2391,17 @@ static int gfx_v9_0_soft_reset(void *handle)
 		gfx_v9_0_cp_compute_enable(adev, false);
 
 		if (grbm_soft_reset) {
-			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
+			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
 			tmp |= grbm_soft_reset;
 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
-			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
+			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
 
 			udelay(50);
 
 			tmp &= ~grbm_soft_reset;
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
-			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
+			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
 		}
 
 		/* Wait a little for things to settle down */
@@ -2415,9 +2415,9 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 	uint64_t clock;
 
 	mutex_lock(&adev->gfx.gpu_clock_mutex);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT), 1);
-	clock = (uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB)) |
-		((uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB)) << 32ULL);
+	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
 	return clock;
 }
@@ -2497,7 +2497,7 @@ static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
 		return;
 
 	/* if RLC is not enabled, do nothing */
-	rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
+	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
 		return;
 
@@ -2506,7 +2506,7 @@ static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
 	     AMD_CG_SUPPORT_GFX_3D_CGCG)) {
 		data = RLC_SAFE_MODE__CMD_MASK;
 		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data);
+		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
 
 		/* wait for RLC_SAFE_MODE */
 		for (i = 0; i < adev->usec_timeout; i++) {
@@ -2526,7 +2526,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
 		return;
 
 	/* if RLC is not enabled, do nothing */
-	rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
+	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
 		return;
 
@@ -2537,7 +2537,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
 		 * mode.
 		 */
 		data = RLC_SAFE_MODE__CMD_MASK;
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data);
+		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
 		adev->gfx.rlc.in_safe_mode = false;
 	}
 }
@@ -2550,7 +2550,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
 	/* It is disabled by HW by default */
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
-		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
 			  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
@@ -2560,48 +2560,48 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
 
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
 
 		/* MGLS is a global flag to control all MGLS in GFX */
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
 			/* 2 - RLC memory Light sleep */
 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
-				def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
 				if (def != data)
-					WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data);
+					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
 			}
 			/* 3 - CP memory Light sleep */
 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
-				def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
 				if (def != data)
-					WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data);
+					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
 			}
 		}
 	} else {
 		/* 1 - MGCG_OVERRIDE */
-		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
 			 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
 
 		/* 2 - disable MGLS in RLC */
-		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data);
+			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
 		}
 
 		/* 3 - disable MGLS in CP */
-		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data);
+			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
 		}
 	}
 }
@@ -2616,37 +2616,37 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
 	/* Enable 3D CGCG/CGLS */
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
 		/* write cmd to clear cgcg/cgls ov */
-		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		/* unset CGCG override */
 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
 		/* update CGCG and CGLS override bits */
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
 		/* enable 3Dcgcg FSM(0x0020003f) */
-		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
 		data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
 
 		/* set IDLE_POLL_COUNT(0x00900100) */
-		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
+		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
+			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
 	} else {
 		/* Disable CGCG/CGLS */
-		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
 		/* disable cgcg, cgls should be disabled */
 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
 		/* disable cgcg and cgls in FSM */
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
 	}
 
 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
@@ -2660,7 +2660,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
-		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		/* unset CGCG override */
 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
@@ -2669,31 +2669,31 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
 		/* update CGCG and CGLS override bits */
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
 
 		/* enable cgcg FSM(0x0020003F) */
-		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
 		data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
 			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
 
 		/* set IDLE_POLL_COUNT(0x00900100) */
-		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
+		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
+			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
 	} else {
-		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
 		/* reset CGCG/CGLS bits */
 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
 		/* disable cgcg and cgls in FSM */
 		if (def != data)
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data);
+			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
 	}
 
 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
@@ -2760,12 +2760,12 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
 		*flags = 0;
 
 	/* AMD_CG_SUPPORT_GFX_MGCG */
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
 
 	/* AMD_CG_SUPPORT_GFX_CGCG */
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
 
@@ -2774,17 +2774,17 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
 
 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
 
 	/* AMD_CG_SUPPORT_GFX_CP_LS */
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
 
 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
 
@@ -2807,8 +2807,8 @@ static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
 	if (ring->use_doorbell) {
 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
 	} else {
-		wptr = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR));
-		wptr += (u64)RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI)) << 32;
+		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
+		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
 	}
 
 	return wptr;
@@ -2823,8 +2823,8 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
 		WDOORBELL64(ring->doorbell_index, ring->wptr);
 	} else {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr));
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr));
+		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
 	}
 }
 
@@ -3386,20 +3386,20 @@ static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
 	switch (type) {
 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
 		if (state == AMDGPU_IRQ_STATE_DISABLE) {
-			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL));
+			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
 						 GENERIC2_INT_ENABLE, 0);
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp);
+			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
 
 			tmp = RREG32(target);
 			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
 						 GENERIC2_INT_ENABLE, 0);
 			WREG32(target, tmp);
 		} else {
-			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL));
+			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
 						 GENERIC2_INT_ENABLE, 1);
-			WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp);
+			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
 
 			tmp = RREG32(target);
 			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
@@ -3612,7 +3612,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
 {
 	/* init asci gds info */
-	adev->gds.mem.total_size = RREG32(SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE));
+	adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
 	adev->gds.gws.total_size = 64;
 	adev->gds.oa.total_size = 16;
 
@@ -3641,8 +3641,8 @@ static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
 
-	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG));
-	data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG));
+	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
+	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
 
 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
@@ -3763,25 +3763,25 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE);
 	eop_gpu_addr >>= 8;
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR), lower_32_bits(eop_gpu_addr));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), upper_32_bits(eop_gpu_addr));
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr));
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
 	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr);
 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
 				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp);
 
 	/* enable doorbell? */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
 	if (use_doorbell)
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
 	else
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
 	mqd->cp_hqd_pq_doorbell_control = tmp;
 
 	/* disable the queue if it's active */
@@ -3790,40 +3790,40 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 	mqd->cp_hqd_pq_rptr = 0;
 	mqd->cp_hqd_pq_wptr_lo = 0;
 	mqd->cp_hqd_pq_wptr_hi = 0;
-	if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1);
+	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
+		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
 		for (j = 0; j < adev->usec_timeout; j++) {
-			if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1))
+			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
 				break;
 			udelay(1);
 		}
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), mqd->cp_hqd_dequeue_request);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR), mqd->cp_hqd_pq_rptr);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi);
+		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
+		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
+		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
+		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
 	}
 
 	/* set the pointer to the MQD */
 	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
 	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR), mqd->cp_mqd_base_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI), mqd->cp_mqd_base_addr_hi);
+	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
 
 	/* set MQD vmid to 0 */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp);
 	mqd->cp_mqd_control = tmp;
 
 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
 	hqd_gpu_addr = ring->gpu_addr >> 8;
 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE), mqd->cp_hqd_pq_base_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI), mqd->cp_hqd_pq_base_hi);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
 
 	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
 		(order_base_2(ring->ring_size / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
@@ -3835,7 +3835,7 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL), tmp);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp);
 	mqd->cp_hqd_pq_control = tmp;
 
 	/* set the wb address wether it's enabled or not */
@@ -3843,27 +3843,27 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
 	mqd->cp_hqd_pq_rptr_report_addr_hi =
 	upper_32_bits(wb_gpu_addr) & 0xffff;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
 		mqd->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
 		mqd->cp_hqd_pq_rptr_report_addr_hi);
 
 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
 		mqd->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
 		mqd->cp_hqd_pq_wptr_poll_addr_hi);
 
 	/* enable the doorbell if requested */
 	if (use_doorbell) {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER),
+		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
 			(AMDGPU_DOORBELL64_KIQ * 2) << 2);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER),
+		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
 			(AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2);
-		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
+		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 			DOORBELL_OFFSET, ring->doorbell_index);
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
@@ -3874,25 +3874,25 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 	} else {
 		mqd->cp_hqd_pq_doorbell_control = 0;
 	}
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
 		mqd->cp_hqd_pq_doorbell_control);
 
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
 
 	/* set the vmid for the queue */
 	mqd->cp_hqd_vmid = 0;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid);
+	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
 
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE));
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE), tmp);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp);
 	mqd->cp_hqd_persistent_state = tmp;
 
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), mqd->cp_hqd_active);
+	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
 	soc15_grbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
-- 
cgit v1.2.3


From d7b1eeb2ca039d04f1a1fcb241920cb112b4b52a Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Fri, 7 Apr 2017 18:39:07 +0800
Subject: drm/amdgpu:fix race condition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sequence is protected by spinlock so don't access sequence
in paramter seq when invoking this function.

~0 means to get the latest sequence number and 0 means none to
get.

Change-Id: Ib7a03f3cf5594deeb4ad333cc59b47a6bddfd1ad
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 3 +++
 include/uapi/drm/amdgpu_drm.h           | 5 ++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index cf0500671353..90d1ac8a80f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -273,6 +273,9 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 
 	spin_lock(&ctx->ring_lock);
 
+	if (seq == ~0ull)
+		seq = ctx->rings[ring->idx].sequence - 1;
+
 	if (seq >= cring->sequence) {
 		spin_unlock(&ctx->ring_lock);
 		return ERR_PTR(-EINVAL);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 516a9f285730..92262d81d41e 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -295,7 +295,10 @@ union drm_amdgpu_gem_wait_idle {
 };
 
 struct drm_amdgpu_wait_cs_in {
-	/** Command submission handle */
+	/* Command submission handle
+         * handle equals 0 means none to wait for
+         * handle equal ~0ull meanas wait for the latest sequence number
+         */
 	__u64 handle;
 	/** Absolute timeout to wait */
 	__u64 timeout;
-- 
cgit v1.2.3


From e841cfc9ff530d37617e589490caac59a1d94200 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 12:48:21 +0530
Subject: PM / Domains: Fix DT example

The power-domain provider's #power-domain-cells field is set to 0 and
yet the children is using an index to point the power domain. Fix it by
removing the index field.

Fixes: 70bb510e4279 (dt/bindings / PM/Domains: Update binding for PM domain idle states)
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/devicetree/bindings/power/power_domain.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
index 940707d095cc..14bd9e945ff6 100644
--- a/Documentation/devicetree/bindings/power/power_domain.txt
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -81,7 +81,7 @@ Example 3:
 	child: power-controller@12341000 {
 		compatible = "foo,power-controller";
 		reg = <0x12341000 0x1000>;
-		power-domains = <&parent 0>;
+		power-domains = <&parent>;
 		#power-domain-cells = <0>;
 		domain-idle-states = <&DOMAIN_PWR_DN>;
 	};
-- 
cgit v1.2.3


From 5e68ebd0ac4842977c127160f9ecf1a0099f502f Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 12:52:10 +0530
Subject: PM / Domains: Add DT file to MAINTAINERS

Add the power_domain.txt DT file to MAINTAINERS, otherwise
get_maintainers.pl doesn't pick the right set of maintainers for changes
to the DT file.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c45c02bc6082..4309ccb5eddf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5493,6 +5493,7 @@ L:	linux-pm@vger.kernel.org
 S:	Supported
 F:	drivers/base/power/domain*.c
 F:	include/linux/pm_domain.h
+F:	Documentation/devicetree/bindings/power/power_domain.txt
 
 GENERIC UIO DRIVER FOR PCI DEVICES
 M:	"Michael S. Tsirkin" <mst@redhat.com>
-- 
cgit v1.2.3


From 04434ab5120a362c11ae6e6cf196fc6a98c3365d Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 21 Apr 2017 09:35:07 +0200
Subject: ACPI / LPSS: Call pwm_add_table() for Bay Trail PWM device

On Bay Trail systems with a Crystal Cove PMIC the Crystal Cove's PWM is
used to control the backlight brightness. On systems without one, the
Crystal Cove SoC's PWM is used and we need to call pwm_add_table() so
that the i915 driver can find the pwm for controlling the backlight.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_lpss.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 5edfd9c49044..10347e3d73ad 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -143,6 +143,22 @@ static void lpss_deassert_reset(struct lpss_private_data *pdata)
 	writel(val, pdata->mmio_base + offset);
 }
 
+/*
+ * BYT PWM used for backlight control by the i915 driver on systems without
+ * the Crystal Cove PMIC.
+ */
+static struct pwm_lookup byt_pwm_lookup[] = {
+	PWM_LOOKUP_WITH_MODULE("80860F09:00", 0, "0000:00:02.0",
+			       "pwm_backlight", 0, PWM_POLARITY_NORMAL,
+			       "pwm-lpss-platform"),
+};
+
+static void byt_pwm_setup(struct lpss_private_data *pdata)
+{
+	if (!acpi_dev_present("INT33FD", NULL, -1))
+		pwm_add_table(byt_pwm_lookup, ARRAY_SIZE(byt_pwm_lookup));
+}
+
 #define LPSS_I2C_ENABLE			0x6c
 
 static void byt_i2c_setup(struct lpss_private_data *pdata)
@@ -200,6 +216,7 @@ static const struct lpss_device_desc lpt_sdio_dev_desc = {
 
 static const struct lpss_device_desc byt_pwm_dev_desc = {
 	.flags = LPSS_SAVE_CTX,
+	.setup = byt_pwm_setup,
 };
 
 static const struct lpss_device_desc bsw_pwm_dev_desc = {
-- 
cgit v1.2.3


From 4573f0f21d6c40a9426d0418646d87bbf77d6ab5 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 10 Apr 2017 14:40:51 +0800
Subject: drm/amd/powerplay: fix suspend error on DPM disabled

Don't fail if DPM is disabled.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 9da5b0bb66d8..f73e80c4bf33 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -251,7 +251,9 @@ static int pp_suspend(void *handle)
 
 	ret = pp_check(pp_handle);
 
-	if (ret != 0)
+	if (ret == PP_DPM_DISABLED)
+		return 0;
+	else if (ret != 0)
 		return ret;
 
 	eventmgr = pp_handle->eventmgr;
-- 
cgit v1.2.3


From b4de2c5aab2d5cd84c2a648d825b0e48ca426e27 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 10 Apr 2017 15:29:42 +0800
Subject: drm/amdgpu: do not free fence buf when driver probes.

Fence buf needs to be used on suspend/resume phase.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 68ccaedad0f7..19180aaa8bf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -358,8 +358,6 @@ static int psp_load_fw(struct amdgpu_device *adev)
 	if (ret)
 		goto failed_mem;
 
-	amdgpu_bo_free_kernel(&psp->fence_buf_bo,
-			      &psp->fence_buf_mc_addr, &psp->fence_buf);
 	kfree(cmd);
 
 	return 0;
@@ -423,6 +421,10 @@ static int psp_hw_fini(void *handle)
 		amdgpu_bo_free_kernel(&psp->fw_pri_bo,
 				      &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
 
+	if (psp->fence_buf_bo)
+		amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+				      &psp->fence_buf_mc_addr, &psp->fence_buf);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 692bb1ac038a0b736c2b6e9bba41ac2da38e16cf Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 10 Apr 2017 09:29:13 +0800
Subject: drm/amdgpu: fix to clear ASIC INIT COMPLETE bit on resuming phase

ASIC_INIT_COMPLETE bit must be cleared during S3 resuming phase,
because VBIOS will check the bit to decide if execute ASIC_Init
posting via kernel driver.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c     | 6 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 7 +++++++
 2 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index ad4329922f79..1cf78f4dd339 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1727,6 +1727,12 @@ void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev)
 {
 	int i;
 
+	/*
+	 * VBIOS will check ASIC_INIT_COMPLETE bit to decide if
+	 * execute ASIC_Init posting via driver
+	 */
+	adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK;
+
 	for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
 		WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 4b9abd68e04f..d735cd1807b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -26,6 +26,7 @@
 #include "atomfirmware.h"
 #include "amdgpu_atomfirmware.h"
 #include "atom.h"
+#include "atombios.h"
 
 #define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t))
 
@@ -77,6 +78,12 @@ void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev)
 {
 	int i;
 
+	/*
+	 * VBIOS will check ASIC_INIT_COMPLETE bit to decide if
+	 * execute ASIC_Init posting via driver
+	 */
+	adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK;
+
 	for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
 		WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]);
 }
-- 
cgit v1.2.3


From cbcbea982af59d0a4c40dacfb6ff164a533fd1d9 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 11 Apr 2017 09:24:56 +0800
Subject: drm/amdgpu: fix to add buffer funcs check

This patch fixes the case when buffer funcs is empty and bo evict is
executing. It must double check buffer funcs, otherwise, a NULL
pointer dereference kernel panic will be encountered.

 BUG: unable to handle kernel NULL pointer dereference at 00000000000001a4
 IP: [<ffffffffa067b6cd>] amdgpu_evict_flags+0x3d/0xf0 [amdgpu]
 PGD 0

 Oops: 0000 [#1] SMP
 Modules linked in: amdgpu(OE) ttm drm_kms_helper drm i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt fmem(OE) physmem_drv(OE) rpcsec_gss_krb5 nfsv4 nfs fscache intel_rapl x86_pkg_temp_thermal intel_powerclamp snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_codec_generic kvm_intel snd_hda_intel snd_hda_codec kvm snd_hda_core joydev eeepc_wmi asus_wmi sparse_keymap snd_hwdep snd_pcm irqbypass crct10dif_pclmul snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq crc32_pclmul snd_seq_device ghash_clmulni_intel aesni_intel aes_x86_64 snd_timer lrw gf128mul mei_me snd glue_helper ablk_helper cryptd tpm_infineon mei lpc_ich serio_raw soundcore shpchp mac_hid nfsd auth_rpcgss nfs_acl lockd grace coretemp sunrpc parport_pc ppdev lp parport autofs4 hid_generic mxm_wmi r8169 usbhid ahci
  psmouse libahci nvme mii hid nvme_core wmi video
 CPU: 3 PID: 1627 Comm: kworker/u8:17 Tainted: G           OE   4.9.0-custom #1
 Hardware name: ASUS All Series/Z87-A, BIOS 1802 01/28/2014
 Workqueue: events_unbound async_run_entry_fn
 task: ffff88021e7057c0 task.stack: ffffc9000262c000
 RIP: 0010:[<ffffffffa067b6cd>]  [<ffffffffa067b6cd>] amdgpu_evict_flags+0x3d/0xf0 [amdgpu]
 RSP: 0018:ffffc9000262fb30  EFLAGS: 00010246
 RAX: 0000000000000000 RBX: ffff88021e8a5858 RCX: 0000000000000000
 RDX: 0000000000000001 RSI: ffffc9000262fb58 RDI: ffff88021e8a5800
 RBP: ffffc9000262fb48 R08: 0000000000000000 R09: ffff88021e8a5814
 R10: 000000001def8f01 R11: ffff88021def8c80 R12: ffffc9000262fb58
 R13: ffff88021d2b1990 R14: 0000000000000000 R15: ffff88021e8a5858
 FS:  0000000000000000(0000) GS:ffff88022ed80000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00000000000001a4 CR3: 0000000001c07000 CR4: 00000000001406e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Ken Wang <Qingqing.Wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 35d53a0d9ba6..23f7fe8fdc3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -203,7 +203,9 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 	abo = container_of(bo, struct amdgpu_bo, tbo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
-		if (adev->mman.buffer_funcs_ring->ready == false) {
+		if (adev->mman.buffer_funcs &&
+		    adev->mman.buffer_funcs_ring &&
+		    adev->mman.buffer_funcs_ring->ready == false) {
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 		} else {
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
-- 
cgit v1.2.3


From bc108ec78e2af5c7a5f6ef27e264b8093bd60e3b Mon Sep 17 00:00:00 2001
From: Trigger Huang <trigger.huang@amd.com>
Date: Tue, 11 Apr 2017 01:56:36 -0400
Subject: drm/amdgpu: Fix firmware UCODE_ID_STORAGE issue (v2)

In Tonga's virtualization environment, for firmware UCODE_ID_STORAGE,
there is no actual firmware data, but we still need alloc a BO and
tell the BO's mc address to HW, or world switch will hang on VFs.

v2: fix coding style (Alex)

Signed-off-by: Trigger Huang <trigger.huang@amd.com>
Reviewed-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Monk Liu <monk.liu@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index a1891c93cdbf..dfd1c98efa7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -382,10 +382,14 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
 	 * if SMU loaded firmware, it needn't add SMC, UVD, and VCE
 	 * ucode info here
 	 */
-	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
-		adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4;
-	else
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+		if (amdgpu_sriov_vf(adev))
+			adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 3;
+		else
+			adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4;
+	} else {
 		adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM;
+	}
 
 	for (i = 0; i < adev->firmware.max_ucodes; i++) {
 		ucode = &adev->firmware.ucode[i];
-- 
cgit v1.2.3


From 71f2af890a337dec5a792846a9db36943651e4d0 Mon Sep 17 00:00:00 2001
From: Frank Min <Frank.Min@amd.com>
Date: Fri, 7 Apr 2017 10:38:52 +0800
Subject: drm/amdgpu/vce4: update VCE initialization sequence for SRIOV
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update the initialization sequence of VCE to make VCE work.

Signed-off-by: Frank Min <Frank.Min@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index edde5fe938d6..255326eac7ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -280,18 +280,11 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 		init_table += header->vce_table_offset;
 
 		ring = &adev->vce.ring[0];
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), ring->wptr);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), ring->wptr);
 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), lower_32_bits(ring->gpu_addr));
 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
 
 		/* BEGING OF MC_RESUME */
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), ~(1 << 16), 0);
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), ~0xFF9FF000, 0x1FF000);
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), ~0x3F, 0x3F);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
-
 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
 		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
@@ -322,6 +315,8 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 				0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
 
 		/* end of MC_RESUME */
+		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
+				VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
 		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
 				~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
 		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
-- 
cgit v1.2.3


From a2f537e03b678887fdd8d0810b03ecad01ba5d36 Mon Sep 17 00:00:00 2001
From: Xiangliang Yu <Xiangliang.Yu@amd.com>
Date: Thu, 6 Apr 2017 14:43:48 +0800
Subject: drm/amdgpu/vce4: workaround VCE ring test slow issue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add VCE ring test slow workaround for SRIOV.

Signed-off-by: Frank Min <Frank.Min@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index c853400805d1..7a8eaeaae94a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -955,7 +955,11 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 	struct amdgpu_device *adev = ring->adev;
 	uint32_t rptr = amdgpu_ring_get_rptr(ring);
 	unsigned i;
-	int r;
+	int r, timeout = adev->usec_timeout;
+
+	/* workaround VCE ring test slow issue for sriov*/
+	if (amdgpu_sriov_vf(adev))
+		timeout *= 10;
 
 	/* TODO: remove it if VCE can work for sriov */
 	if (amdgpu_sriov_vf(adev))
@@ -970,13 +974,13 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, VCE_CMD_END);
 	amdgpu_ring_commit(ring);
 
-	for (i = 0; i < adev->usec_timeout; i++) {
+	for (i = 0; i < timeout; i++) {
 		if (amdgpu_ring_get_rptr(ring) != rptr)
 			break;
 		DRM_UDELAY(1);
 	}
 
-	if (i < adev->usec_timeout) {
+	if (i < timeout) {
 		DRM_INFO("ring test on %d succeeded in %d usecs\n",
 			 ring->idx, i);
 	} else {
-- 
cgit v1.2.3


From 0381631299e54128be5180d0a169b06cd3ce1ba6 Mon Sep 17 00:00:00 2001
From: Frank Min <Frank.Min@amd.com>
Date: Thu, 6 Apr 2017 14:46:50 +0800
Subject: drm/amdgpu/vce4: enable ring & ib test for sriov
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now VCE block can work for SRIOV, enable ring & ib test.

Signed-off-by: Frank Min <Frank.Min@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 7a8eaeaae94a..735c38d7db0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -961,10 +961,6 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 	if (amdgpu_sriov_vf(adev))
 		timeout *= 10;
 
-	/* TODO: remove it if VCE can work for sriov */
-	if (amdgpu_sriov_vf(adev))
-		return 0;
-
 	r = amdgpu_ring_alloc(ring, 16);
 	if (r) {
 		DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
@@ -1003,10 +999,6 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	struct dma_fence *fence = NULL;
 	long r;
 
-	/* TODO: remove it if VCE can work for sriov */
-	if (amdgpu_sriov_vf(ring->adev))
-		return 0;
-
 	/* skip vce ring1/2 ib test for now, since it's not reliable */
 	if (ring != &ring->adev->vce.ring[0])
 		return 0;
-- 
cgit v1.2.3


From 0eeb68b390377148e31f000db1c533a9a49bf950 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 30 Mar 2017 14:49:50 +0200
Subject: drm/amdgpu: add VMHUB to ring association
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the info which ring belonging to which VMHUB.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 3 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 1 +
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c    | 2 ++
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c    | 1 +
 5 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 63e56398ca9a..de12f7549e6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -99,6 +99,7 @@ struct amdgpu_ring_funcs {
 	uint32_t		align_mask;
 	u32			nop;
 	bool			support_64bit_ptrs;
+	unsigned		vmhub;
 
 	/* ring read/write ptr handling */
 	u64 (*get_rptr)(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e58fb05bc904..4a7fe9727e95 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3456,6 +3456,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.align_mask = 0xff,
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_GFXHUB,
 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
@@ -3500,6 +3501,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.align_mask = 0xff,
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_GFXHUB,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
@@ -3529,6 +3531,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.align_mask = 0xff,
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_GFXHUB,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 21f38d882335..7ba3e85a96c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1473,6 +1473,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
 	.align_mask = 0xf,
 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
 	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = sdma_v4_0_ring_get_rptr,
 	.get_wptr = sdma_v4_0_ring_get_wptr,
 	.set_wptr = sdma_v4_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 9bcf01469282..b5429223618f 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1448,6 +1448,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 	.align_mask = 0xf,
 	.nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0),
 	.support_64bit_ptrs = false,
+	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = uvd_v7_0_ring_get_rptr,
 	.get_wptr = uvd_v7_0_ring_get_wptr,
 	.set_wptr = uvd_v7_0_ring_set_wptr,
@@ -1475,6 +1476,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
 	.align_mask = 0x3f,
 	.nop = HEVC_ENC_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = uvd_v7_0_enc_ring_get_rptr,
 	.get_wptr = uvd_v7_0_enc_ring_get_wptr,
 	.set_wptr = uvd_v7_0_enc_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 255326eac7ed..f68ab3b24c31 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -1073,6 +1073,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
 	.align_mask = 0x3f,
 	.nop = VCE_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = vce_v4_0_ring_get_rptr,
 	.get_wptr = vce_v4_0_ring_get_wptr,
 	.set_wptr = vce_v4_0_ring_set_wptr,
-- 
cgit v1.2.3


From 4f618e737fafed22302d4b660eecfe1dce971b0f Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 6 Apr 2017 15:18:21 +0200
Subject: drm/amdgpu: drop VMID per ring tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

David suggested this a long time ago, instead of checking
each ring just walk over all the VMIDs in reverse LRU order.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Andres Rodriguez <andresx7@gmail.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 +++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 ---
 2 files changed, 3 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7ed5302b511a..8d96da53990a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -463,17 +463,10 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 
 	job->vm_needs_flush = true;
 	/* Check if we can use a VMID already assigned to this VM */
-	i = ring->idx;
-	do {
+	list_for_each_entry_reverse(id, &adev->vm_manager.ids_lru, list) {
 		struct dma_fence *flushed;
 
-		id = vm->ids[i++];
-		if (i == AMDGPU_MAX_RINGS)
-			i = 0;
-
 		/* Check all the prerequisites to using this VMID */
-		if (!id)
-			continue;
 		if (amdgpu_vm_had_gpu_reset(adev, id))
 			continue;
 
@@ -503,7 +496,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 			goto error;
 
 		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-		vm->ids[ring->idx] = id;
 
 		job->vm_id = id - adev->vm_manager.ids;
 		job->vm_needs_flush = false;
@@ -512,7 +504,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		mutex_unlock(&adev->vm_manager.lock);
 		return 0;
 
-	} while (i != ring->idx);
+	};
 
 	/* Still no ID to use? Then use the idle one found earlier */
 	id = idle;
@@ -532,7 +524,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 	atomic64_set(&id->owner, vm->client_id);
-	vm->ids[ring->idx] = id;
 
 	job->vm_id = id - adev->vm_manager.ids;
 	trace_amdgpu_vm_grab_id(vm, ring->idx, job);
@@ -2117,10 +2108,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	unsigned ring_instance;
 	struct amdgpu_ring *ring;
 	struct amd_sched_rq *rq;
-	int i, r;
+	int r;
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
-		vm->ids[i] = NULL;
 	vm->va = RB_ROOT;
 	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
 	spin_lock_init(&vm->status_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d9e57290dc71..ba9c39317dd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -114,9 +114,6 @@ struct amdgpu_vm {
 	struct dma_fence	*last_dir_update;
 	uint64_t		last_eviction_counter;
 
-	/* for id and flush management per ring */
-	struct amdgpu_vm_id	*ids[AMDGPU_MAX_RINGS];
-
 	/* protecting freed */
 	spinlock_t		freed_lock;
 
-- 
cgit v1.2.3


From 7645670decdb677e2f415ff91609d31e5d4777d8 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 6 Apr 2017 17:52:39 +0200
Subject: drm/amdgpu: split VMID management by VMHUB
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This way GFX and MM won't fight for VMIDs any more.

Initially disabled since we need to stop flushing all HUBS
at the same time as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Andres Rodriguez <andresx7@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c     |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 82 ++++++++++++++++++------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     | 15 ++++--
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c      |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c      |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c      |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c      |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      |  3 +-
 10 files changed, 69 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 483660742f75..724b4c1c80f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1854,7 +1854,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	/* mutex initialization are all done here so we
 	 * can recall function without having locking issues */
-	mutex_init(&adev->vm_manager.lock);
 	atomic_set(&adev->irq.ih.lock, 0);
 	mutex_init(&adev->firmware.mutex);
 	mutex_init(&adev->pm.mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index aab857d89d03..2d11ac8d1aa9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -217,7 +217,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vm_id)
-			amdgpu_vm_reset_id(adev, job->vm_id);
+			amdgpu_vm_reset_id(adev, ring->funcs->vmhub,
+					   job->vm_id);
 		amdgpu_ring_undo(ring);
 		return r;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8d96da53990a..3455b2c8652c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -406,6 +406,9 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
+	/* Temporary use only the first VM manager */
+	unsigned vmhub = 0; /*ring->funcs->vmhub;*/
+	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	uint64_t fence_context = adev->fence_context + ring->idx;
 	struct dma_fence *updates = sync->last_vm_update;
 	struct amdgpu_vm_id *id, *idle;
@@ -413,16 +416,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	unsigned i;
 	int r = 0;
 
-	fences = kmalloc_array(sizeof(void *), adev->vm_manager.num_ids,
-			       GFP_KERNEL);
+	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
 	if (!fences)
 		return -ENOMEM;
 
-	mutex_lock(&adev->vm_manager.lock);
+	mutex_lock(&id_mgr->lock);
 
 	/* Check if we have an idle VMID */
 	i = 0;
-	list_for_each_entry(idle, &adev->vm_manager.ids_lru, list) {
+	list_for_each_entry(idle, &id_mgr->ids_lru, list) {
 		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
 		if (!fences[i])
 			break;
@@ -430,7 +432,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	}
 
 	/* If we can't find a idle VMID to use, wait till one becomes available */
-	if (&idle->list == &adev->vm_manager.ids_lru) {
+	if (&idle->list == &id_mgr->ids_lru) {
 		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
 		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
 		struct dma_fence_array *array;
@@ -455,7 +457,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (r)
 			goto error;
 
-		mutex_unlock(&adev->vm_manager.lock);
+		mutex_unlock(&id_mgr->lock);
 		return 0;
 
 	}
@@ -463,7 +465,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 
 	job->vm_needs_flush = true;
 	/* Check if we can use a VMID already assigned to this VM */
-	list_for_each_entry_reverse(id, &adev->vm_manager.ids_lru, list) {
+	list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
 		struct dma_fence *flushed;
 
 		/* Check all the prerequisites to using this VMID */
@@ -495,13 +497,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (r)
 			goto error;
 
-		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
+		list_move_tail(&id->list, &id_mgr->ids_lru);
 
-		job->vm_id = id - adev->vm_manager.ids;
+		job->vm_id = id - id_mgr->ids;
 		job->vm_needs_flush = false;
 		trace_amdgpu_vm_grab_id(vm, ring->idx, job);
 
-		mutex_unlock(&adev->vm_manager.lock);
+		mutex_unlock(&id_mgr->lock);
 		return 0;
 
 	};
@@ -522,14 +524,14 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 
 	id->pd_gpu_addr = job->vm_pd_addr;
 	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
-	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
+	list_move_tail(&id->list, &id_mgr->ids_lru);
 	atomic64_set(&id->owner, vm->client_id);
 
-	job->vm_id = id - adev->vm_manager.ids;
+	job->vm_id = id - id_mgr->ids;
 	trace_amdgpu_vm_grab_id(vm, ring->idx, job);
 
 error:
-	mutex_unlock(&adev->vm_manager.lock);
+	mutex_unlock(&id_mgr->lock);
 	return r;
 }
 
@@ -581,7 +583,9 @@ static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id];
+	unsigned vmhub = ring->funcs->vmhub;
+	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id];
 	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
 		id->gds_base != job->gds_base ||
 		id->gds_size != job->gds_size ||
@@ -619,10 +623,10 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 		if (r)
 			return r;
 
-		mutex_lock(&adev->vm_manager.lock);
+		mutex_lock(&id_mgr->lock);
 		dma_fence_put(id->last_flush);
 		id->last_flush = fence;
-		mutex_unlock(&adev->vm_manager.lock);
+		mutex_unlock(&id_mgr->lock);
 	}
 
 	if (gds_switch_needed) {
@@ -657,9 +661,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
  *
  * Reset saved GDW, GWS and OA to force switch on next flush.
  */
-void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id)
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
+			unsigned vmid)
 {
-	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
+	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct amdgpu_vm_id *id = &id_mgr->ids[vmid];
 
 	id->gds_base = 0;
 	id->gds_size = 0;
@@ -2230,16 +2236,21 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  */
 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 {
-	unsigned i;
+	unsigned i, j;
+
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+		struct amdgpu_vm_id_manager *id_mgr =
+			&adev->vm_manager.id_mgr[i];
 
-	INIT_LIST_HEAD(&adev->vm_manager.ids_lru);
+		mutex_init(&id_mgr->lock);
+		INIT_LIST_HEAD(&id_mgr->ids_lru);
 
-	/* skip over VMID 0, since it is the system VM */
-	for (i = 1; i < adev->vm_manager.num_ids; ++i) {
-		amdgpu_vm_reset_id(adev, i);
-		amdgpu_sync_create(&adev->vm_manager.ids[i].active);
-		list_add_tail(&adev->vm_manager.ids[i].list,
-			      &adev->vm_manager.ids_lru);
+		/* skip over VMID 0, since it is the system VM */
+		for (j = 1; j < id_mgr->num_ids; ++j) {
+			amdgpu_vm_reset_id(adev, i, j);
+			amdgpu_sync_create(&id_mgr->ids[i].active);
+			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
+		}
 	}
 
 	adev->vm_manager.fence_context =
@@ -2247,6 +2258,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		adev->vm_manager.seqno[i] = 0;
 
+
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 	atomic64_set(&adev->vm_manager.client_counter, 0);
 	spin_lock_init(&adev->vm_manager.prt_lock);
@@ -2262,13 +2274,19 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
  */
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 {
-	unsigned i;
+	unsigned i, j;
 
-	for (i = 0; i < AMDGPU_NUM_VM; ++i) {
-		struct amdgpu_vm_id *id = &adev->vm_manager.ids[i];
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+		struct amdgpu_vm_id_manager *id_mgr =
+			&adev->vm_manager.id_mgr[i];
 
-		amdgpu_sync_free(&adev->vm_manager.ids[i].active);
-		dma_fence_put(id->flushed_updates);
-		dma_fence_put(id->last_flush);
+		mutex_destroy(&id_mgr->lock);
+		for (j = 0; j < AMDGPU_NUM_VM; ++j) {
+			struct amdgpu_vm_id *id = &id_mgr->ids[j];
+
+			amdgpu_sync_free(&id->active);
+			dma_fence_put(id->flushed_updates);
+			dma_fence_put(id->last_flush);
+		}
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ba9c39317dd3..661a8f6826ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -146,12 +146,16 @@ struct amdgpu_vm_id {
 	uint32_t		oa_size;
 };
 
+struct amdgpu_vm_id_manager {
+	struct mutex		lock;
+	unsigned		num_ids;
+	struct list_head	ids_lru;
+	struct amdgpu_vm_id	ids[AMDGPU_NUM_VM];
+};
+
 struct amdgpu_vm_manager {
 	/* Handling of VMIDs */
-	struct mutex				lock;
-	unsigned				num_ids;
-	struct list_head			ids_lru;
-	struct amdgpu_vm_id			ids[AMDGPU_NUM_VM];
+	struct amdgpu_vm_id_manager		id_mgr[AMDGPU_MAX_VMHUBS];
 
 	/* Handling of VM fences */
 	u64					fence_context;
@@ -197,7 +201,8 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct dma_fence *fence,
 		      struct amdgpu_job *job);
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
-void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
+			unsigned vmid);
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 8a8bc2fe6f2e..eca022eaff44 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1935,7 +1935,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 				   INDEX_STRIDE, 3);
 
 	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < adev->vm_manager.num_ids; i++) {
+	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
 		if (i == 0)
 			sh_mem_base = 0;
 		else
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index dad8a4cd1b37..0115c5f65c01 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3890,7 +3890,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
 				   INDEX_STRIDE, 3);
 	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < adev->vm_manager.num_ids; i++) {
+	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
 		vi_srbm_select(adev, 0, 0, 0, i);
 		/* CP and shaders */
 		if (i == 0) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 631aef38126d..31c50b2dbbc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -621,7 +621,7 @@ static int gmc_v6_0_vm_init(struct amdgpu_device *adev)
 	 * amdgpu graphics/compute will use VMIDs 1-7
 	 * amdkfd will use VMIDs 8-15
 	 */
-	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
 	adev->vm_manager.num_level = 1;
 	amdgpu_vm_manager_init(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 92abe12d92bb..2217d4d0f895 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -746,7 +746,7 @@ static int gmc_v7_0_vm_init(struct amdgpu_device *adev)
 	 * amdgpu graphics/compute will use VMIDs 1-7
 	 * amdkfd will use VMIDs 8-15
 	 */
-	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
 	adev->vm_manager.num_level = 1;
 	amdgpu_vm_manager_init(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index f2ccefc66fd4..e4f5df32f31e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -949,7 +949,7 @@ static int gmc_v8_0_vm_init(struct amdgpu_device *adev)
 	 * amdgpu graphics/compute will use VMIDs 1-7
 	 * amdkfd will use VMIDs 8-15
 	 */
-	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
 	adev->vm_manager.num_level = 1;
 	amdgpu_vm_manager_init(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3b045e0b114e..7f5cc75f0da5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -519,7 +519,8 @@ static int gmc_v9_0_vm_init(struct amdgpu_device *adev)
 	 * amdgpu graphics/compute will use VMIDs 1-7
 	 * amdkfd will use VMIDs 8-15
 	 */
-	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
 
 	/* TODO: fix num_level for APU when updating vm size and block size */
 	if (adev->flags & AMD_IS_APU)
-- 
cgit v1.2.3


From 2e81984988adf8de92b7d3a14ba0fe8310d0bcf8 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 30 Mar 2017 16:50:47 +0200
Subject: drm/amdgpu: invalidate only the currently needed VMHUB v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop invalidating both hubs from each engine.

v2: don't use hardcoded values

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |   3 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  36 +++++------
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  60 +++++++++---------
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c  | 111 +++++++++++++++------------------
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c  |  57 ++++++++---------
 5 files changed, 119 insertions(+), 148 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3455b2c8652c..b38a8d7714c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -406,8 +406,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
-	/* Temporary use only the first VM manager */
-	unsigned vmhub = 0; /*ring->funcs->vmhub;*/
+	unsigned vmhub = ring->funcs->vmhub;
 	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	uint64_t fence_context = adev->fence_context + ring->idx;
 	struct dma_fence *updates = sync->last_vm_update;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 4a7fe9727e95..1badd294ff18 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2956,35 +2956,29 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
-	unsigned i;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-					   hub->ctx0_ptb_addr_lo32
-					   + (2 * vm_id),
-					   lower_32_bits(pd_addr));
+	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+				   hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
+				   lower_32_bits(pd_addr));
 
-		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-					   hub->ctx0_ptb_addr_hi32
-					   + (2 * vm_id),
-					   upper_32_bits(pd_addr));
+	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+				   hub->ctx0_ptb_addr_hi32 + (2 * vm_id),
+				   upper_32_bits(pd_addr));
 
-		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-					   hub->vm_inv_eng0_req + eng, req);
+	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+				   hub->vm_inv_eng0_req + eng, req);
 
-		/* wait for the invalidate to complete */
-		gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
-				      eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
-	}
+	/* wait for the invalidate to complete */
+	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
+			      eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
 
 	/* compute doesn't have PFP */
 	if (usepfp) {
@@ -3463,7 +3457,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
 		5 +  /* COND_EXEC */
 		7 +  /* PIPELINE_SYNC */
-		46 + /* VM_FLUSH */
+		24 + /* VM_FLUSH */
 		8 +  /* FENCE for VM_FLUSH */
 		20 + /* GDS switch */
 		4 + /* double SWITCH_BUFFER,
@@ -3510,7 +3504,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-		64 + /* gfx_v9_0_ring_emit_vm_flush */
+		24 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
@@ -3540,7 +3534,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-		64 + /* gfx_v9_0_ring_emit_vm_flush */
+		24 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 7ba3e85a96c6..c3cf6bcadc60 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1039,44 +1039,40 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					 unsigned vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
-	unsigned i;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
-				  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-		amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
-				  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-		amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
-		amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
-		/* flush TLB */
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
-				  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-		amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
-		amdgpu_ring_write(ring, req);
-
-		/* wait for flush */
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
-				  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
-				  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
-		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring, 1 << vm_id); /* reference */
-		amdgpu_ring_write(ring, 1 << vm_id); /* mask */
-		amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
-				  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
-	}
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
+	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+
+	/* flush TLB */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
+	amdgpu_ring_write(ring, req);
+
+	/* wait for flush */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 1 << vm_id); /* reference */
+	amdgpu_ring_write(ring, 1 << vm_id); /* mask */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
 }
 
 static int sdma_v4_0_early_init(void *handle)
@@ -1481,7 +1477,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
 		6 + /* sdma_v4_0_ring_emit_hdp_flush */
 		3 + /* sdma_v4_0_ring_emit_hdp_invalidate */
 		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
-		36 + /* sdma_v4_0_ring_emit_vm_flush */
+		18 + /* sdma_v4_0_ring_emit_vm_flush */
 		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
 	.emit_ib = sdma_v4_0_ring_emit_ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index b5429223618f..dc3a2145a452 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1034,42 +1034,38 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring,
 static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	uint32_t data0, data1, mask;
 	unsigned eng = ring->idx;
-	unsigned i;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
-		data1 = upper_32_bits(pd_addr);
-		uvd_v7_0_vm_reg_write(ring, data0, data1);
-
-		data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
-		data1 = lower_32_bits(pd_addr);
-		uvd_v7_0_vm_reg_write(ring, data0, data1);
-
-		data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
-		data1 = lower_32_bits(pd_addr);
-		mask = 0xffffffff;
-		uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
-
-		/* flush TLB */
-		data0 = (hub->vm_inv_eng0_req + eng) << 2;
-		data1 = req;
-		uvd_v7_0_vm_reg_write(ring, data0, data1);
-
-		/* wait for flush */
-		data0 = (hub->vm_inv_eng0_ack + eng) << 2;
-		data1 = 1 << vm_id;
-		mask =  1 << vm_id;
-		uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
-	}
+	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
+	data1 = upper_32_bits(pd_addr);
+	uvd_v7_0_vm_reg_write(ring, data0, data1);
+
+	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data1 = lower_32_bits(pd_addr);
+	uvd_v7_0_vm_reg_write(ring, data0, data1);
+
+	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data1 = lower_32_bits(pd_addr);
+	mask = 0xffffffff;
+	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
+
+	/* flush TLB */
+	data0 = (hub->vm_inv_eng0_req + eng) << 2;
+	data1 = req;
+	uvd_v7_0_vm_reg_write(ring, data0, data1);
+
+	/* wait for flush */
+	data0 = (hub->vm_inv_eng0_ack + eng) << 2;
+	data1 = 1 << vm_id;
+	mask =  1 << vm_id;
+	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
 }
 
 static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
@@ -1080,44 +1076,37 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
 static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 			 unsigned int vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
-	unsigned i;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, 0xffffffff);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		/* flush TLB */
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
-		amdgpu_ring_write(ring, req);
-
-		/* wait for flush */
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
-		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-		amdgpu_ring_write(ring, 1 << vm_id);
-		amdgpu_ring_write(ring, 1 << vm_id);
-	}
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, 0xffffffff);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	/* flush TLB */
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
+	amdgpu_ring_write(ring, req);
+
+	/* wait for flush */
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vm_id);
 }
 
 #if 0
@@ -1455,7 +1444,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 	.emit_frame_size =
 		2 + /* uvd_v7_0_ring_emit_hdp_flush */
 		2 + /* uvd_v7_0_ring_emit_hdp_invalidate */
-		34 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_ring_emit_vm_flush */
+		34 + /* uvd_v7_0_ring_emit_vm_flush */
 		14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */
 	.emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */
 	.emit_ib = uvd_v7_0_ring_emit_ib,
@@ -1481,7 +1470,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
 	.get_wptr = uvd_v7_0_enc_ring_get_wptr,
 	.set_wptr = uvd_v7_0_enc_ring_set_wptr,
 	.emit_frame_size =
-		17 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_enc_ring_emit_vm_flush */
+		17 + /* uvd_v7_0_enc_ring_emit_vm_flush */
 		5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */
 		1, /* uvd_v7_0_enc_ring_insert_end */
 	.emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index f68ab3b24c31..2ffafbe6cd80 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -968,44 +968,37 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 			 unsigned int vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
-	unsigned i;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, 0xffffffff);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		/* flush TLB */
-		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
-		amdgpu_ring_write(ring, req);
-
-		/* wait for flush */
-		amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
-		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-		amdgpu_ring_write(ring, 1 << vm_id);
-		amdgpu_ring_write(ring, 1 << vm_id);
-	}
+	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, 0xffffffff);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	/* flush TLB */
+	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
+	amdgpu_ring_write(ring, req);
+
+	/* wait for flush */
+	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vm_id);
 }
 
 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -1079,7 +1072,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
 	.set_wptr = vce_v4_0_ring_set_wptr,
 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
 	.emit_frame_size =
-		17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */
+		17 + /* vce_v4_0_emit_vm_flush */
 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
 		1, /* vce_v4_0_ring_insert_end */
 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
-- 
cgit v1.2.3


From 4789c463cb04d725170406873caaec5208c99eb9 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 31 Mar 2017 11:03:50 +0200
Subject: drm/amdgpu: assign VM invalidation engine manually v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For Vega10 we have 18 VM invalidation engines for each VMHUB.

Start to assign them manually to the rings.

v2: add a BUG_ON if we use to many engines

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 16 ++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c    |  4 ++--
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c    |  2 +-
 6 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index de12f7549e6d..944443c5b90a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -179,6 +179,7 @@ struct amdgpu_ring {
 	unsigned		cond_exe_offs;
 	u64			cond_exe_gpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
+	unsigned		vm_inv_eng;
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry *ent;
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1badd294ff18..87596e48f65d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2959,7 +2959,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	unsigned eng = ring->idx;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 7f5cc75f0da5..a71521e10763 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -386,6 +386,22 @@ static int gmc_v9_0_early_init(void *handle)
 static int gmc_v9_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 0 };
+	unsigned i;
+
+	for(i = 0; i < adev->num_rings; ++i) {
+		struct amdgpu_ring *ring = adev->rings[i];
+		unsigned vmhub = ring->funcs->vmhub;
+
+		ring->vm_inv_eng = vm_inv_eng[vmhub]++;
+		dev_info(adev->dev, "ring %u uses VM inv eng %u on hub %u\n",
+			 ring->idx, ring->vm_inv_eng, ring->funcs->vmhub);
+	}
+
+	/* Engine 17 is used for GART flushes */
+	for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
+		BUG_ON(vm_inv_eng[i] > 17);
+
 	return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index c3cf6bcadc60..88c5d8f6f414 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1041,7 +1041,7 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	unsigned eng = ring->idx;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index dc3a2145a452..c646570d1421 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1037,7 +1037,7 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	uint32_t data0, data1, mask;
-	unsigned eng = ring->idx;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
@@ -1078,7 +1078,7 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	unsigned eng = ring->idx;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 2ffafbe6cd80..72d0edb9bc39 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -970,7 +970,7 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	unsigned eng = ring->idx;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
-- 
cgit v1.2.3


From 87c910d806295df26069d0325f517ed72ce29d32 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 30 Mar 2017 16:56:20 +0200
Subject: drm/amdgpu: allow concurrent VM flushes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable concurrent VM flushes for Vega10.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 50 +++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b38a8d7714c7..4e6c1e4072ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -462,10 +462,11 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	}
 	kfree(fences);
 
-	job->vm_needs_flush = true;
+	job->vm_needs_flush = false;
 	/* Check if we can use a VMID already assigned to this VM */
 	list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
 		struct dma_fence *flushed;
+		bool needs_flush = false;
 
 		/* Check all the prerequisites to using this VMID */
 		if (amdgpu_vm_had_gpu_reset(adev, id))
@@ -477,16 +478,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (job->vm_pd_addr != id->pd_gpu_addr)
 			continue;
 
-		if (!id->last_flush)
-			continue;
-
-		if (id->last_flush->context != fence_context &&
-		    !dma_fence_is_signaled(id->last_flush))
-			continue;
+		if (!id->last_flush ||
+		    (id->last_flush->context != fence_context &&
+		     !dma_fence_is_signaled(id->last_flush)))
+			needs_flush = true;
 
 		flushed  = id->flushed_updates;
-		if (updates &&
-		    (!flushed || dma_fence_is_later(updates, flushed)))
+		if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
+			needs_flush = true;
+
+		/* Concurrent flushes are only possible starting with Vega10 */
+		if (adev->asic_type < CHIP_VEGA10 && needs_flush)
 			continue;
 
 		/* Good we can use this VMID. Remember this submission as
@@ -496,14 +498,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (r)
 			goto error;
 
-		list_move_tail(&id->list, &id_mgr->ids_lru);
-
-		job->vm_id = id - id_mgr->ids;
-		job->vm_needs_flush = false;
-		trace_amdgpu_vm_grab_id(vm, ring->idx, job);
+		if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
+			dma_fence_put(id->flushed_updates);
+			id->flushed_updates = dma_fence_get(updates);
+		}
 
-		mutex_unlock(&id_mgr->lock);
-		return 0;
+		if (needs_flush)
+			goto needs_flush;
+		else
+			goto no_flush_needed;
 
 	};
 
@@ -515,17 +518,20 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	if (r)
 		goto error;
 
-	dma_fence_put(id->last_flush);
-	id->last_flush = NULL;
-
+	id->pd_gpu_addr = job->vm_pd_addr;
 	dma_fence_put(id->flushed_updates);
 	id->flushed_updates = dma_fence_get(updates);
-
-	id->pd_gpu_addr = job->vm_pd_addr;
 	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
-	list_move_tail(&id->list, &id_mgr->ids_lru);
 	atomic64_set(&id->owner, vm->client_id);
 
+needs_flush:
+	job->vm_needs_flush = true;
+	dma_fence_put(id->last_flush);
+	id->last_flush = NULL;
+
+no_flush_needed:
+	list_move_tail(&id->list, &id_mgr->ids_lru);
+
 	job->vm_id = id - id_mgr->ids;
 	trace_amdgpu_vm_grab_id(vm, ring->idx, job);
 
-- 
cgit v1.2.3


From c5296d1401fb01ceb3a3cf781cc572b1847953c6 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 7 Apr 2017 15:31:13 +0200
Subject: drm/amdgpu: trace the vmhub in grab_id as well
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Trace on which VMHUB we assigned an VMID.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 17 ++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  2 +-
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index ee9d0f346d75..a98e4b8dd136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -190,26 +190,29 @@ TRACE_EVENT(amdgpu_sched_run_job,
 
 
 TRACE_EVENT(amdgpu_vm_grab_id,
-	    TP_PROTO(struct amdgpu_vm *vm, int ring, struct amdgpu_job *job),
+	    TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+		     struct amdgpu_job *job),
 	    TP_ARGS(vm, ring, job),
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_vm *, vm)
 			     __field(u32, ring)
-			     __field(u32, vmid)
+			     __field(u32, vm_id)
+			     __field(u32, vm_hub)
 			     __field(u64, pd_addr)
 			     __field(u32, needs_flush)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->vm = vm;
-			   __entry->ring = ring;
-			   __entry->vmid = job->vm_id;
+			   __entry->ring = ring->idx;
+			   __entry->vm_id = job->vm_id;
+			   __entry->vm_hub = ring->funcs->vmhub,
 			   __entry->pd_addr = job->vm_pd_addr;
 			   __entry->needs_flush = job->vm_needs_flush;
 			   ),
-	    TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx needs_flush=%u",
-		      __entry->vm, __entry->ring, __entry->vmid,
-		      __entry->pd_addr, __entry->needs_flush)
+	    TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
+		      __entry->vm, __entry->ring, __entry->vm_id,
+		      __entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
 );
 
 TRACE_EVENT(amdgpu_vm_bo_map,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4e6c1e4072ca..f23f1b09d8a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -533,7 +533,7 @@ no_flush_needed:
 	list_move_tail(&id->list, &id_mgr->ids_lru);
 
 	job->vm_id = id - id_mgr->ids;
-	trace_amdgpu_vm_grab_id(vm, ring->idx, job);
+	trace_amdgpu_vm_grab_id(vm, ring, job);
 
 error:
 	mutex_unlock(&id_mgr->lock);
-- 
cgit v1.2.3


From 5f1bcf511f6e5f54fffa6365286af1e88b18f54c Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 7 Apr 2017 17:43:19 +0200
Subject: drm/amdgpu: trace vm hub during flush as well v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Trace on which hub we are doing the flush.

v2: fix typo in commit message

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 20 ++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  2 +-
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index a98e4b8dd136..8601904e670a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -334,21 +334,25 @@ TRACE_EVENT(amdgpu_vm_copy_ptes,
 );
 
 TRACE_EVENT(amdgpu_vm_flush,
-	    TP_PROTO(uint64_t pd_addr, unsigned ring, unsigned id),
-	    TP_ARGS(pd_addr, ring, id),
+	    TP_PROTO(struct amdgpu_ring *ring, unsigned vm_id,
+		     uint64_t pd_addr),
+	    TP_ARGS(ring, vm_id, pd_addr),
 	    TP_STRUCT__entry(
-			     __field(u64, pd_addr)
 			     __field(u32, ring)
-			     __field(u32, id)
+			     __field(u32, vm_id)
+			     __field(u32, vm_hub)
+			     __field(u64, pd_addr)
 			     ),
 
 	    TP_fast_assign(
+			   __entry->ring = ring->idx;
+			   __entry->vm_id = vm_id;
+			   __entry->vm_hub = ring->funcs->vmhub;
 			   __entry->pd_addr = pd_addr;
-			   __entry->ring = ring;
-			   __entry->id = id;
 			   ),
-	    TP_printk("ring=%u, id=%u, pd_addr=%010Lx",
-		      __entry->ring, __entry->id, __entry->pd_addr)
+	    TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx",
+		      __entry->ring, __entry->vm_id,
+		      __entry->vm_hub,__entry->pd_addr)
 );
 
 TRACE_EVENT(amdgpu_bo_list_set,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f23f1b09d8a9..6b95176d3f75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -621,7 +621,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 		u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
 		struct dma_fence *fence;
 
-		trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
+		trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr);
 		amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
 
 		r = amdgpu_fence_emit(ring, &fence);
-- 
cgit v1.2.3


From 080b24ebdf230c80fe63e64633c0d52aca5d1a8e Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 10 Apr 2017 15:32:43 -0400
Subject: drm/amdgpu: fix spelling in header comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 92262d81d41e..95260e5043af 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -297,7 +297,7 @@ union drm_amdgpu_gem_wait_idle {
 struct drm_amdgpu_wait_cs_in {
 	/* Command submission handle
          * handle equals 0 means none to wait for
-         * handle equal ~0ull meanas wait for the latest sequence number
+         * handle equals ~0ull means wait for the latest sequence number
          */
 	__u64 handle;
 	/** Absolute timeout to wait */
-- 
cgit v1.2.3


From 203eb0cb0eab990347e646c89a7134e60713133a Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 10 Apr 2017 15:36:32 -0400
Subject: drm/amdgpu: bump version number to note race fix and new fence
 functionality
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fixed in: "drm/amdgpu:fix race condition"

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4e0f7d2d87f1..bb47fb3ce341 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -63,9 +63,10 @@
  * - 3.11.0 - Add support for sensor query info (clocks, temp, etc).
  * - 3.12.0 - Add query for double offchip LDS buffers
  * - 3.13.0 - Add PRT support
+ * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	13
+#define KMS_DRIVER_MINOR	14
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
-- 
cgit v1.2.3


From 8f12bbe6d94a51f3ae314c27cc7b9b315adfe383 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Tue, 11 Apr 2017 19:20:20 +0200
Subject: drm/radeon: force the UVD DPB into VRAM as well
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Seems to be mandatory for WMV playback.

Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=100510

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_cs.c  | 10 ++++++----
 drivers/gpu/drm/radeon/radeon_uvd.c |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index df6b58c08544..3ac671f6c8e1 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -117,11 +117,13 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
 			   + !!r->write_domain;
 
-		/* the first reloc of an UVD job is the msg and that must be in
-		   VRAM, also but everything into VRAM on AGP cards and older
-		   IGP chips to avoid image corruptions */
+		/* The first reloc of an UVD job is the msg and that must be in
+		 * VRAM, the second reloc is the DPB and for WMV that must be in
+		 * VRAM as well. Also put everything into VRAM on AGP cards and older
+		 * IGP chips to avoid image corruptions
+		 */
 		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
-		    (i == 0 || pci_find_capability(p->rdev->ddev->pdev,
+		    (i <= 0 || pci_find_capability(p->rdev->ddev->pdev,
 						   PCI_CAP_ID_AGP) ||
 		     p->rdev->family == CHIP_RS780 ||
 		     p->rdev->family == CHIP_RS880)) {
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index d34d1cf33895..7431eb4a11b7 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -621,7 +621,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
 	}
 
 	/* TODO: is this still necessary on NI+ ? */
-	if ((cmd == 0 || cmd == 0x3) &&
+	if ((cmd == 0 || cmd == 1 || cmd == 0x3) &&
 	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
 		DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
 			  start, end);
-- 
cgit v1.2.3


From 03161a6ecb7576492bb37e9bfc68a0330fc6a41d Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Thu, 13 Apr 2017 16:12:26 +0800
Subject: drm/amdgpu: fix dead lock if any ip block resume failed in s3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Driver must free the console lock whether driver resuming successful
or not. Otherwise, fb_console will be always waiting for the lock and
then cause system stuck.

[  244.405541] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
[  244.405543]       Tainted: G           OE   4.9.0-custom #1
[  244.405544] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  244.405541] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
[  244.405543]       Tainted: G           OE   4.9.0-custom #1
[  244.405544] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  244.405550] kworker/0:0     D    0     4      2 0x00080000
[  244.405559] Workqueue: events console_callback
[  244.405564]  ffff88045a2cfc00 0000000000000000 ffff880462b75940 ffffffff81c0e500
[  244.405568]  ffff880476419280 ffffc900018f7c90 ffffffff817dcf62 000000000000003c
[  244.405572]  0000000100000000 0000000000000002 ffff880462b75940 ffff880462b75940
[  244.405573] Call Trace:
[  244.405580]  [<ffffffff817dcf62>] ? __schedule+0x222/0x6a0
[  244.405584]  [<ffffffff817dd416>] schedule+0x36/0x80
[  244.405588]  [<ffffffff817e041c>] schedule_timeout+0x1fc/0x390
[  244.405592]  [<ffffffff817df1b4>] __down_common+0xa5/0xf8
[  244.405598]  [<ffffffff810b2ca8>] ? put_prev_entity+0x48/0x710
[  244.405601]  [<ffffffff817df224>] __down+0x1d/0x1f
[  244.405606]  [<ffffffff810c71a1>] down+0x41/0x50
[  244.405611]  [<ffffffff810d380a>] console_lock+0x1a/0x40
[  244.405614]  [<ffffffff814e3c03>] console_callback+0x13/0x160
[  244.405617]  [<ffffffff817dcf6a>] ? __schedule+0x22a/0x6a0
[  244.405623]  [<ffffffff810954e3>] process_one_work+0x153/0x3f0
[  244.405628]  [<ffffffff81095cab>] worker_thread+0x12b/0x4b0
[  244.405633]  [<ffffffff81095b80>] ? rescuer_thread+0x350/0x350
[  244.405637]  [<ffffffff8109b473>] kthread+0xd3/0xf0
[  244.405641]  [<ffffffff8109b3a0>] ? kthread_park+0x60/0x60
[  244.405645]  [<ffffffff8109b3a0>] ? kthread_park+0x60/0x60
[  244.405649]  [<ffffffff817e1ee5>] ret_from_fork+0x25/0x30

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 724b4c1c80f6..fbda93a87648 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2215,7 +2215,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	struct drm_connector *connector;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct drm_crtc *crtc;
-	int r;
+	int r = 0;
 
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
@@ -2227,11 +2227,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 		pci_set_power_state(dev->pdev, PCI_D0);
 		pci_restore_state(dev->pdev);
 		r = pci_enable_device(dev->pdev);
-		if (r) {
-			if (fbcon)
-				console_unlock();
-			return r;
-		}
+		if (r)
+			goto unlock;
 	}
 	if (adev->is_atom_fw)
 		amdgpu_atomfirmware_scratch_regs_restore(adev);
@@ -2248,7 +2245,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	r = amdgpu_resume(adev);
 	if (r) {
 		DRM_ERROR("amdgpu_resume failed (%d).\n", r);
-		return r;
+		goto unlock;
 	}
 	amdgpu_fence_driver_resume(adev);
 
@@ -2259,11 +2256,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	}
 
 	r = amdgpu_late_init(adev);
-	if (r) {
-		if (fbcon)
-			console_unlock();
-		return r;
-	}
+	if (r)
+		goto unlock;
 
 	/* pin cursors */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -2313,12 +2307,14 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	dev->dev->power.disable_depth--;
 #endif
 
-	if (fbcon) {
+	if (fbcon)
 		amdgpu_fbdev_set_suspend(adev, 0);
+
+unlock:
+	if (fbcon)
 		console_unlock();
-	}
 
-	return 0;
+	return r;
 }
 
 static bool amdgpu_check_soft_reset(struct amdgpu_device *adev)
-- 
cgit v1.2.3


From b3124dfcce8147e1340a06952f32443dc7e102b1 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Fri, 14 Apr 2017 10:50:03 +0800
Subject: drm/amdgpu: fix to print incorrect wptr address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 88c5d8f6f414..bc8313b5be0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -289,8 +289,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
 				"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
 				"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x \n",
 				me,
-				me,
 				lower_32_bits(ring->wptr << 2),
+				me,
 				upper_32_bits(ring->wptr << 2));
 		WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
 		WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
-- 
cgit v1.2.3


From aef1ba58f5c60ec2de38258cb76db7842ccb6e1b Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 17 Apr 2017 15:32:52 +0800
Subject: drm/ttm: cleanup unuse ret value

The ret must be 0 here, otherwise, the function will return after init_mem_type.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e44626a2e698..a6d7fcb99c0b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1394,7 +1394,7 @@ EXPORT_SYMBOL(ttm_bo_evict_mm);
 int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 			unsigned long p_size)
 {
-	int ret = -EINVAL;
+	int ret;
 	struct ttm_mem_type_manager *man;
 	unsigned i;
 
@@ -1412,7 +1412,6 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 		return ret;
 	man->bdev = bdev;
 
-	ret = 0;
 	if (type != TTM_PL_SYSTEM) {
 		ret = (*man->func->init)(man, p_size);
 		if (ret)
-- 
cgit v1.2.3


From 79690b84db904669256fe3ef66dd175ce201817d Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Mon, 17 Apr 2017 18:46:57 +0800
Subject: drm/amdgpu: Remove redundant itermediate return val in sdma_v4_0.c

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index bc8313b5be0b..91947753946a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -744,10 +744,8 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
 	if (r)
 		return r;
 	r = sdma_v4_0_rlc_resume(adev);
-	if (r)
-		return r;
 
-	return 0;
+	return r;
 }
 
 /**
@@ -1158,8 +1156,6 @@ static int sdma_v4_0_hw_init(void *handle)
 	sdma_v4_0_init_golden_registers(adev);
 
 	r = sdma_v4_0_start(adev);
-	if (r)
-		return r;
 
 	return r;
 }
-- 
cgit v1.2.3


From 4bdcc4ea3ac11eddacbadd43128e1aba0122700b Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Mon, 17 Apr 2017 19:44:23 +0800
Subject: drm/amd/amdgpu: coding style refine in sdma_v4_0.c

Replace 8 spaces with tabs.
correct {} braces, etc.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 151 ++++++++++++++++-----------------
 1 file changed, 75 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 91947753946a..ecc70a730a54 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -48,8 +48,7 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
 
-static const u32 golden_settings_sdma_4[] =
-{
+static const u32 golden_settings_sdma_4[] = {
 	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07,
 	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100,
 	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100,
@@ -76,8 +75,7 @@ static const u32 golden_settings_sdma_4[] =
 	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_UTCL1_PAGE), 0x000003ff, 0x000003c0
 };
 
-static const u32 golden_settings_sdma_vg10[] =
-{
+static const u32 golden_settings_sdma_vg10[] = {
 	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00104002,
 	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002,
 	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG), 0x0018773f, 0x00104002,
@@ -87,16 +85,17 @@ static const u32 golden_settings_sdma_vg10[] =
 static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset)
 {
 	u32 base = 0;
+
 	switch (instance) {
-		case 0:
-			base = SDMA0_BASE.instance[0].segment[0];
-			break;
-		case 1:
-			base = SDMA1_BASE.instance[0].segment[0];
-			break;
-		default:
-			BUG();
-			break;
+	case 0:
+		base = SDMA0_BASE.instance[0].segment[0];
+		break;
+	case 1:
+		base = SDMA1_BASE.instance[0].segment[0];
+		break;
+	default:
+		BUG();
+		break;
 	}
 
 	return base + internal_offset;
@@ -159,7 +158,8 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_VEGA10:
 		chip_name = "vega10";
 		break;
-	default: BUG();
+	default:
+		BUG();
 	}
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -179,7 +179,7 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
 		if (adev->sdma.instance[i].feature_version >= 20)
 			adev->sdma.instance[i].burst_nop = true;
 		DRM_DEBUG("psp_load == '%s'\n",
-				adev->firmware.load_type == AMDGPU_FW_LOAD_PSP? "true": "false");
+				adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
 
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
@@ -192,9 +192,7 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
 	}
 out:
 	if (err) {
-		printk(KERN_ERR
-		       "sdma_v4_0: Failed to load firmware \"%s\"\n",
-		       fw_name);
+		DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
 		for (i = 0; i < adev->sdma.num_instances; i++) {
 			release_firmware(adev->sdma.instance[i].fw);
 			adev->sdma.instance[i].fw = NULL;
@@ -212,10 +210,10 @@ out:
  */
 static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
 {
-	u64* rptr;
+	u64 *rptr;
 
 	/* XXX check if swapping is necessary on BE */
-	rptr =((u64*)&ring->adev->wb.wb[ring->rptr_offs]);
+	rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
 
 	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
 	return ((*rptr) >> 2);
@@ -231,19 +229,20 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
 static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	u64* wptr = NULL;
-	uint64_t local_wptr=0;
+	u64 *wptr = NULL;
+	uint64_t local_wptr = 0;
 
 	if (ring->use_doorbell) {
 		/* XXX check if swapping is necessary on BE */
-		wptr = ((u64*)&adev->wb.wb[ring->wptr_offs]);
+		wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]);
 		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr);
 		*wptr = (*wptr) >> 2;
 		DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr);
 	} else {
 		u32 lowbit, highbit;
 		int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
-		wptr=&local_wptr;
+
+		wptr = &local_wptr;
 		lowbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR)) >> 2;
 		highbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
 
@@ -285,9 +284,10 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
 		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
 	} else {
 		int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
+
 		DRM_DEBUG("Not using doorbell -- "
 				"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
-				"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x \n",
+				"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
 				me,
 				lower_32_bits(ring->wptr << 2),
 				me,
@@ -319,22 +319,22 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
  * Schedule an IB in the DMA ring (VEGA10).
  */
 static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
-                                   struct amdgpu_ib *ib,
-                                   unsigned vm_id, bool ctx_switch)
+					struct amdgpu_ib *ib,
+					unsigned vm_id, bool ctx_switch)
 {
-        u32 vmid = vm_id & 0xf;
+	u32 vmid = vm_id & 0xf;
 
-        /* IB packet must end on a 8 DW boundary */
-        sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+	/* IB packet must end on a 8 DW boundary */
+	sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
 
-        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
-                          SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
-        /* base must be 32 byte aligned */
-        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
-        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-        amdgpu_ring_write(ring, ib->length_dw);
-        amdgpu_ring_write(ring, 0);
-        amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
+	/* base must be 32 byte aligned */
+	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, ib->length_dw);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 0);
 
 }
 
@@ -523,7 +523,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
 	u32 doorbell;
 	u32 doorbell_offset;
 	u32 temp;
-	int i,r;
+	int i, r;
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		ring = &adev->sdma.instance[i].ring;
@@ -572,7 +572,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
 		doorbell = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL));
 		doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET));
 
-		if (ring->use_doorbell){
+		if (ring->use_doorbell) {
 			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
 			doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
 					OFFSET, ring->doorbell_index);
@@ -694,9 +694,7 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
 
 
 		for (j = 0; j < fw_size; j++)
-		{
 			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
-		}
 
 		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
 	}
@@ -795,9 +793,8 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		tmp = le32_to_cpu(adev->wb.wb[index]);
-		if (tmp == 0xDEADBEEF) {
+		if (tmp == 0xDEADBEEF)
 			break;
-		}
 		DRM_UDELAY(1);
 	}
 
@@ -862,29 +859,29 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err1;
 
-        r = dma_fence_wait_timeout(f, false, timeout);
-        if (r == 0) {
-                DRM_ERROR("amdgpu: IB test timed out\n");
-                r = -ETIMEDOUT;
-                goto err1;
-        } else if (r < 0) {
-                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
-                goto err1;
-        }
-        tmp = le32_to_cpu(adev->wb.wb[index]);
-        if (tmp == 0xDEADBEEF) {
-                DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
-                r = 0;
-        } else {
-                DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
-                r = -EINVAL;
-        }
+	r = dma_fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
+		goto err1;
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+		goto err1;
+	}
+	tmp = le32_to_cpu(adev->wb.wb[index]);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
+	} else {
+		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+		r = -EINVAL;
+	}
 err1:
-        amdgpu_ib_free(adev, &ib, NULL);
-        dma_fence_put(f);
+	amdgpu_ib_free(adev, &ib, NULL);
+	dma_fence_put(f);
 err0:
-        amdgpu_wb_free(adev, index);
-        return r;
+	amdgpu_wb_free(adev, index);
+	return r;
 }
 
 
@@ -1191,10 +1188,12 @@ static bool sdma_v4_0_is_idle(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	u32 i;
+
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		u32 tmp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_STATUS_REG));
+
 		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
-	   		 return false;
+			return false;
 	}
 
 	return true;
@@ -1203,8 +1202,9 @@ static bool sdma_v4_0_is_idle(void *handle)
 static int sdma_v4_0_wait_for_idle(void *handle)
 {
 	unsigned i;
-	u32 sdma0,sdma1;
+	u32 sdma0, sdma1;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
 	for (i = 0; i < adev->usec_timeout; i++) {
 		sdma0 = RREG32(sdma_v4_0_get_reg_offset(0, mmSDMA0_STATUS_REG));
 		sdma1 = RREG32(sdma_v4_0_get_reg_offset(1, mmSDMA0_STATUS_REG));
@@ -1232,7 +1232,7 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
 
 	u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ?
 		sdma_v4_0_get_reg_offset(0, mmSDMA0_CNTL) :
-	       	sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL);
+		sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL);
 
 	sdma_cntl = RREG32(reg_offset);
 	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
@@ -1324,7 +1324,7 @@ static void sdma_v4_0_update_medium_grain_clock_gating(
 				  SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
 				  SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
 				  SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
-			if(def != data)
+			if (def != data)
 				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
 		}
 	} else {
@@ -1374,17 +1374,17 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
 
 		/* 1-not override: enable sdma1 mem light sleep */
 		if (adev->asic_type == CHIP_VEGA10) {
-			 def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
-			 data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
-			 if (def != data)
-				 WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
+			def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
+			data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+			if (def != data)
+				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
 		}
 	} else {
 		/* 0-override:disable sdma0 mem light sleep */
 		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
 		data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
 		if (def != data)
-		       WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
+			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
 
 		/* 0-override:disable sdma1 mem light sleep */
 		if (adev->asic_type == CHIP_VEGA10) {
@@ -1599,8 +1599,7 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 	}
 }
 
-const struct amdgpu_ip_block_version sdma_v4_0_ip_block =
-{
+const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
 	.type = AMD_IP_BLOCK_TYPE_SDMA,
 	.major = 4,
 	.minor = 0,
-- 
cgit v1.2.3


From 2211a787dee798131a4774ee7df9aa758ebe07aa Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Mon, 17 Apr 2017 20:46:29 +0800
Subject: drm/amd/powerplay: delete dead functions in vega10.

Vega10 does not support AVFS BTC, remove function.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 30 ++--------------------
 .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c   |  9 -------
 .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h   |  1 -
 3 files changed, 2 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 561b837b09be..3e11abe9cd0a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -2411,35 +2411,9 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
 	PP_ASSERT_WITH_CODE(!result,
 			"Failed to upload PPtable!", return result);
 
-	if (data->smu_features[GNLD_AVFS].supported) {
-		uint32_t features_enabled;
-		result = vega10_get_smc_features(hwmgr->smumgr, &features_enabled);
-		PP_ASSERT_WITH_CODE(!result,
-				"Failed to Retrieve Enabled Features!",
-				return result);
-		if (!(features_enabled & (1 << FEATURE_AVFS_BIT))) {
-			result = vega10_perform_btc(hwmgr->smumgr);
-			PP_ASSERT_WITH_CODE(!result,
-					"Failed to Perform BTC!",
+	result = vega10_avfs_enable(hwmgr, true);
+	PP_ASSERT_WITH_CODE(!result, "Attempt to enable AVFS feature Failed!",
 					return result);
-			result = vega10_avfs_enable(hwmgr, true);
-			PP_ASSERT_WITH_CODE(!result,
-					"Attempt to enable AVFS feature Failed!",
-					return result);
-			result = vega10_save_vft_table(hwmgr->smumgr,
-					(uint8_t *)&(data->smc_state_table.avfs_table));
-			PP_ASSERT_WITH_CODE(!result,
-					"Attempt to save VFT table Failed!",
-					return result);
-		} else {
-			data->smu_features[GNLD_AVFS].enabled = true;
-			result = vega10_restore_vft_table(hwmgr->smumgr,
-					(uint8_t *)&(data->smc_state_table.avfs_table));
-			PP_ASSERT_WITH_CODE(!result,
-					"Attempt to restore VFT table Failed!",
-					return result;);
-		}
-	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 2685f02ab551..7a36a7404f73 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -299,15 +299,6 @@ int vega10_copy_table_to_smc(struct pp_smumgr *smumgr,
 	return 0;
 }
 
-int vega10_perform_btc(struct pp_smumgr *smumgr)
-{
-	PP_ASSERT_WITH_CODE(!vega10_send_msg_to_smc_with_parameter(
-			smumgr, PPSMC_MSG_RunBtc, 0),
-			"Attempt to run DC BTC Failed!",
-			return -1);
-	return 0;
-}
-
 int vega10_save_vft_table(struct pp_smumgr *smumgr, uint8_t *avfs_table)
 {
 	PP_ASSERT_WITH_CODE(avfs_table,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h
index ad050212426d..71e9b6492647 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h
@@ -62,7 +62,6 @@ int vega10_get_smc_features(struct pp_smumgr *smumgr,
 		uint32_t *features_enabled);
 int vega10_save_vft_table(struct pp_smumgr *smumgr, uint8_t *avfs_table);
 int vega10_restore_vft_table(struct pp_smumgr *smumgr, uint8_t *avfs_table);
-int vega10_perform_btc(struct pp_smumgr *smumgr);
 
 int vega10_set_tools_address(struct pp_smumgr *smumgr);
 
-- 
cgit v1.2.3


From ad2fed9ad5907c35e132e43420a0e47ab22350f0 Mon Sep 17 00:00:00 2001
From: Junwei Zhang <Jerry.Zhang@amd.com>
Date: Thu, 23 Feb 2017 11:01:40 +0800
Subject: drm/amdgpu: fix double_offchip_lds_buf for gfx v6

Was incorrect for SI.

Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 4c4874fdf59f..0ce977783943 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1579,7 +1579,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
 
 static void gfx_v6_0_config_init(struct amdgpu_device *adev)
 {
-	adev->gfx.config.double_offchip_lds_buf = 1;
+	adev->gfx.config.double_offchip_lds_buf = 0;
 }
 
 static void gfx_v6_0_gpu_init(struct amdgpu_device *adev)
-- 
cgit v1.2.3


From ee73164a0d8d2fd98f666a5dd35da1d9a19ec009 Mon Sep 17 00:00:00 2001
From: Pixel Ding <Pixel.Ding@amd.com>
Date: Thu, 23 Feb 2017 11:10:33 +0800
Subject: drm/amdgpu/virt: don't check VALID bit for FLR completion message

The interrupt after FLR is missed sometimes due to hardware reason, so
guest driver get the notification of FLR completion via polling
message. Then host doesn't write VALID bit to avoid sending interrupt,
otherwise the completion will be handled twice.

So there's a valid message without VALID bit for FLR completion,
driver should handle it without checking.

Signed-off-by: Pixel Ding <Pixel.Ding@amd.com>
Reviewed-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 70a3dd13cb02..7bdc51b02326 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -368,9 +368,12 @@ static int xgpu_vi_mailbox_rcv_msg(struct amdgpu_device *adev,
 	u32 reg;
 	u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, RCV_MSG_VALID);
 
-	reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
-	if (!(reg & mask))
-		return -ENOENT;
+	/* workaround: host driver doesn't set VALID for CMPL now */
+	if (event != IDH_FLR_NOTIFICATION_CMPL) {
+		reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
+		if (!(reg & mask))
+			return -ENOENT;
+	}
 
 	reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
 	if (reg != event)
-- 
cgit v1.2.3


From 8972e5d26996e3f04e1cf29b7d7edd3ec5614bf2 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 6 Mar 2017 13:34:57 +0100
Subject: drm/amdgpu: fix coding style and printing in amdgpu_doorbell_init
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based on commit "drm/radeon: remove useless and potentially wrong message".

The size of the info printing is incorrect and the PCI subsystems prints
the same info on boot anyway.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index fbda93a87648..8aad6f4a5241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -422,12 +422,11 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev)
 	if (adev->doorbell.num_doorbells == 0)
 		return -EINVAL;
 
-	adev->doorbell.ptr = ioremap(adev->doorbell.base, adev->doorbell.num_doorbells * sizeof(u32));
-	if (adev->doorbell.ptr == NULL) {
+	adev->doorbell.ptr = ioremap(adev->doorbell.base,
+				     adev->doorbell.num_doorbells *
+				     sizeof(u32));
+	if (adev->doorbell.ptr == NULL)
 		return -ENOMEM;
-	}
-	DRM_INFO("doorbell mmio base: 0x%08X\n", (uint32_t)adev->doorbell.base);
-	DRM_INFO("doorbell mmio size: %u\n", (unsigned)adev->doorbell.size);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 98c24b24dfef209eb8bb164550e1567c7ba82c52 Mon Sep 17 00:00:00 2001
From: Xiangliang Yu <Xiangliang.Yu@amd.com>
Date: Fri, 14 Apr 2017 17:40:57 +0800
Subject: drm/amdgpu/mmhub_v1: bypass clockgating setting

For SRIOV doesn't need CG, so bypass it.

Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 62684510ddcd..dbfe48d1207a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -511,6 +511,9 @@ static int mmhub_v1_0_set_clockgating_state(void *handle,
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		mmhub_v1_0_update_medium_grain_clock_gating(adev,
-- 
cgit v1.2.3


From fb82afab134ab71b3b52224b6da12daea8662c28 Mon Sep 17 00:00:00 2001
From: Xiangliang Yu <Xiangliang.Yu@amd.com>
Date: Fri, 14 Apr 2017 17:43:02 +0800
Subject: drm/amdgpu/gfx9: bypass clockgating setting

For SRIOV doesn't need clockgating, bypass it.

Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 87596e48f65d..cc60dafb7d94 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2740,6 +2740,9 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		gfx_v9_0_update_gfx_clock_gating(adev,
-- 
cgit v1.2.3


From 7ef69843def65bdbf69a5be784e6a9e080f22ef6 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 18 Apr 2017 19:21:44 +0800
Subject: drm/amdgpu: fix memory clock can't switch on CI.

if we set only lowest mclk level enabled,
when we enable uvd dpm during boot time,
mclk will be fixed in the lowest level.
the mclk switch will fail if try to enable
other level of mclk at this time.
so set all mclk levels enabled.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 11ccda83d767..a6dda3470003 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -3036,6 +3036,7 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,
 						      memory_clock,
 						      &memory_level->MinVddcPhases);
 
+	memory_level->EnabledForActivity = 1;
 	memory_level->EnabledForThrottle = 1;
 	memory_level->UpH = 0;
 	memory_level->DownH = 100;
@@ -3468,8 +3469,6 @@ static int ci_populate_all_memory_levels(struct amdgpu_device *adev)
 			return ret;
 	}
 
-	pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1;
-
 	if ((dpm_table->mclk_table.count >= 2) &&
 	    ((adev->pdev->device == 0x67B0) || (adev->pdev->device == 0x67B1))) {
 		pi->smc_state_table.MemoryLevel[1].MinVddc =
-- 
cgit v1.2.3


From 1c4ecf48cfee8e138c6483bafd1425628a413315 Mon Sep 17 00:00:00 2001
From: Trigger Huang <trigger.huang@amd.com>
Date: Mon, 17 Apr 2017 10:56:02 -0400
Subject: drm/amdgpu: Fix module unload hang by KIQ on Vega10

Apply commit 4e683cb2644f ("drm/amdgpu: Fix module unload hang by
KIQ IRQ set")to vega10
V2:
	delete reduant kiq irq funcs type check (suggested by Rex.Zhu)

Signed-off-by: Trigger Huang <trigger.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index cc60dafb7d94..574c017a50a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -631,7 +631,6 @@ static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev,
 		ring->pipe = 1;
 	}
 
-	irq->data = ring;
 	ring->queue = 0;
 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
 	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
@@ -647,7 +646,6 @@ static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring,
 {
 	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
 	amdgpu_ring_fini(ring);
-	irq->data = NULL;
 }
 
 /* create MQD for each compute queue */
@@ -3370,9 +3368,7 @@ static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
 					    enum amdgpu_interrupt_state state)
 {
 	uint32_t tmp, target;
-	struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;
-
-	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
+	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
 
 	if (ring->me == 1)
 		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
@@ -3416,9 +3412,7 @@ static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev,
 			    struct amdgpu_iv_entry *entry)
 {
 	u8 me_id, pipe_id, queue_id;
-	struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;
-
-	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
+	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
 
 	me_id = (entry->ring_id & 0x0c) >> 2;
 	pipe_id = (entry->ring_id & 0x03) >> 0;
-- 
cgit v1.2.3


From 15ff510bbfd7170fa5af8924fe0503f4d116e7d2 Mon Sep 17 00:00:00 2001
From: Tom St Denis <tom.stdenis@amd.com>
Date: Wed, 19 Apr 2017 09:01:42 -0400
Subject: drm/amd/amdgpu: Change comp GFXv6 ring name to remove space
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

umr expects the ring name to be a complete word.  This also
makes it consistent with GFXv7/8.

Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 0ce977783943..6a429e927297 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -3292,7 +3292,7 @@ static int gfx_v6_0_sw_init(void *handle)
 		ring->me = 1;
 		ring->pipe = i;
 		ring->queue = i;
-		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
+		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->gfx.eop_irq, irq_type);
-- 
cgit v1.2.3


From e182e234c8658c0880401052fd364b1a1a59b03d Mon Sep 17 00:00:00 2001
From: Tom St Denis <tom.stdenis@amd.com>
Date: Wed, 19 Apr 2017 09:02:41 -0400
Subject: drm/amd/amdgpu: Change comp GFXv9 ring name to remove space
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

umr expects the ring name to be a complete word.  This also
makes it consistent with GFXv7/8.

Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 574c017a50a2..9a9cb90e2f5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1094,7 +1094,7 @@ static int gfx_v9_0_sw_init(void *handle)
 		ring->pipe = i / 8;
 		ring->queue = i % 8;
 		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
-		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
+		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
 		/* type-2 packets are deprecated on MEC, use type-3 instead */
 		r = amdgpu_ring_init(adev, ring, 1024,
-- 
cgit v1.2.3


From 2bde7c32b1db162692f05c6be066b5bcd3d9fdbe Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 21 Apr 2017 13:48:08 +0200
Subject: ACPI / PMIC: xpower: Fix power_table addresses

The power table addresses should be contiguous, but there was a hole
where 0x34 was missing. On most devices this is not a problem as
addresses above 0x34 are used for the BUC# convertors which are not
used in the DSDTs I've access to but after the BUC# convertors
there is a field named GPI1 in the DSTDs, which does get used in some
cases and ended up turning BUC6 on and off due to the wrong addresses,
resulting in turning the entire device off (or causing it to reboot).

Removing the hole in the addresses fixes this, fixing one of my
Bay Trail tablets turning off while booting the mainline kernel.

While at it add comments with the field names used in the DSDTs to
make it easier to compare the register and bits used at each address
with the datasheet.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/pmic/intel_pmic_xpower.c | 50 +++++++++++++++++------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/acpi/pmic/intel_pmic_xpower.c b/drivers/acpi/pmic/intel_pmic_xpower.c
index 55f51115f016..1a76c784cd4c 100644
--- a/drivers/acpi/pmic/intel_pmic_xpower.c
+++ b/drivers/acpi/pmic/intel_pmic_xpower.c
@@ -27,97 +27,97 @@ static struct pmic_table power_table[] = {
 		.address = 0x00,
 		.reg = 0x13,
 		.bit = 0x05,
-	},
+	}, /* ALD1 */
 	{
 		.address = 0x04,
 		.reg = 0x13,
 		.bit = 0x06,
-	},
+	}, /* ALD2 */
 	{
 		.address = 0x08,
 		.reg = 0x13,
 		.bit = 0x07,
-	},
+	}, /* ALD3 */
 	{
 		.address = 0x0c,
 		.reg = 0x12,
 		.bit = 0x03,
-	},
+	}, /* DLD1 */
 	{
 		.address = 0x10,
 		.reg = 0x12,
 		.bit = 0x04,
-	},
+	}, /* DLD2 */
 	{
 		.address = 0x14,
 		.reg = 0x12,
 		.bit = 0x05,
-	},
+	}, /* DLD3 */
 	{
 		.address = 0x18,
 		.reg = 0x12,
 		.bit = 0x06,
-	},
+	}, /* DLD4 */
 	{
 		.address = 0x1c,
 		.reg = 0x12,
 		.bit = 0x00,
-	},
+	}, /* ELD1 */
 	{
 		.address = 0x20,
 		.reg = 0x12,
 		.bit = 0x01,
-	},
+	}, /* ELD2 */
 	{
 		.address = 0x24,
 		.reg = 0x12,
 		.bit = 0x02,
-	},
+	}, /* ELD3 */
 	{
 		.address = 0x28,
 		.reg = 0x13,
 		.bit = 0x02,
-	},
+	}, /* FLD1 */
 	{
 		.address = 0x2c,
 		.reg = 0x13,
 		.bit = 0x03,
-	},
+	}, /* FLD2 */
 	{
 		.address = 0x30,
 		.reg = 0x13,
 		.bit = 0x04,
-	},
+	}, /* FLD3 */
 	{
-		.address = 0x38,
+		.address = 0x34,
 		.reg = 0x10,
 		.bit = 0x03,
-	},
+	}, /* BUC1 */
 	{
-		.address = 0x3c,
+		.address = 0x38,
 		.reg = 0x10,
 		.bit = 0x06,
-	},
+	}, /* BUC2 */
 	{
-		.address = 0x40,
+		.address = 0x3c,
 		.reg = 0x10,
 		.bit = 0x05,
-	},
+	}, /* BUC3 */
 	{
-		.address = 0x44,
+		.address = 0x40,
 		.reg = 0x10,
 		.bit = 0x04,
-	},
+	}, /* BUC4 */
 	{
-		.address = 0x48,
+		.address = 0x44,
 		.reg = 0x10,
 		.bit = 0x01,
-	},
+	}, /* BUC5 */
 	{
-		.address = 0x4c,
+		.address = 0x48,
 		.reg = 0x10,
 		.bit = 0x00
-	},
+	}, /* BUC6 */
 };
 
 /* TMP0 - TMP5 are the same, all from GPADC */
-- 
cgit v1.2.3


From 775f55f1b5b65c6cfc52555d9a8b80de5429d21b Mon Sep 17 00:00:00 2001
From: Tom St Denis <tom.stdenis@amd.com>
Date: Wed, 19 Apr 2017 11:03:04 -0400
Subject: drm/amd/amdgpu: Print out ring name in dev_info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So it's more obvious which rings are using which INV engines.

Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index a71521e10763..edf43769ae70 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -394,8 +394,9 @@ static int gmc_v9_0_late_init(void *handle)
 		unsigned vmhub = ring->funcs->vmhub;
 
 		ring->vm_inv_eng = vm_inv_eng[vmhub]++;
-		dev_info(adev->dev, "ring %u uses VM inv eng %u on hub %u\n",
-			 ring->idx, ring->vm_inv_eng, ring->funcs->vmhub);
+		dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
+			 ring->idx, ring->name, ring->vm_inv_eng,
+			 ring->funcs->vmhub);
 	}
 
 	/* Engine 17 is used for GART flushes */
-- 
cgit v1.2.3


From e0b2f8cff15b2d20a427c381247a8ff6bb28d21e Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Wed, 12 Apr 2017 17:34:26 +0800
Subject: drm/amdgpu: update smu9 driver interface

Updated interface between the driver and the SMU controller.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h
index 2037910adcb1..d43f98a910b0 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h
@@ -30,7 +30,7 @@
  * SMU TEAM: Always increment the interface version if
  * any structure is changed in this file
  */
-#define SMU9_DRIVER_IF_VERSION 0xB
+#define SMU9_DRIVER_IF_VERSION 0xD
 
 #define PPTABLE_V10_SMU_VERSION 1
 
@@ -302,7 +302,17 @@ typedef struct {
 
   uint32_t     DpmLevelPowerDelta;
 
-  uint32_t     Reserved[19];
+  uint8_t      EnableBoostState;
+  uint8_t      AConstant_Shift;
+  uint8_t      DC_tol_sigma_Shift;
+  uint8_t      PSM_Age_CompFactor_Shift;
+
+  uint16_t     BoostStartTemperature;
+  uint16_t     BoostStopTemperature;
+
+  PllSetting_t GfxBoostState;
+
+  uint32_t     Reserved[14];
 
   /* Padding - ignore */
   uint32_t     MmHubPadding[7]; /* SMU internal use */
@@ -464,4 +474,8 @@ typedef struct {
 #define DB_PCC_SHIFT 26
 #define DB_EDC_SHIFT 27
 
+#define REMOVE_FMAX_MARGIN_BIT     0x0
+#define REMOVE_DCTOL_MARGIN_BIT    0x1
+#define REMOVE_PLATFORM_MARGIN_BIT 0x2
+
 #endif
-- 
cgit v1.2.3


From e3c5e9826d60630236de105c05e7a58f12c45ab5 Mon Sep 17 00:00:00 2001
From: Trigger Huang <trigger.huang@amd.com>
Date: Mon, 17 Apr 2017 08:50:18 -0400
Subject: drm/amdgpu: Destroy psp ring in hw_fini

Fix issue that PSP initialization will fail if reload amdgpu module.
That's because the PSP ring must be destroyed to be ready for the
next time PSP initialization.

Changes in v2:
	- Move psp_ring_destroy before all BOs free (suggested by
	  Ray Huang).
Changes in v3:
	- Check firmware load type, if it is not PSP, we should do
	  nothing in fw_fini(), and of course will not destroy
	  PSP ring too (suggested by Ray Huang).

Signed-off-by: Trigger Huang <trigger.huang@amd.com>
Reviewed-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c |  9 +++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  3 +++
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c   | 27 +++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.h   |  2 ++
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 19180aaa8bf2..1e380fe29b5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -56,6 +56,7 @@ static int psp_sw_init(void *handle)
 		psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf;
 		psp->ring_init = psp_v3_1_ring_init;
 		psp->ring_create = psp_v3_1_ring_create;
+		psp->ring_destroy = psp_v3_1_ring_destroy;
 		psp->cmd_submit = psp_v3_1_cmd_submit;
 		psp->compare_sram_data = psp_v3_1_compare_sram_data;
 		psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
@@ -411,8 +412,12 @@ static int psp_hw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct psp_context *psp = &adev->psp;
 
-	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
-		amdgpu_ucode_fini_bo(adev);
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
+	amdgpu_ucode_fini_bo(adev);
+
+	psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 
 	if (psp->tmr_buf)
 		amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 28faba5b7dd6..0301e4e0b297 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -66,6 +66,8 @@ struct psp_context
 			    struct psp_gfx_cmd_resp *cmd);
 	int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
 	int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type);
+	int (*ring_destroy)(struct psp_context *psp,
+			    enum psp_ring_type ring_type);
 	int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode,
 			  uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index);
 	bool (*compare_sram_data)(struct psp_context *psp,
@@ -116,6 +118,7 @@ struct amdgpu_psp_funcs {
 #define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type))
 #define psp_ring_init(psp, type) (psp)->ring_init((psp), (type))
 #define psp_ring_create(psp, type) (psp)->ring_create((psp), (type))
+#define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type)))
 #define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \
 		(psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
 #define psp_compare_sram_data(psp, ucode, type) \
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index d351583785e5..60a6407ba267 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -321,6 +321,33 @@ int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
 	return ret;
 }
 
+int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type)
+{
+	int ret = 0;
+	struct psp_ring *ring;
+	unsigned int psp_ring_reg = 0;
+	struct amdgpu_device *adev = psp->adev;
+
+	ring = &psp->km_ring;
+
+	/* Write the ring destroy command to C2PMSG_64 */
+	psp_ring_reg = 3 << 16;
+	WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg);
+
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+
+	/* Wait for response flag (bit 31) in C2PMSG_64 */
+	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+			   0x80000000, 0x80000000, false);
+
+	if (ring->ring_mem)
+		amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+				      &ring->ring_mem_mc_addr,
+				      (void **)&ring->ring_mem);
+	return ret;
+}
+
 int psp_v3_1_cmd_submit(struct psp_context *psp,
 		        struct amdgpu_firmware_info *ucode,
 		        uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
index 5230d27867c9..9dcd0b25c4c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
@@ -41,6 +41,8 @@ extern int psp_v3_1_ring_init(struct psp_context *psp,
 			      enum psp_ring_type ring_type);
 extern int psp_v3_1_ring_create(struct psp_context *psp,
 				enum psp_ring_type ring_type);
+extern int psp_v3_1_ring_destroy(struct psp_context *psp,
+				enum psp_ring_type ring_type);
 extern int psp_v3_1_cmd_submit(struct psp_context *psp,
 			       struct amdgpu_firmware_info *ucode,
 			       uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
-- 
cgit v1.2.3


From fc6aa33da4b1043ad1b337d051770993418256d2 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 19 Apr 2017 14:41:19 +0200
Subject: drm/amdgpu: fix amdgpu_vm_clear_freed v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use amdgpu_vm_bo_update_mapping() instead of amdgpu_vm_bo_split_mapping() here.

We don't want any flags set in the cleared areas and splitting
shouldn't be necessary.

v2: fix typo in commit message

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6b95176d3f75..f34d822f92ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1631,8 +1631,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,
-					       0, 0, &f);
+		r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm,
+						mapping->start, mapping->last,
+						0, 0, &f);
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
 		if (r) {
 			dma_fence_put(f);
-- 
cgit v1.2.3


From d0766e981b36608f9fe9b29985d4cd696099c3f8 Mon Sep 17 00:00:00 2001
From: "Zhang, Jerry" <Jerry.Zhang@amd.com>
Date: Wed, 19 Apr 2017 09:53:29 +0800
Subject: drm/amdgpu: PRT support for gfx9 (v3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix PRT handling on gfx9

v2: unify PRT bit for all ASICs
v3: move PRT flag checking in amdgpu_vm_bo_split_mapping()

Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Acked-by: David Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f34d822f92ac..c42a9979d056 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1338,6 +1338,12 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 	flags &= ~AMDGPU_PTE_MTYPE_MASK;
 	flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);
 
+	if ((mapping->flags & AMDGPU_PTE_PRT) &&
+	    (adev->asic_type >= CHIP_VEGA10)) {
+		flags |= AMDGPU_PTE_PRT;
+		flags &= ~AMDGPU_PTE_VALID;
+	}
+
 	trace_amdgpu_vm_bo_update(mapping);
 
 	pfn = mapping->offset >> PAGE_SHIFT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 661a8f6826ef..d97e28b4bdc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -65,7 +65,8 @@ struct amdgpu_bo_list_entry;
 
 #define AMDGPU_PTE_FRAG(x)	((x & 0x1fULL) << 7)
 
-#define AMDGPU_PTE_PRT		(1ULL << 63)
+/* TILED for VEGA10, reserved for older ASICs  */
+#define AMDGPU_PTE_PRT		(1ULL << 51)
 
 /* VEGA10 only */
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
-- 
cgit v1.2.3


From 7ad87b96962a01830b0751d11a389b4039eb4460 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 24 Apr 2017 13:51:52 -0400
Subject: Revert "drm/amd/amdgpu: Set VCE/UVD off during late init"

This leads to hangs on init.

This reverts commit d1aff8ec49c3ece05cee9b6e63d44e96a420b068.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8aad6f4a5241..75f851bd5b63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -53,7 +53,6 @@
 #include "bif/bif_4_1_d.h"
 #include <linux/pci.h>
 #include <linux/firmware.h>
-#include "amdgpu_pm.h"
 
 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
 static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
@@ -1583,9 +1582,6 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
 		}
 	}
 
-	amdgpu_dpm_enable_uvd(adev, false);
-	amdgpu_dpm_enable_vce(adev, false);
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 42f72d0b3754a7ecf1208f90cef45ba89a89f2e7 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Thu, 20 Apr 2017 15:25:39 +0800
Subject: drm/amd/powerplay: add error message to remind user updating firmware

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 7a36a7404f73..21b61315fc40 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -369,8 +369,11 @@ static int vega10_verify_smc_interface(struct pp_smumgr *smumgr)
 			"Attempt to read SMC IF Version Number Failed!",
 			return -1);
 
-	if (smc_driver_if_version != SMU9_DRIVER_IF_VERSION)
-		return -1;
+	if (smc_driver_if_version != SMU9_DRIVER_IF_VERSION) {
+		pr_err("Your firmware(0x%x) doesn't match SMU9_DRIVER_IF_VERSION(0x%x). Please update your firmware!\n",
+		       smc_driver_if_version, SMU9_DRIVER_IF_VERSION);
+		return -EINVAL;
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From 67131aa525d22fe21904bfd463520e98b5d67566 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 12 Apr 2017 17:32:35 +0800
Subject: drm/amd/powerplay: enable AGM logging while dpm disabled.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c   | 2 --
 drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c | 3 ++-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 3e11abe9cd0a..deb112485f82 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -2583,8 +2583,6 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 			"Failed to configure telemetry!",
 			return tmp_result);
 
-	vega10_set_tools_address(hwmgr->smumgr);
-
 	smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
 			PPSMC_MSG_NumOfDisplays, 0);
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 21b61315fc40..9987d5b80d82 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -491,7 +491,7 @@ static int vega10_smu_init(struct pp_smumgr *smumgr)
 	priv->smu_tables.entry[AVFSTABLE].table = kaddr;
 	priv->smu_tables.entry[AVFSTABLE].handle = handle;
 
-	tools_size = 0;
+	tools_size = 0x19000;
 	if (tools_size) {
 		smu_allocate_memory(smumgr->device,
 				tools_size,
@@ -511,6 +511,7 @@ static int vega10_smu_init(struct pp_smumgr *smumgr)
 					smu_lower_32_bits(mc_addr);
 			priv->smu_tables.entry[TOOLSTABLE].table = kaddr;
 			priv->smu_tables.entry[TOOLSTABLE].handle = handle;
+			vega10_set_tools_address(smumgr);
 		}
 	}
 
-- 
cgit v1.2.3


From d475ce6296d702c16c96790a390ad7b4616f18b4 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 12 Apr 2017 17:52:07 +0800
Subject: drm/amd/powerplay: allocate fb for avfs fuse table on vega10.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c   | 36 ++++++++++++++++++++++
 .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h   |  1 +
 2 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 9987d5b80d82..5980f02f2ce9 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -515,6 +515,40 @@ static int vega10_smu_init(struct pp_smumgr *smumgr)
 		}
 	}
 
+	/* allocate space for AVFS Fuse table */
+	smu_allocate_memory(smumgr->device,
+			sizeof(AvfsFuseOverride_t),
+			CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB,
+			PAGE_SIZE,
+			&mc_addr,
+			&kaddr,
+			&handle);
+
+	PP_ASSERT_WITH_CODE(kaddr,
+			"[vega10_smu_init] Out of memory for avfs fuse table.",
+			kfree(smumgr->backend);
+			cgs_free_gpu_mem(smumgr->device,
+			(cgs_handle_t)priv->smu_tables.entry[PPTABLE].handle);
+			cgs_free_gpu_mem(smumgr->device,
+			(cgs_handle_t)priv->smu_tables.entry[WMTABLE].handle);
+			cgs_free_gpu_mem(smumgr->device,
+			(cgs_handle_t)priv->smu_tables.entry[AVFSTABLE].handle);
+			cgs_free_gpu_mem(smumgr->device,
+			(cgs_handle_t)priv->smu_tables.entry[TOOLSTABLE].handle);
+			cgs_free_gpu_mem(smumgr->device,
+			(cgs_handle_t)handle);
+			return -1);
+
+	priv->smu_tables.entry[AVFSFUSETABLE].version = 0x01;
+	priv->smu_tables.entry[AVFSFUSETABLE].size = sizeof(AvfsFuseOverride_t);
+	priv->smu_tables.entry[AVFSFUSETABLE].table_id = TABLE_AVFS_FUSE_OVERRIDE;
+	priv->smu_tables.entry[AVFSFUSETABLE].table_addr_high =
+			smu_upper_32_bits(mc_addr);
+	priv->smu_tables.entry[AVFSFUSETABLE].table_addr_low =
+			smu_lower_32_bits(mc_addr);
+	priv->smu_tables.entry[AVFSFUSETABLE].table = kaddr;
+	priv->smu_tables.entry[AVFSFUSETABLE].handle = handle;
+
 	return 0;
 }
 
@@ -533,6 +567,8 @@ static int vega10_smu_fini(struct pp_smumgr *smumgr)
 		if (priv->smu_tables.entry[TOOLSTABLE].table)
 			cgs_free_gpu_mem(smumgr->device,
 					(cgs_handle_t)priv->smu_tables.entry[TOOLSTABLE].handle);
+		cgs_free_gpu_mem(smumgr->device,
+				(cgs_handle_t)priv->smu_tables.entry[AVFSFUSETABLE].handle);
 		kfree(smumgr->backend);
 		smumgr->backend = NULL;
 	}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h
index 71e9b6492647..821425c1e4e0 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h
@@ -30,6 +30,7 @@ enum smu_table_id {
 	WMTABLE,
 	AVFSTABLE,
 	TOOLSTABLE,
+	AVFSFUSETABLE,
 	MAX_SMU_TABLE,
 };
 
-- 
cgit v1.2.3


From 2d5f5f949680bc02bd5f98ee9ec6d08f67e9ca4c Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Thu, 20 Apr 2017 16:38:36 +0800
Subject: drm/amd/powerplay: enable pcie dpm on Vega10.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index deb112485f82..ee40378c3e82 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -111,6 +111,8 @@ static void vega10_set_default_registry_data(struct pp_hwmgr *hwmgr)
 			hwmgr->feature_mask & PP_SOCCLK_DPM_MASK ? false : true;
 	data->registry_data.mclk_dpm_key_disabled =
 			hwmgr->feature_mask & PP_MCLK_DPM_MASK ? false : true;
+	data->registry_data.pcie_dpm_key_disabled =
+			hwmgr->feature_mask & PP_PCIE_DPM_MASK ? false : true;
 
 	data->registry_data.dcefclk_dpm_key_disabled =
 			hwmgr->feature_mask & PP_DCEFCLK_DPM_MASK ? false : true;
@@ -121,7 +123,6 @@ static void vega10_set_default_registry_data(struct pp_hwmgr *hwmgr)
 		data->registry_data.enable_tdc_limit_feature = 1;
 	}
 
-	data->registry_data.pcie_dpm_key_disabled = 1;
 	data->registry_data.disable_water_mark = 0;
 
 	data->registry_data.fan_control_support = 1;
-- 
cgit v1.2.3


From afc0255c9a5ecede2f235f4d117c2913059aa3c3 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 19 Apr 2017 16:00:21 +0800
Subject: drm/amd/powerplay: enable clock stretch feature on Vega10.

Correctly calculate CKSVidOffset

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index ee40378c3e82..0042c339415f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -123,6 +123,9 @@ static void vega10_set_default_registry_data(struct pp_hwmgr *hwmgr)
 		data->registry_data.enable_tdc_limit_feature = 1;
 	}
 
+	data->registry_data.clock_stretcher_support =
+			hwmgr->feature_mask & PP_CLOCK_STRETCH_MASK ? true : false;
+
 	data->registry_data.disable_water_mark = 0;
 
 	data->registry_data.fan_control_support = 1;
@@ -2045,10 +2048,10 @@ static int vega10_populate_clock_stretcher_table(struct pp_hwmgr *hwmgr)
 			table_info->vdd_dep_on_sclk;
 	uint32_t i;
 
-	for (i = 0; dep_table->count; i++) {
+	for (i = 0; i < dep_table->count; i++) {
 		pp_table->CksEnable[i] = dep_table->entries[i].cks_enable;
-		pp_table->CksVidOffset[i] = convert_to_vid(
-				dep_table->entries[i].cks_voffset);
+		pp_table->CksVidOffset[i] = (uint8_t)(dep_table->entries[i].cks_voffset
+				* VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1);
 	}
 
 	return 0;
@@ -2380,8 +2383,7 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
 			"Failed to initialize UVD Level!",
 			return result);
 
-	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
-			PHM_PlatformCaps_ClockStretcher)) {
+	if (data->registry_data.clock_stretcher_support) {
 		result = vega10_populate_clock_stretcher_table(hwmgr);
 		PP_ASSERT_WITH_CODE(!result,
 				"Failed to populate Clock Stretcher Table!",
-- 
cgit v1.2.3


From 4bae05e196c518c2c95022fbdbee551bbc6893a5 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Thu, 20 Apr 2017 16:33:23 +0800
Subject: drm/amd/powerplay: Fix AVFS param.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 0042c339415f..278def7380e7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -2163,7 +2163,7 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 
 			pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m1_shift = 24;
 			pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m2_shift = 12;
-			pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].b_shift = 0;
+			pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].b_shift = 12;
 
 			if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT !=
 					data->dcef_clk_quad_eqn_a) &&
@@ -2186,7 +2186,7 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 
 			pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m1_shift = 24;
 			pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m2_shift = 12;
-			pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].b_shift = 0;
+			pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].b_shift = 12;
 
 			if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT !=
 					data->pixel_clk_quad_eqn_a) &&
@@ -2209,7 +2209,7 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 
 			pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m1_shift = 24;
 			pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m2_shift = 12;
-
+			pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].b_shift = 12;
 			if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT !=
 					data->phy_clk_quad_eqn_a) &&
 				(PPREGKEY_VEGA10QUADRATICEQUATION_DFLT !=
@@ -2231,7 +2231,7 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 
 			pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m1_shift = 24;
 			pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m2_shift = 12;
-			pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].b_shift = 0;
+			pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].b_shift = 12;
 		} else {
 			data->smu_features[GNLD_AVFS].supported = false;
 		}
-- 
cgit v1.2.3


From 4fcae787866915161f22d627328fa8d2e13d215b Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 20 Apr 2017 12:11:47 +0200
Subject: drm/amdgpu: fix amdgpu_ttm_bo_eviction_valuable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BOs not mapped into the GART are always valuable for an eviction. Otherwise we
don't correctly swap them out on VRAM evictions during memory pressure.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 23f7fe8fdc3d..eadbcfad2299 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1040,11 +1040,17 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 					    const struct ttm_place *place)
 {
-	if (bo->mem.mem_type == TTM_PL_VRAM &&
-	    bo->mem.start == AMDGPU_BO_INVALID_OFFSET) {
-		unsigned long num_pages = bo->mem.num_pages;
-		struct drm_mm_node *node = bo->mem.mm_node;
+	unsigned long num_pages = bo->mem.num_pages;
+	struct drm_mm_node *node = bo->mem.mm_node;
 
+	if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
+		return ttm_bo_eviction_valuable(bo, place);
+
+	switch (bo->mem.mem_type) {
+	case TTM_PL_TT:
+		return true;
+
+	case TTM_PL_VRAM:
 		/* Check each drm MM node individually */
 		while (num_pages) {
 			if (place->fpfn < (node->start + node->size) &&
@@ -1054,8 +1060,10 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 			num_pages -= node->size;
 			++node;
 		}
+		break;
 
-		return false;
+	default:
+		break;
 	}
 
 	return ttm_bo_eviction_valuable(bo, place);
-- 
cgit v1.2.3


From 05a72a2864c2b27471e9f5365448563c78f9b114 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Thu, 13 Apr 2017 16:16:51 +0800
Subject: drm/amdgpu: add gtt print like vram when dump mm table V2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  9 +++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     | 10 +++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 0335c2f331e9..f7d22c44034d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -134,6 +134,15 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
 	return r;
 }
 
+void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager *man)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
+	struct amdgpu_gtt_mgr *mgr = man->priv;
+
+	seq_printf(m, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n",
+		   man->size, mgr->available, (u64)atomic64_read(&adev->gtt_usage) >> 20);
+
+}
 /**
  * amdgpu_gtt_mgr_new - allocate a new node
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index eadbcfad2299..24ca251f82ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1411,6 +1411,8 @@ error_free:
 
 #if defined(CONFIG_DEBUG_FS)
 
+extern void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager
+				 *man);
 static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
@@ -1424,11 +1426,17 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
 	spin_lock(&glob->lru_lock);
 	drm_mm_print(mm, &p);
 	spin_unlock(&glob->lru_lock);
-	if (ttm_pl == TTM_PL_VRAM)
+	switch (ttm_pl) {
+	case TTM_PL_VRAM:
 		seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
 			   adev->mman.bdev.man[ttm_pl].size,
 			   (u64)atomic64_read(&adev->vram_usage) >> 20,
 			   (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
+		break;
+	case TTM_PL_TT:
+		amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]);
+		break;
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From 5a0f3b5f6d798637b0af5d6d3ab3eb02063e0317 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 21 Apr 2017 10:05:56 +0200
Subject: drm/amdgpu: fix VM clearing in amdgpu_gem_object_close
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to check if the VM is swapped out before trying to update it.

Fixes: 23e0563e48f7 ("drm/amdgpu: clear freed mappings immediately when BO may be freed")
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 68 ++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 03a9c5cad222..94cb91cf93eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -139,6 +139,35 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
 	return 0;
 }
 
+static int amdgpu_gem_vm_check(void *param, struct amdgpu_bo *bo)
+{
+	/* if anything is swapped out don't swap it in here,
+	   just abort and wait for the next CS */
+	if (!amdgpu_bo_gpu_accessible(bo))
+		return -ERESTARTSYS;
+
+	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
+		return -ERESTARTSYS;
+
+	return 0;
+}
+
+static bool amdgpu_gem_vm_ready(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				struct list_head *list)
+{
+	struct ttm_validate_buffer *entry;
+
+	list_for_each_entry(entry, list, head) {
+		struct amdgpu_bo *bo =
+			container_of(entry->bo, struct amdgpu_bo, tbo);
+		if (amdgpu_gem_vm_check(NULL, bo))
+			return false;
+	}
+
+	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_vm_check, NULL);
+}
+
 void amdgpu_gem_object_close(struct drm_gem_object *obj,
 			     struct drm_file *file_priv)
 {
@@ -148,15 +177,13 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	struct amdgpu_vm *vm = &fpriv->vm;
 
 	struct amdgpu_bo_list_entry vm_pd;
-	struct list_head list, duplicates;
+	struct list_head list;
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
 	struct amdgpu_bo_va *bo_va;
-	struct dma_fence *fence = NULL;
 	int r;
 
 	INIT_LIST_HEAD(&list);
-	INIT_LIST_HEAD(&duplicates);
 
 	tv.bo = &bo->tbo;
 	tv.shared = true;
@@ -164,16 +191,18 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 
 	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
 	if (r) {
 		dev_err(adev->dev, "leaking bo va because "
 			"we fail to reserve bo (%d)\n", r);
 		return;
 	}
 	bo_va = amdgpu_vm_bo_find(vm, bo);
-	if (bo_va) {
-		if (--bo_va->ref_count == 0) {
-			amdgpu_vm_bo_rmv(adev, bo_va);
+	if (bo_va && --bo_va->ref_count == 0) {
+		amdgpu_vm_bo_rmv(adev, bo_va);
+
+		if (amdgpu_gem_vm_ready(adev, vm, &list)) {
+			struct dma_fence *fence = NULL;
 
 			r = amdgpu_vm_clear_freed(adev, vm, &fence);
 			if (unlikely(r)) {
@@ -502,19 +531,6 @@ out:
 	return r;
 }
 
-static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo)
-{
-	/* if anything is swapped out don't swap it in here,
-	   just abort and wait for the next CS */
-	if (!amdgpu_bo_gpu_accessible(bo))
-		return -ERESTARTSYS;
-
-	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
-		return -ERESTARTSYS;
-
-	return 0;
-}
-
 /**
  * amdgpu_gem_va_update_vm -update the bo_va in its VM
  *
@@ -533,19 +549,9 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 				    struct list_head *list,
 				    uint32_t operation)
 {
-	struct ttm_validate_buffer *entry;
 	int r = -ERESTARTSYS;
 
-	list_for_each_entry(entry, list, head) {
-		struct amdgpu_bo *bo =
-			container_of(entry->bo, struct amdgpu_bo, tbo);
-		if (amdgpu_gem_va_check(NULL, bo))
-			goto error;
-	}
-
-	r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check,
-				      NULL);
-	if (r)
+	if (!amdgpu_gem_vm_ready(adev, vm, list))
 		goto error;
 
 	r = amdgpu_vm_update_directories(adev, vm);
-- 
cgit v1.2.3


From 55ed8caf14fdfecf300d35cfe2e04b670b0191c1 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Fri, 21 Apr 2017 16:40:00 +0800
Subject: drm/amdgpu: increase gtt size to 3GB by default v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: address Alex's comment, add AMDGPU_DEFAULT_GTT_SIZE_MB.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++-
 5 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c4f6211640da..a4fc54c70f30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -110,6 +110,7 @@ extern int amdgpu_pos_buf_per_se;
 extern int amdgpu_cntl_sb_buf_per_se;
 extern int amdgpu_param_buf_per_se;
 
+#define AMDGPU_DEFAULT_GTT_SIZE_MB		3072ULL /* 3GB by default */
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
 #define AMDGPU_MAX_USEC_TIMEOUT			100000	/* 100 ms */
 #define AMDGPU_FENCE_JIFFIES_TIMEOUT		(HZ / 2)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 31c50b2dbbc1..a572979f186c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -346,7 +346,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
 	 * size equal to the 1024 or vram, whichever is larger.
 	 */
 	if (amdgpu_gart_size == -1)
-		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
+		adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+					adev->mc.mc_vram_size);
 	else
 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 2217d4d0f895..a9083a16a250 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -395,7 +395,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 	 * size equal to the 1024 or vram, whichever is larger.
 	 */
 	if (amdgpu_gart_size == -1)
-		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
+		adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+					adev->mc.mc_vram_size);
 	else
 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index e4f5df32f31e..4ac99784160a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -557,7 +557,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 	 * size equal to the 1024 or vram, whichever is larger.
 	 */
 	if (amdgpu_gart_size == -1)
-		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
+		adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+					adev->mc.mc_vram_size);
 	else
 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index edf43769ae70..e5d4dfeae3fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -486,7 +486,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 	 * size equal to the 1024 or vram, whichever is larger.
 	 */
 	if (amdgpu_gart_size == -1)
-		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
+		adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+					adev->mc.mc_vram_size);
 	else
 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
 
-- 
cgit v1.2.3


From 23d2e5049c080abda50810d21c8be20b5d65d191 Mon Sep 17 00:00:00 2001
From: "Roger.He" <Hongbo.He@amd.com>
Date: Fri, 21 Apr 2017 14:24:26 +0800
Subject: drm/amdgpu: fix indent

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Roger.He <Hongbo.He@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 34 +++++++++++++++---------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 75f851bd5b63..61716a2d4484 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2420,25 +2420,25 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
 	uint32_t domain;
 	int r;
 
-       if (!bo->shadow)
-               return 0;
-
-       r = amdgpu_bo_reserve(bo, false);
-       if (r)
-               return r;
-       domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-       /* if bo has been evicted, then no need to recover */
-       if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
-               r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
+	if (!bo->shadow)
+		return 0;
+
+	r = amdgpu_bo_reserve(bo, false);
+	if (r)
+		return r;
+	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
+	/* if bo has been evicted, then no need to recover */
+	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+		r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
 						 NULL, fence, true);
-               if (r) {
-                       DRM_ERROR("recover page table failed!\n");
-                       goto err;
-               }
-       }
+		if (r) {
+			DRM_ERROR("recover page table failed!\n");
+			goto err;
+		}
+	}
 err:
-       amdgpu_bo_unreserve(bo);
-       return r;
+	amdgpu_bo_unreserve(bo);
+	return r;
 }
 
 /**
-- 
cgit v1.2.3


From 6c98d31ee83ddd351e89d5e2002e678fdaf9d2af Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Fri, 21 Apr 2017 17:58:42 +0800
Subject: drm/amdgpu: fix no-vmid job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[  132.036658] amdgpu 0000:22:00.0: VM IB without ID
[  132.036709] [drm:amdgpu_job_run [amdgpu]] *ERROR* Error scheduling IBs (-22)
[  132.036755] [drm:amd_sched_main [amdgpu]] *ERROR* Failed to run job!

root cause is fence is signaled during sync transfer.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 86a12424c162..c3cfeb335d99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -139,7 +139,7 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 
 	struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync);
 
-	if (fence == NULL && vm && !job->vm_id) {
+	while (fence == NULL && vm && !job->vm_id) {
 		struct amdgpu_ring *ring = job->ring;
 		int r;
 
-- 
cgit v1.2.3


From 51687759be93fbc553f2727e86be25c38126ba93 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Mon, 24 Apr 2017 17:09:15 +0800
Subject: drm/amdgpu: fix gpu reset crash
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[  413.687439] BUG: unable to handle kernel NULL pointer dereference at 0000000000000548
[  413.687479] IP: [<ffffffff8109b175>] to_live_kthread+0x5/0x60
[  413.687507] PGD 1efd12067
[  413.687519] PUD 1efd11067
[  413.687531] PMD 0

[  413.687543] Oops: 0000 [#1] SMP
[  413.687557] Modules linked in: amdgpu(OE) ttm(OE) drm_kms_helper(E) drm(E) i2c_algo_bit(E) fb_sys_fops(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) rpcsec_gss_krb5(E) nfsv4(E) nfs(E) fscache(E) snd_hda_codec_realtek(E) snd_hda_codec_generic(E) snd_hda_codec_hdmi(E) snd_hda_intel(E) eeepc_wmi(E) snd_hda_codec(E) asus_wmi(E) snd_hda_core(E) sparse_keymap(E) snd_hwdep(E) video(E) snd_pcm(E) snd_seq_midi(E) joydev(E) snd_seq_midi_event(E) snd_rawmidi(E) snd_seq(E) snd_seq_device(E) snd_timer(E) kvm(E) irqbypass(E) crct10dif_pclmul(E) snd(E) crc32_pclmul(E) ghash_clmulni_intel(E) soundcore(E) aesni_intel(E) aes_x86_64(E) lrw(E) gf128mul(E) glue_helper(E) ablk_helper(E) cryptd(E) shpchp(E) serio_raw(E) i2c_piix4(E) 8250_dw(E) i2c_designware_platform(E) i2c_designware_core(E) mac_hid(E) binfmt_misc(E)
[  413.687894]  parport_pc(E) ppdev(E) lp(E) parport(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sunrpc(E) autofs4(E) hid_generic(E) usbhid(E) hid(E) psmouse(E) ahci(E) r8169(E) mii(E) libahci(E) wmi(E)
[  413.687989] CPU: 13 PID: 1134 Comm: kworker/13:2 Tainted: G           OE   4.9.0-custom #4
[  413.688019] Hardware name: System manufacturer System Product Name/PRIME B350-PLUS, BIOS 0606 04/06/2017
[  413.688089] Workqueue: events amd_sched_job_timedout [amdgpu]
[  413.688116] task: ffff88020f9657c0 task.stack: ffffc90001a88000
[  413.688139] RIP: 0010:[<ffffffff8109b175>]  [<ffffffff8109b175>] to_live_kthread+0x5/0x60
[  413.688171] RSP: 0018:ffffc90001a8bd60  EFLAGS: 00010282
[  413.688191] RAX: ffff88020f0073f8 RBX: ffff88020f000000 RCX: 0000000000000000
[  413.688217] RDX: 0000000000000001 RSI: ffff88020f9670c0 RDI: 0000000000000000
[  413.688243] RBP: ffffc90001a8bd78 R08: 0000000000000000 R09: 0000000000001000
[  413.688269] R10: 0000006051b11a82 R11: 0000000000000001 R12: 0000000000000000
[  413.688295] R13: ffff88020f002770 R14: ffff88020f004838 R15: ffff8801b23c2c60
[  413.688321] FS:  0000000000000000(0000) GS:ffff88021ef40000(0000) knlGS:0000000000000000
[  413.688352] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  413.688373] CR2: 0000000000000548 CR3: 00000001efd0f000 CR4: 00000000003406e0
[  413.688399] Stack:
[  413.688407]  ffffffff8109b304 ffff88020f000000 0000000000000070 ffffc90001a8bdf0
[  413.688439]  ffffffffa05ce29d ffffffffa052feb7 ffffffffa07b5820 ffffc90001a8bda0
[  413.688470]  ffffffff00000018 ffff8801bb88f060 0000000001a8bdb8 ffff88021ef59280
[  413.688502] Call Trace:
[  413.688514]  [<ffffffff8109b304>] ? kthread_park+0x14/0x60
[  413.688555]  [<ffffffffa05ce29d>] amdgpu_gpu_reset+0x7d/0x670 [amdgpu]
[  413.688589]  [<ffffffffa052feb7>] ? drm_printk+0x97/0xa0 [drm]
[  413.688643]  [<ffffffffa0698136>] amdgpu_job_timedout+0x46/0x50 [amdgpu]
[  413.688700]  [<ffffffffa06969e7>] amd_sched_job_timedout+0x17/0x20 [amdgpu]
[  413.688727]  [<ffffffff81095493>] process_one_work+0x153/0x3f0
[  413.688751]  [<ffffffff81095c5b>] worker_thread+0x12b/0x4b0
[  413.688773]  [<ffffffff8100392e>] ? do_syscall_64+0x6e/0x180
[  413.688795]  [<ffffffff81095b30>] ? rescuer_thread+0x350/0x350
[  413.688818]  [<ffffffff8100392e>] ? do_syscall_64+0x6e/0x180
[  413.688839]  [<ffffffff8109b423>] kthread+0xd3/0xf0
[  413.688858]  [<ffffffff8109b350>] ? kthread_park+0x60/0x60
[  413.688881]  [<ffffffff817e1ee5>] ret_from_fork+0x25/0x30
[  413.688901] Code: 25 40 d3 00 00 48 8b 80 48 05 00 00 48 89 e5 5d 48 8b 40 c8 48 c1 e8 02 83 e0 01 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <48> 8b b7 48 05 00 00 55 48 89 e5 48 85 f6 74 31 8b 97 f8 18 00
[  413.689045] RIP  [<ffffffff8109b175>] to_live_kthread+0x5/0x60
[  413.689064]  RSP <ffffc90001a8bd60>
[  413.689076] CR2: 0000000000000548
[  413.697985] ---[ end trace 0a314a64821f84e9 ]---

The root cause is some ring doesn't have scheduler, like KIQ ring

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 61716a2d4484..eadd6e0a4152 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2583,7 +2583,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring)
+		if (!ring || !ring->sched.thread)
 			continue;
 		kthread_park(ring->sched.thread);
 		amd_sched_hw_job_reset(&ring->sched);
@@ -2678,7 +2678,8 @@ retry:
 		}
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
-			if (!ring)
+
+			if (!ring || !ring->sched.thread)
 				continue;
 
 			amd_sched_job_recovery(&ring->sched);
@@ -2687,7 +2688,7 @@ retry:
 	} else {
 		dev_err(adev->dev, "asic resume failed (%d).\n", r);
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			if (adev->rings[i]) {
+			if (adev->rings[i] && adev->rings[i]->sched.thread) {
 				kthread_unpark(adev->rings[i]->sched.thread);
 			}
 		}
-- 
cgit v1.2.3


From e993ca4f3b18a649e20db387f09649694043b3d4 Mon Sep 17 00:00:00 2001
From: Daniel Wang <Daniel.Wang2@amd.com>
Date: Thu, 20 Apr 2017 11:45:09 +0800
Subject: drm/amdgpu/psp: skip loading SDMA/RLCG under SRIOV VF

Now GPU hypervisor will load SDMA and RLCG ucode, so skip it
in guest.

Signed-off-by: Daniel Wang <Daniel.Wang2@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 1e380fe29b5e..ac5e92e5d59d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -289,6 +289,12 @@ static int psp_np_fw_load(struct psp_context *psp)
 		if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
 		    psp_smu_reload_quirk(psp))
 			continue;
+		if (amdgpu_sriov_vf(adev) &&
+		   (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0
+		    || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1
+		    || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G))
+			/*skip ucode loading in SRIOV VF */
+			continue;
 
 		ret = psp_prep_cmd_buf(ucode, psp->cmd);
 		if (ret)
-- 
cgit v1.2.3


From b53b8cdac621da0a4bac5f83e0004dd9b74a9160 Mon Sep 17 00:00:00 2001
From: Daniel Wang <Daniel.Wang2@amd.com>
Date: Wed, 19 Apr 2017 16:09:08 +0800
Subject: drm/amdgpu/vce4: fix a PSP loading VCE issue

Fixed PSP loading issue for sriov.

Signed-off-by: Daniel Wang <Daniel.Wang2@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 72d0edb9bc39..08ff7f4f91aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -291,9 +291,21 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
 
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8);
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		    INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
+				adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+		    INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
+				adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+		    INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
+				adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+		} else {
+		    INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
+				adev->vce.gpu_addr >> 8);
+		    INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
+				adev->vce.gpu_addr >> 8);
+		    INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
+				adev->vce.gpu_addr >> 8);
+		}
 
 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 		size = VCE_V4_0_FW_SIZE;
-- 
cgit v1.2.3


From a92f5ec0c1341466bbfe98e1abc3cf5427ba4a67 Mon Sep 17 00:00:00 2001
From: Frank Min <Frank.Min@amd.com>
Date: Sun, 16 Apr 2017 13:37:07 +0800
Subject: drm/amdgpu/vce4: move mm table constructions functions into mmsch
 header file

Move mm table construction functions into mmsch header file so that
UVD can reuse it.

Signed-off-by: Frank Min <Frank.Min@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h |  57 +++++++++++++
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c   | 143 ++++++++++----------------------
 2 files changed, 103 insertions(+), 97 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
index 5f0fc8bf16a9..8af0bddf85e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
@@ -84,4 +84,61 @@ struct mmsch_v1_0_cmd_indirect_write {
 	uint32_t reg_value;
 };
 
+static inline void mmsch_v1_0_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt,
+					       uint32_t *init_table,
+					       uint32_t reg_offset,
+					       uint32_t value)
+{
+	direct_wt->cmd_header.reg_offset = reg_offset;
+	direct_wt->reg_value = value;
+	memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write));
+}
+
+static inline void mmsch_v1_0_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
+						      uint32_t *init_table,
+						      uint32_t reg_offset,
+						      uint32_t mask, uint32_t data)
+{
+	direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
+	direct_rd_mod_wt->mask_value = mask;
+	direct_rd_mod_wt->write_data = data;
+	memcpy((void *)init_table, direct_rd_mod_wt,
+	       sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
+}
+
+static inline void mmsch_v1_0_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll,
+						 uint32_t *init_table,
+						 uint32_t reg_offset,
+						 uint32_t mask, uint32_t wait)
+{
+	direct_poll->cmd_header.reg_offset = reg_offset;
+	direct_poll->mask_value = mask;
+	direct_poll->wait_value = wait;
+	memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling));
+}
+
+#define MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
+	mmsch_v1_0_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \
+					   init_table, (reg), \
+					   (mask), (data)); \
+	init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
+	table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
+}
+
+#define MMSCH_V1_0_INSERT_DIRECT_WT(reg, value) { \
+	mmsch_v1_0_insert_direct_wt(&direct_wt, \
+				    init_table, (reg), \
+				    (value)); \
+	init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
+	table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
+}
+
+#define MMSCH_V1_0_INSERT_DIRECT_POLL(reg, mask, wait) { \
+	mmsch_v1_0_insert_direct_poll(&direct_poll, \
+				      init_table, (reg), \
+				      (mask), (wait)); \
+	init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
+	table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 08ff7f4f91aa..69efd3094f89 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -49,63 +49,6 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
 
-static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt,
-					  uint32_t *init_table,
-					  uint32_t reg_offset,
-					  uint32_t value)
-{
-	direct_wt->cmd_header.reg_offset = reg_offset;
-	direct_wt->reg_value = value;
-	memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write));
-}
-
-static inline void mmsch_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
-						 uint32_t *init_table,
-						 uint32_t reg_offset,
-						 uint32_t mask, uint32_t data)
-{
-	direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
-	direct_rd_mod_wt->mask_value = mask;
-	direct_rd_mod_wt->write_data = data;
-	memcpy((void *)init_table, direct_rd_mod_wt,
-	       sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
-}
-
-static inline void mmsch_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll,
-					    uint32_t *init_table,
-					    uint32_t reg_offset,
-					    uint32_t mask, uint32_t wait)
-{
-	direct_poll->cmd_header.reg_offset = reg_offset;
-	direct_poll->mask_value = mask;
-	direct_poll->wait_value = wait;
-	memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling));
-}
-
-#define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
-	mmsch_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \
-				      init_table, (reg), \
-				      (mask), (data)); \
-	init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
-	table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
-}
-
-#define INSERT_DIRECT_WT(reg, value) { \
-	mmsch_insert_direct_wt(&direct_wt, \
-			       init_table, (reg), \
-			       (value)); \
-	init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
-	table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
-}
-
-#define INSERT_DIRECT_POLL(reg, mask, wait) { \
-	mmsch_insert_direct_poll(&direct_poll, \
-				 init_table, (reg), \
-				 (mask), (wait)); \
-	init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
-	table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
-}
-
 /**
  * vce_v4_0_ring_get_rptr - get read pointer
  *
@@ -280,67 +223,73 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 		init_table += header->vce_table_offset;
 
 		ring = &adev->vce.ring[0];
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), lower_32_bits(ring->gpu_addr));
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
+					    lower_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
+					    upper_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
+					    ring->ring_size / 4);
 
 		/* BEGING OF MC_RESUME */
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
 
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
-		    INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
-				adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
-		    INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
-				adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
-		    INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
-				adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
+						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
+						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
+						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
 		} else {
-		    INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
-				adev->vce.gpu_addr >> 8);
-		    INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
-				adev->vce.gpu_addr >> 8);
-		    INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
-				adev->vce.gpu_addr >> 8);
+		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
+						adev->vce.gpu_addr >> 8);
+		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
+						adev->vce.gpu_addr >> 8);
+		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
+						adev->vce.gpu_addr >> 8);
 		}
 
 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 		size = VCE_V4_0_FW_SIZE;
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & 0x7FFFFFFF);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
+					    offset & 0x7FFFFFFF);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
 
 		offset += size;
 		size = VCE_V4_0_STACK_SIZE;
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), offset & 0x7FFFFFFF);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
+					    offset & 0x7FFFFFFF);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
 
 		offset += size;
 		size = VCE_V4_0_DATA_SIZE;
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), offset & 0x7FFFFFFF);
-		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
+					    offset & 0x7FFFFFFF);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
 
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
-				0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
+						   0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
 
 		/* end of MC_RESUME */
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
-				VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
-				~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
-				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
+						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
+						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
+						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
 
-		INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
-				VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
-				VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
+		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
+					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
+					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
 
 		/* clear BUSY flag */
-		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
-				~VCE_STATUS__JOB_BUSY_MASK, 0);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
+						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
 
 		/* add end packet */
 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
-- 
cgit v1.2.3


From 904cd3891dd390b109c8146974b47fca78b97c98 Mon Sep 17 00:00:00 2001
From: Xiangliang Yu <Xiangliang.Yu@amd.com>
Date: Fri, 21 Apr 2017 15:40:25 +0800
Subject: drm/amdgpu/virt: add two functions for MM table

Add two functions to allocate & free MM table memory.

Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 46 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  2 ++
 2 files changed, 48 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index ba8b8ae6234f..6bf5cea294f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -225,3 +225,49 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev)
 
 	return 0;
 }
+
+/**
+ * amdgpu_virt_alloc_mm_table() - alloc memory for mm table
+ * @amdgpu:	amdgpu device.
+ * MM table is used by UVD and VCE for its initialization
+ * Return: Zero if allocate success.
+ */
+int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
+{
+	int r;
+
+	if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr)
+		return 0;
+
+	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_VRAM,
+				    &adev->virt.mm_table.bo,
+				    &adev->virt.mm_table.gpu_addr,
+				    (void *)&adev->virt.mm_table.cpu_addr);
+	if (r) {
+		DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
+		return r;
+	}
+
+	memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
+	DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+		 adev->virt.mm_table.gpu_addr,
+		 adev->virt.mm_table.cpu_addr);
+	return 0;
+}
+
+/**
+ * amdgpu_virt_free_mm_table() - free mm table memory
+ * @amdgpu:	amdgpu device.
+ * Free MM table memory
+ */
+void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
+{
+	if (!amdgpu_sriov_vf(adev) || !adev->virt.mm_table.gpu_addr)
+		return;
+
+	amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
+			      &adev->virt.mm_table.gpu_addr,
+			      (void *)&adev->virt.mm_table.cpu_addr);
+	adev->virt.mm_table.gpu_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 1ee0a190b33b..a8ed162cc0bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -98,5 +98,7 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
 int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary);
+int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
+void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
 
 #endif
-- 
cgit v1.2.3


From 7006dde2ef6110b11a76b0f972950d6da0ff3d6c Mon Sep 17 00:00:00 2001
From: Xiangliang Yu <Xiangliang.Yu@amd.com>
Date: Fri, 21 Apr 2017 16:21:41 +0800
Subject: drm/amdgpu/vce4: replaced with virt_alloc_mm_table

Used virt_alloc_mm_table function to allocate MM table memory.

Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 69efd3094f89..139f964196b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -450,20 +450,9 @@ static int vce_v4_0_sw_init(void *handle)
 			return r;
 	}
 
-	if (amdgpu_sriov_vf(adev)) {
-		r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
-					    AMDGPU_GEM_DOMAIN_VRAM,
-					    &adev->virt.mm_table.bo,
-					    &adev->virt.mm_table.gpu_addr,
-					    (void *)&adev->virt.mm_table.cpu_addr);
-		if (!r) {
-			memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
-			printk("mm table gpu addr = 0x%llx, cpu addr = %p. \n",
-			       adev->virt.mm_table.gpu_addr,
-			       adev->virt.mm_table.cpu_addr);
-		}
+	r = amdgpu_virt_alloc_mm_table(adev);
+	if (r)
 		return r;
-	}
 
 	return r;
 }
@@ -474,10 +463,7 @@ static int vce_v4_0_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* free MM table */
-	if (amdgpu_sriov_vf(adev))
-		amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
-				      &adev->virt.mm_table.gpu_addr,
-				      (void *)&adev->virt.mm_table.cpu_addr);
+	amdgpu_virt_free_mm_table(adev);
 
 	r = amdgpu_vce_suspend(adev);
 	if (r)
-- 
cgit v1.2.3


From 247ac95141740157bd86d29819ff46e7ab396113 Mon Sep 17 00:00:00 2001
From: Frank Min <Frank.Min@amd.com>
Date: Mon, 17 Apr 2017 11:28:12 +0800
Subject: drm/amdgpu/uvd7: add sriov uvd initialization sequences

Add UVD initialization for SRIOV.

Signed-off-by: Frank Min <Frank.Min@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 246 ++++++++++++++++++++++++++++++++++
 1 file changed, 246 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index c646570d1421..46c7bc490809 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -27,10 +27,14 @@
 #include "amdgpu_uvd.h"
 #include "soc15d.h"
 #include "soc15_common.h"
+#include "mmsch_v1_0.h"
 
 #include "vega10/soc15ip.h"
 #include "vega10/UVD/uvd_7_0_offset.h"
 #include "vega10/UVD/uvd_7_0_sh_mask.h"
+#include "vega10/VCE/vce_4_0_offset.h"
+#include "vega10/VCE/vce_4_0_default.h"
+#include "vega10/VCE/vce_4_0_sh_mask.h"
 #include "vega10/NBIF/nbif_6_1_offset.h"
 #include "vega10/HDP/hdp_4_0_offset.h"
 #include "vega10/MMHUB/mmhub_1_0_offset.h"
@@ -41,6 +45,7 @@ static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev);
 static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static int uvd_v7_0_start(struct amdgpu_device *adev);
 static void uvd_v7_0_stop(struct amdgpu_device *adev);
+static int uvd_v7_0_sriov_start(struct amdgpu_device *adev);
 
 /**
  * uvd_v7_0_ring_get_rptr - get read pointer
@@ -618,6 +623,247 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
 	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
 }
 
+static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
+				struct amdgpu_mm_table *table)
+{
+	uint32_t data = 0, loop;
+	uint64_t addr = table->gpu_addr;
+	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
+	uint32_t size;
+
+	size = header->header_size + header->vce_table_size + header->uvd_table_size;
+
+	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
+
+	/* 2, update vmid of descriptor */
+	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
+	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
+
+	/* 3, notify mmsch about the size of this descriptor */
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
+
+	/* 4, set resp to zero */
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
+
+	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
+
+	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
+	loop = 1000;
+	while ((data & 0x10000002) != 0x10000002) {
+		udelay(10);
+		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
+		loop--;
+		if (!loop)
+			break;
+	}
+
+	if (!loop) {
+		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	uint32_t offset, size, tmp;
+	uint32_t table_size = 0;
+	struct mmsch_v1_0_cmd_direct_write direct_wt = { {0} };
+	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} };
+	struct mmsch_v1_0_cmd_direct_polling direct_poll = { {0} };
+	//struct mmsch_v1_0_cmd_indirect_write indirect_wt = {{0}};
+	struct mmsch_v1_0_cmd_end end = { {0} };
+	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
+
+	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
+	end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+	if (header->uvd_table_offset == 0 && header->uvd_table_size == 0) {
+		header->version = MMSCH_VERSION;
+		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
+
+		if (header->vce_table_offset == 0 && header->vce_table_size == 0)
+			header->uvd_table_offset = header->header_size;
+		else
+			header->uvd_table_offset = header->vce_table_size + header->vce_table_offset;
+
+		init_table += header->uvd_table_offset;
+
+		ring = &adev->uvd.ring;
+		size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
+
+		/* disable clock gating */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+						   ~UVD_POWER_STATUS__UVD_PG_MODE_MASK, 0);
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
+						   0xFFFFFFFF, 0x00000004);
+		/* mc resume*/
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+						    lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+						    upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+			offset = 0;
+		} else {
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+						    lower_32_bits(adev->uvd.gpu_addr));
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+						    upper_32_bits(adev->uvd.gpu_addr));
+			offset = size;
+		}
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
+					    AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size);
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+					    lower_32_bits(adev->uvd.gpu_addr + offset));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+					    upper_32_bits(adev->uvd.gpu_addr + offset));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+					    lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+					    upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2),
+					    AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_ADDR_CONFIG),
+					    adev->gfx.config.gb_addr_config);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG),
+					    adev->gfx.config.gb_addr_config);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG),
+					    adev->gfx.config.gb_addr_config);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
+		/* mc resume end*/
+
+		/* disable clock gating */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL),
+						   ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
+
+		/* disable interupt */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
+						   ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
+
+		/* stall UMC and register bus before resetting VCPU */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+						   ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+						   UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+		/* put LMI, VCPU, RBC etc... into reset */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+					    (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+						       UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
+
+		/* initialize UVD memory controller */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL),
+					    (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+						       UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+						       UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+						       UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+						       UVD_LMI_CTRL__REQ_MODE_MASK |
+						       0x00100000L));
+
+		/* disable byte swapping */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_SWAP_CNTL), 0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MP_SWAP_CNTL), 0);
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA0), 0x40c2040);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA1), 0x0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB0), 0x40c2040);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB1), 0x0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_ALU), 0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUX), 0x88);
+
+		/* take all subblocks out of reset, except VCPU */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+					    UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+		/* enable VCPU clock */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
+					    UVD_VCPU_CNTL__CLK_EN_MASK);
+
+		/* enable UMC */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+						   ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
+
+		/* boot up the VCPU */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0);
+
+		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02);
+
+		/* enable master interrupt */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
+						   ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+						   (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+
+		/* clear the bit 4 of UVD_STATUS */
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
+						   ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
+
+		/* force RBC into idle state */
+		size = order_base_2(ring->ring_size);
+		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp);
+
+		/* set the write pointer delay */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL), 0);
+
+		/* set the wb address */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR),
+					    (upper_32_bits(ring->gpu_addr) >> 2));
+
+		/* programm the RB_BASE for ring buffer */
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
+					    lower_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
+					    upper_32_bits(ring->gpu_addr));
+
+		ring->wptr = 0;
+		ring = &adev->uvd.ring_enc[0];
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4);
+
+		ring = &adev->uvd.ring_enc[1];
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO2), ring->gpu_addr);
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE2), ring->ring_size / 4);
+
+		/* add end packet */
+		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
+		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+		header->uvd_table_size = table_size;
+
+		return uvd_v7_0_mmsch_start(adev, &adev->virt.mm_table);
+	}
+	return -EINVAL; /* already initializaed ? */
+}
+
 /**
  * uvd_v7_0_start - start UVD block
  *
-- 
cgit v1.2.3


From beb2ced51b70a6cbccd8676b450e282fecf65565 Mon Sep 17 00:00:00 2001
From: Frank Min <Frank.Min@amd.com>
Date: Mon, 17 Apr 2017 11:45:35 +0800
Subject: drm/amdgpu/uvd7: add uvd doorbell initialization for sriov

Add UVD doorbell for SRIOV.

Signed-off-by: Frank Min <Frank.Min@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 46c7bc490809..552bfcdd3236 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -103,6 +103,9 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
+	if (ring->use_doorbell)
+		return adev->wb.wb[ring->wptr_offs];
+
 	if (ring == &adev->uvd.ring_enc[0])
 		return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR));
 	else
@@ -134,6 +137,13 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
+	if (ring->use_doorbell) {
+		/* XXX check if swapping is necessary on BE */
+		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+		return;
+	}
+
 	if (ring == &adev->uvd.ring_enc[0])
 		WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR),
 			lower_32_bits(ring->wptr));
@@ -421,6 +431,10 @@ static int uvd_v7_0_sw_init(void *handle)
 	for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
 		ring = &adev->uvd.ring_enc[i];
 		sprintf(ring->name, "uvd_enc%d", i);
+		if (amdgpu_sriov_vf(adev)) {
+			ring->use_doorbell = true;
+			ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
+		}
 		r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
 		if (r)
 			return r;
-- 
cgit v1.2.3


From 6fa336a777712fac9da763d3df348fcfc96633f8 Mon Sep 17 00:00:00 2001
From: Frank Min <Frank.Min@amd.com>
Date: Mon, 17 Apr 2017 11:51:44 +0800
Subject: drm/amdgpu/uvd7: add UVD hw init sequences for sriov

Add UVD hw init.

Signed-off-by: Frank Min <Frank.Min@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 101 +++++++++++++++++++---------------
 1 file changed, 56 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 552bfcdd3236..eca8f6e01e97 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -368,7 +368,10 @@ static int uvd_v7_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	adev->uvd.num_enc_rings = 2;
+	if (amdgpu_sriov_vf(adev))
+		adev->uvd.num_enc_rings = 1;
+	else
+		adev->uvd.num_enc_rings = 2;
 	uvd_v7_0_set_ring_funcs(adev);
 	uvd_v7_0_set_enc_ring_funcs(adev);
 	uvd_v7_0_set_irq_funcs(adev);
@@ -421,12 +424,14 @@ static int uvd_v7_0_sw_init(void *handle)
 	r = amdgpu_uvd_resume(adev);
 	if (r)
 		return r;
+	if (!amdgpu_sriov_vf(adev)) {
+		ring = &adev->uvd.ring;
+		sprintf(ring->name, "uvd");
+		r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+		if (r)
+			return r;
+	}
 
-	ring = &adev->uvd.ring;
-	sprintf(ring->name, "uvd");
-	r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
-	if (r)
-		return r;
 
 	for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
 		ring = &adev->uvd.ring_enc[i];
@@ -440,6 +445,10 @@ static int uvd_v7_0_sw_init(void *handle)
 			return r;
 	}
 
+	r = amdgpu_virt_alloc_mm_table(adev);
+	if (r)
+		return r;
+
 	return r;
 }
 
@@ -448,6 +457,8 @@ static int uvd_v7_0_sw_fini(void *handle)
 	int i, r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	amdgpu_virt_free_mm_table(adev);
+
 	r = amdgpu_uvd_suspend(adev);
 	if (r)
 		return r;
@@ -474,48 +485,53 @@ static int uvd_v7_0_hw_init(void *handle)
 	uint32_t tmp;
 	int i, r;
 
-	r = uvd_v7_0_start(adev);
+	if (amdgpu_sriov_vf(adev))
+		r = uvd_v7_0_sriov_start(adev);
+	else
+		r = uvd_v7_0_start(adev);
 	if (r)
 		goto done;
 
-	ring->ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->ready = false;
-		goto done;
-	}
+	if (!amdgpu_sriov_vf(adev)) {
+		ring->ready = true;
+		r = amdgpu_ring_test_ring(ring);
+		if (r) {
+			ring->ready = false;
+			goto done;
+		}
 
-	r = amdgpu_ring_alloc(ring, 10);
-	if (r) {
-		DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
-		goto done;
-	}
+		r = amdgpu_ring_alloc(ring, 10);
+		if (r) {
+			DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
+			goto done;
+		}
 
-	tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-		mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
-	amdgpu_ring_write(ring, tmp);
-	amdgpu_ring_write(ring, 0xFFFFF);
+		tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
+			mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
+		amdgpu_ring_write(ring, tmp);
+		amdgpu_ring_write(ring, 0xFFFFF);
 
-	tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-		mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
-	amdgpu_ring_write(ring, tmp);
-	amdgpu_ring_write(ring, 0xFFFFF);
+		tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
+			mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
+		amdgpu_ring_write(ring, tmp);
+		amdgpu_ring_write(ring, 0xFFFFF);
 
-	tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-		mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
-	amdgpu_ring_write(ring, tmp);
-	amdgpu_ring_write(ring, 0xFFFFF);
+		tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
+			mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
+		amdgpu_ring_write(ring, tmp);
+		amdgpu_ring_write(ring, 0xFFFFF);
 
-	/* Clear timeout status bits */
-	amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
-		mmUVD_SEMA_TIMEOUT_STATUS), 0));
-	amdgpu_ring_write(ring, 0x8);
+		/* Clear timeout status bits */
+		amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
+			mmUVD_SEMA_TIMEOUT_STATUS), 0));
+		amdgpu_ring_write(ring, 0x8);
 
-	amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
-		mmUVD_SEMA_CNTL), 0));
-	amdgpu_ring_write(ring, 3);
+		amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
+			mmUVD_SEMA_CNTL), 0));
+		amdgpu_ring_write(ring, 3);
 
-	amdgpu_ring_commit(ring);
+		amdgpu_ring_commit(ring);
+	}
 
 	for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
 		ring = &adev->uvd.ring_enc[i];
@@ -692,7 +708,6 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
 	struct mmsch_v1_0_cmd_direct_write direct_wt = { {0} };
 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} };
 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { {0} };
-	//struct mmsch_v1_0_cmd_indirect_write indirect_wt = {{0}};
 	struct mmsch_v1_0_cmd_end end = { {0} };
 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
@@ -863,11 +878,6 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4);
 
-		ring = &adev->uvd.ring_enc[1];
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO2), ring->gpu_addr);
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE2), ring->ring_size / 4);
-
 		/* add end packet */
 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
@@ -1489,7 +1499,8 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
 		amdgpu_fence_process(&adev->uvd.ring_enc[0]);
 		break;
 	case 120:
-		amdgpu_fence_process(&adev->uvd.ring_enc[1]);
+		if (!amdgpu_sriov_vf(adev))
+			amdgpu_fence_process(&adev->uvd.ring_enc[1]);
 		break;
 	default:
 		DRM_ERROR("Unhandled interrupt: %d %d\n",
-- 
cgit v1.2.3


From 91faed9ee150fb07b5b0e6601b96219a6c74301f Mon Sep 17 00:00:00 2001
From: Frank Min <Frank.Min@amd.com>
Date: Mon, 17 Apr 2017 11:19:45 +0800
Subject: drm/amdgpu/soc15: enable UVD code path for sriov

Enable UVD block for SRIOV.

Signed-off-by: Frank Min <Frank.Min@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 385de8617075..02baa1d7bc23 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -505,8 +505,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
 		amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block);
 		amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block);
-		if (!amdgpu_sriov_vf(adev))
-			amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block);
+		amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block);
 		amdgpu_ip_block_add(adev, &vce_v4_0_ip_block);
 		break;
 	default:
-- 
cgit v1.2.3


From c346fb74fb646352018800375c327ac35880b7e0 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Mon, 24 Apr 2017 16:38:05 +0800
Subject: drm/radeon: check return value of radeon_ring_lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Function radeon_ring_lock() returns an errno on failure, and its return
value should be validated. However, in functions r420_cp_errata_init()
and r420_cp_errata_fini(), its return value is not checked. This patch
adds the checks.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/r420.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 3eb0c4f9f796..45e1d4e60759 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -203,6 +203,7 @@ static void r420_clock_resume(struct radeon_device *rdev)
 
 static void r420_cp_errata_init(struct radeon_device *rdev)
 {
+	int r;
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 
 	/* RV410 and R420 can lock up if CP DMA to host memory happens
@@ -212,7 +213,8 @@ static void r420_cp_errata_init(struct radeon_device *rdev)
 	 * of the CP init, apparently.
 	 */
 	radeon_scratch_get(rdev, &rdev->config.r300.resync_scratch);
-	radeon_ring_lock(rdev, ring, 8);
+	r = radeon_ring_lock(rdev, ring, 8);
+	WARN_ON(r);
 	radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1));
 	radeon_ring_write(ring, rdev->config.r300.resync_scratch);
 	radeon_ring_write(ring, 0xDEADBEEF);
@@ -221,12 +223,14 @@ static void r420_cp_errata_init(struct radeon_device *rdev)
 
 static void r420_cp_errata_fini(struct radeon_device *rdev)
 {
+	int r;
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 
 	/* Catch the RESYNC we dispatched all the way back,
 	 * at the very beginning of the CP init.
 	 */
-	radeon_ring_lock(rdev, ring, 8);
+	r = radeon_ring_lock(rdev, ring, 8);
+	WARN_ON(r);
 	radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 	radeon_ring_write(ring, R300_RB3D_DC_FINISH);
 	radeon_ring_unlock_commit(rdev, ring, false);
-- 
cgit v1.2.3


From 2f2429c38eddbc4f988589099fd86bb6bb65b7e4 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Mon, 24 Apr 2017 16:45:51 +0800
Subject: drm/radeon: check return value of radeon_fence_emit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Function radeon_fence_emit() returns -ENOMEM if there is no enough
memory. And in this case, function radeon_ring_unlock_undo() rather than
function radeon_ring_unlock_commit() should be called. However, in
function radeon_test_create_and_emit_fence(), the return value of
radeon_fence_emit() is ignored. This patch adds the check.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_test.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 4fdc7bda7a7d..f5e9abfadb56 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -298,7 +298,12 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev,
 			DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
 			return r;
 		}
-		radeon_fence_emit(rdev, fence, ring->idx);
+		r = radeon_fence_emit(rdev, fence, ring->idx);
+		if (r) {
+			DRM_ERROR("Failed to emit fence\n");
+			radeon_ring_unlock_undo(rdev, ring);
+			return r;
+		}
 		radeon_ring_unlock_commit(rdev, ring, false);
 	}
 	return 0;
-- 
cgit v1.2.3


From effaf848b957fbf72a3b6a1ad87f5e031eda0b75 Mon Sep 17 00:00:00 2001
From: Mario Kleiner <mario.kleiner.de@gmail.com>
Date: Mon, 24 Apr 2017 01:02:46 +0200
Subject: drm/amdgpu: Add missing lb_vblank_lead_lines setup to DCE-6 path.

This apparently got lost when implementing the new DCE-6 support
and would cause failures in pageflip scheduling and timestamping.

Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 307269bda4fa..e146d252aa30 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -979,7 +979,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
 	u32 priority_a_mark = 0, priority_b_mark = 0;
 	u32 priority_a_cnt = PRIORITY_OFF;
 	u32 priority_b_cnt = PRIORITY_OFF;
-	u32 tmp, arb_control3;
+	u32 tmp, arb_control3, lb_vblank_lead_lines = 0;
 	fixed20_12 a, b, c;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
@@ -1091,6 +1091,8 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
 		c.full = dfixed_div(c, a);
 		priority_b_mark = dfixed_trunc(c);
 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
+
+		lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
 	}
 
 	/* select wm A */
@@ -1120,6 +1122,9 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
 	/* save values for DPM */
 	amdgpu_crtc->line_time = line_time;
 	amdgpu_crtc->wm_high = latency_watermark_a;
+
+	/* Save number of lines the linebuffer leads before the scanout */
+	amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
 }
 
 /* watermark setup */
-- 
cgit v1.2.3


From ae45bbc2ba8f92b5a773fece6f5792f497c89282 Mon Sep 17 00:00:00 2001
From: Mario Kleiner <mario.kleiner.de@gmail.com>
Date: Mon, 24 Apr 2017 01:33:08 +0200
Subject: drm/radeon: Avoid overflows/divide-by-zero in latency_watermark
 calculations.

At dot clocks > approx. 250 Mhz, some of these calcs will overflow and
cause miscalculation of latency watermarks, and for some overflows also
divide-by-zero driver crash. Make calcs more overflow resistant.

This is a direct port of the corresponding patch from amdgpu-kms,
copy-paste for cik from dce-8 and si from dce-6, with a slightly
simpler variant for evergreen dce-4/5.

Only tested on DCE-4 evergreen with a Radeon HD-5770.

Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/cik.c       | 19 +++----------------
 drivers/gpu/drm/radeon/evergreen.c |  8 +-------
 drivers/gpu/drm/radeon/si.c        | 19 +++----------------
 3 files changed, 7 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 53710dd7d5dd..4f034cbc4a6a 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -9150,23 +9150,10 @@ static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
 	a.full = dfixed_const(available_bandwidth);
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
+	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+	tmp = min(dfixed_trunc(a), tmp);
 
-	b.full = dfixed_const(mc_latency + 512);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(b, c);
-
-	c.full = dfixed_const(dmif_size);
-	b.full = dfixed_div(c, b);
-
-	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(tmp, dfixed_trunc(b));
+	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index d1b1e0cc3c25..3c9c133bcccc 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2188,13 +2188,7 @@ static u32 evergreen_latency_watermark(struct evergreen_wm_params *wm)
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
 
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(dfixed_trunc(a), dfixed_trunc(b));
+	lb_fill_bw = min(dfixed_trunc(a), wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 528e5a49a214..3efdfd04069a 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2204,23 +2204,10 @@ static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
 	a.full = dfixed_const(available_bandwidth);
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
+	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+	tmp = min(dfixed_trunc(a), tmp);
 
-	b.full = dfixed_const(mc_latency + 512);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(b, c);
-
-	c.full = dfixed_const(dmif_size);
-	b.full = dfixed_div(c, b);
-
-	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(tmp, dfixed_trunc(b));
+	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
-- 
cgit v1.2.3


From e6b9a6c84b936e61f1ccff779494f3bd38775503 Mon Sep 17 00:00:00 2001
From: Mario Kleiner <mario.kleiner.de@gmail.com>
Date: Mon, 24 Apr 2017 01:33:09 +0200
Subject: drm/radeon: Make display watermark calculations more accurate

Avoid big roundoff errors in scanline/hactive durations for
high pixel clocks, especially for >= 500 Mhz, and thereby
program more accurate display fifo watermarks.

This is a port of the corresponding amdgpu patch.

Implemented for DCE 4,6,8.
Tested on Evergreen/DCE-4 with Radeon HD-5770.

Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/cik.c       | 10 +++++-----
 drivers/gpu/drm/radeon/evergreen.c | 10 +++++-----
 drivers/gpu/drm/radeon/si.c        | 10 +++++-----
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 4f034cbc4a6a..ccebe0f8d2e1 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -9261,14 +9261,14 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
 {
 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
 	struct dce8_wm_params wm_low, wm_high;
-	u32 pixel_period;
+	u32 active_time;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
 	u32 tmp, wm_mask;
 
 	if (radeon_crtc->base.enabled && num_heads && mode) {
-		pixel_period = 1000000 / (u32)mode->clock;
-		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
+		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
+		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
 
 		/* watermark for high clocks */
 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
@@ -9284,7 +9284,7 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
 
 		wm_high.disp_clk = mode->clock;
 		wm_high.src_width = mode->crtc_hdisplay;
-		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_high.active_time = active_time;
 		wm_high.blank_time = line_time - wm_high.active_time;
 		wm_high.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -9324,7 +9324,7 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
 
 		wm_low.disp_clk = mode->clock;
 		wm_low.src_width = mode->crtc_hdisplay;
-		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_low.active_time = active_time;
 		wm_low.blank_time = line_time - wm_low.active_time;
 		wm_low.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 3c9c133bcccc..f130ec41ee4b 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2255,7 +2255,7 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
 	struct evergreen_wm_params wm_low, wm_high;
 	u32 dram_channels;
-	u32 pixel_period;
+	u32 active_time;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
 	u32 priority_a_mark = 0, priority_b_mark = 0;
@@ -2266,8 +2266,8 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
 	fixed20_12 a, b, c;
 
 	if (radeon_crtc->base.enabled && num_heads && mode) {
-		pixel_period = 1000000 / (u32)mode->clock;
-		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
+		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
+		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
 		priority_a_cnt = 0;
 		priority_b_cnt = 0;
 		dram_channels = evergreen_get_number_of_dram_channels(rdev);
@@ -2285,7 +2285,7 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
 
 		wm_high.disp_clk = mode->clock;
 		wm_high.src_width = mode->crtc_hdisplay;
-		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_high.active_time = active_time;
 		wm_high.blank_time = line_time - wm_high.active_time;
 		wm_high.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -2312,7 +2312,7 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
 
 		wm_low.disp_clk = mode->clock;
 		wm_low.src_width = mode->crtc_hdisplay;
-		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_low.active_time = active_time;
 		wm_low.blank_time = line_time - wm_low.active_time;
 		wm_low.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 3efdfd04069a..ceee87f029d9 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2274,7 +2274,7 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
 	struct dce6_wm_params wm_low, wm_high;
 	u32 dram_channels;
-	u32 pixel_period;
+	u32 active_time;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
 	u32 priority_a_mark = 0, priority_b_mark = 0;
@@ -2284,8 +2284,8 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
 	fixed20_12 a, b, c;
 
 	if (radeon_crtc->base.enabled && num_heads && mode) {
-		pixel_period = 1000000 / (u32)mode->clock;
-		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
+		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
+		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
 		priority_a_cnt = 0;
 		priority_b_cnt = 0;
 
@@ -2307,7 +2307,7 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
 
 		wm_high.disp_clk = mode->clock;
 		wm_high.src_width = mode->crtc_hdisplay;
-		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_high.active_time = active_time;
 		wm_high.blank_time = line_time - wm_high.active_time;
 		wm_high.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -2334,7 +2334,7 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
 
 		wm_low.disp_clk = mode->clock;
 		wm_low.src_width = mode->crtc_hdisplay;
-		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_low.active_time = active_time;
 		wm_low.blank_time = line_time - wm_low.active_time;
 		wm_low.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-- 
cgit v1.2.3


From 8ab25b4f51ffd2f859afaffa191d50a1eda4128f Mon Sep 17 00:00:00 2001
From: Alex Xie <AlexBin.Xie@amd.com>
Date: Mon, 24 Apr 2017 13:30:43 -0400
Subject: drm/amdgpu: Fix use of interruptible waiting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If amdgpu_bo_reserve function is interrupted by signal,
amdgpu_bo_kunmap function is not called.

Signed-off-by: Alex Xie <AlexBin.Xie@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index eadd6e0a4152..ebc0022a7ab4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -349,7 +349,7 @@ static void amdgpu_vram_scratch_fini(struct amdgpu_device *adev)
 	if (adev->vram_scratch.robj == NULL) {
 		return;
 	}
-	r = amdgpu_bo_reserve(adev->vram_scratch.robj, false);
+	r = amdgpu_bo_reserve(adev->vram_scratch.robj, true);
 	if (likely(r == 0)) {
 		amdgpu_bo_kunmap(adev->vram_scratch.robj);
 		amdgpu_bo_unpin(adev->vram_scratch.robj);
-- 
cgit v1.2.3


From 7a6901d7d7e49ad50d477cd8f8ef79d079b5c6c5 Mon Sep 17 00:00:00 2001
From: Alex Xie <AlexBin.Xie@amd.com>
Date: Mon, 24 Apr 2017 13:52:41 -0400
Subject: drm/amdgpu: Fix use of interruptible waiting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. The signal interrupt can affect the expected behaviour.
2. There is no mechanism to handle the corresponding error.

Signed-off-by: Alex Xie <AlexBin.Xie@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ebc0022a7ab4..3f01c4b92280 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2140,7 +2140,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 
 		if (amdgpu_crtc->cursor_bo) {
 			struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-			r = amdgpu_bo_reserve(aobj, false);
+			r = amdgpu_bo_reserve(aobj, true);
 			if (r == 0) {
 				amdgpu_bo_unpin(aobj);
 				amdgpu_bo_unreserve(aobj);
@@ -2153,7 +2153,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 		robj = gem_to_amdgpu_bo(rfb->obj);
 		/* don't unpin kernel fb objects */
 		if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
-			r = amdgpu_bo_reserve(robj, false);
+			r = amdgpu_bo_reserve(robj, true);
 			if (r == 0) {
 				amdgpu_bo_unpin(robj);
 				amdgpu_bo_unreserve(robj);
@@ -2260,7 +2260,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 
 		if (amdgpu_crtc->cursor_bo) {
 			struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-			r = amdgpu_bo_reserve(aobj, false);
+			r = amdgpu_bo_reserve(aobj, true);
 			if (r == 0) {
 				r = amdgpu_bo_pin(aobj,
 						  AMDGPU_GEM_DOMAIN_VRAM,
-- 
cgit v1.2.3


From 1d28479776fe932c32bda2366501fb2408557397 Mon Sep 17 00:00:00 2001
From: Alex Xie <AlexBin.Xie@amd.com>
Date: Mon, 24 Apr 2017 13:53:04 -0400
Subject: drm/amdgpu: Fix use of interruptible waiting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. The signal interrupt can affect the expected behaviour.
2. There is no good mechanism to handle the corresponding error.

Signed-off-by: Alex Xie <AlexBin.Xie@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3f01c4b92280..234d90a12482 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2423,7 +2423,7 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
 	if (!bo->shadow)
 		return 0;
 
-	r = amdgpu_bo_reserve(bo, false);
+	r = amdgpu_bo_reserve(bo, true);
 	if (r)
 		return r;
 	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-- 
cgit v1.2.3


From f3aa745eedc851742f6d6574ee2322c021b8f33c Mon Sep 17 00:00:00 2001
From: Alex Xie <AlexBin.Xie@amd.com>
Date: Mon, 24 Apr 2017 14:27:00 -0400
Subject: drm/amdgpu: Fix use of interruptible waiting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. The signal interrupt can affect the expected behaviour.
2. There is no good mechanism to handle the corresponding error.

Signed-off-by: Alex Xie <AlexBin.Xie@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index cb89fff863c0..60de77b153b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -295,7 +295,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
 	if (*bo == NULL)
 		return;
 
-	if (likely(amdgpu_bo_reserve(*bo, false) == 0)) {
+	if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
 		if (cpu_addr)
 			amdgpu_bo_kunmap(*bo);
 
-- 
cgit v1.2.3


From d159f26caaa91c88e9078c69fd159be9cec3a005 Mon Sep 17 00:00:00 2001
From: Alex Xie <AlexBin.Xie@amd.com>
Date: Mon, 24 Apr 2017 15:26:57 -0400
Subject: drm/amdgpu: Real return value can be over-written when clean up
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Alex Xie <AlexBin.Xie@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index cc97eee93226..726b3e018eeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -117,6 +117,11 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
 	}
 
 out_cleanup:
+	/* Check error value now. The value can be overwritten when clean up.*/
+	if (r) {
+		DRM_ERROR("Error while benchmarking BO move.\n");
+	}
+
 	if (sobj) {
 		r = amdgpu_bo_reserve(sobj, false);
 		if (likely(r == 0)) {
@@ -133,10 +138,6 @@ out_cleanup:
 		}
 		amdgpu_bo_unref(&dobj);
 	}
-
-	if (r) {
-		DRM_ERROR("Error while benchmarking BO move.\n");
-	}
 }
 
 void amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
-- 
cgit v1.2.3


From 4a9ed1009b8b01409bdda899c0ff0d7fd2c6d094 Mon Sep 17 00:00:00 2001
From: Alex Xie <AlexBin.Xie@amd.com>
Date: Mon, 24 Apr 2017 15:33:16 -0400
Subject: drm/amdgpu: Fix use of interruptible waiting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. The signal interrupt can affect the expected behaviour.
2. There is no good mechanism to handle the corresponding error.
When signal interrupt happens, unpin is not called.
As a result, inside AMDGPU, the statistic of pin size will be wrong.

Signed-off-by: Alex Xie <AlexBin.Xie@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 726b3e018eeb..1beae5b930d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -123,7 +123,7 @@ out_cleanup:
 	}
 
 	if (sobj) {
-		r = amdgpu_bo_reserve(sobj, false);
+		r = amdgpu_bo_reserve(sobj, true);
 		if (likely(r == 0)) {
 			amdgpu_bo_unpin(sobj);
 			amdgpu_bo_unreserve(sobj);
@@ -131,7 +131,7 @@ out_cleanup:
 		amdgpu_bo_unref(&sobj);
 	}
 	if (dobj) {
-		r = amdgpu_bo_reserve(dobj, false);
+		r = amdgpu_bo_reserve(dobj, true);
 		if (likely(r == 0)) {
 			amdgpu_bo_unpin(dobj);
 			amdgpu_bo_unreserve(dobj);
-- 
cgit v1.2.3


From 82521316395a37a2ec1d7e396c055f3c7de9d93b Mon Sep 17 00:00:00 2001
From: "Roger.He" <Hongbo.He@amd.com>
Date: Fri, 21 Apr 2017 13:08:43 +0800
Subject: drm/amdgpu: validate shadow before restoring from it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Roger.He <Hongbo.He@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 21 +++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  1 +
 3 files changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 234d90a12482..3c1754df4c40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2429,6 +2429,18 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
 	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
 	/* if bo has been evicted, then no need to recover */
 	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+		r = amdgpu_bo_validate(bo->shadow);
+		if (r) {
+			DRM_ERROR("bo validate failed!\n");
+			goto err;
+		}
+
+		r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem);
+		if (r) {
+			DRM_ERROR("%p bind failed\n", bo->shadow);
+			goto err;
+		}
+
 		r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
 						 NULL, fence, true);
 		if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 60de77b153b7..365883d7948d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -543,6 +543,27 @@ err:
 	return r;
 }
 
+int amdgpu_bo_validate(struct amdgpu_bo *bo)
+{
+	uint32_t domain;
+	int r;
+
+	if (bo->pin_count)
+		return 0;
+
+	domain = bo->prefered_domains;
+
+retry:
+	amdgpu_ttm_placement_from_domain(bo, domain);
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
+		domain = bo->allowed_domains;
+		goto retry;
+	}
+
+	return r;
+}
+
 int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
 				  struct amdgpu_ring *ring,
 				  struct amdgpu_bo *bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 15a723adca76..382485115b06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -175,6 +175,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
 			       struct amdgpu_bo *bo,
 			       struct reservation_object *resv,
 			       struct dma_fence **fence, bool direct);
+int amdgpu_bo_validate(struct amdgpu_bo *bo);
 int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
 				  struct amdgpu_ring *ring,
 				  struct amdgpu_bo *bo,
-- 
cgit v1.2.3


From a6bef67e2abae4f19de8fb4ec34acb1df8156a75 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Mon, 24 Apr 2017 17:39:00 +0800
Subject: drm/amdgpu: fix NULL pointer error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[  141.420491] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
[  141.420532] IP: [<ffffffff81579ee1>] fence_remove_callback+0x11/0x60
[  141.420563] PGD 20a030067
[  141.420575] PUD 2088ca067
[  141.420587] PMD 0

[  141.420599] Oops: 0000 [#1] SMP
[  141.420612] Modules linked in: amdgpu(OE) ttm(OE) drm_kms_helper(E) drm(E) i2c_algo_bit(E) fb_sys_fops(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) rpcsec_gss_krb5(E) nfsv4(E) nfs(E) fscache(E) eeepc_wmi(E) asus_wmi(E) sparse_keymap(E) snd_hda_codec_realtek(E) video(E) snd_hda_codec_generic(E) snd_hda_codec_hdmi(E) snd_hda_intel(E) joydev(E) snd_hda_codec(E) snd_seq_midi(E) snd_seq_midi_event(E) snd_hda_core(E) snd_hwdep(E) snd_rawmidi(E) snd_pcm(E) kvm(E) irqbypass(E) crct10dif_pclmul(E) snd_seq(E) crc32_pclmul(E) ghash_clmulni_intel(E) snd_seq_device(E) snd_timer(E) aesni_intel(E) aes_x86_64(E) lrw(E) gf128mul(E) glue_helper(E) ablk_helper(E) cryptd(E) snd(E) soundcore(E) serio_raw(E) shpchp(E) i2c_piix4(E) i2c_designware_platform(E) 8250_dw(E) i2c_designware_core(E) mac_hid(E) binfmt_misc(E)
[  141.420948]  nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sunrpc(E) parport_pc(E) ppdev(E) lp(E) parport(E) autofs4(E) hid_generic(E) usbhid(E) hid(E) psmouse(E) r8169(E) ahci(E) mii(E) libahci(E) wmi(E)
[  141.421042] CPU: 14 PID: 223 Comm: kworker/14:2 Tainted: G           OE   4.9.0-custom #4
[  141.421074] Hardware name: System manufacturer System Product Name/PRIME B350-PLUS, BIOS 0606 04/06/2017
[  141.421146] Workqueue: events amd_sched_job_timedout [amdgpu]
[  141.421169] task: ffff88020b03ba80 task.stack: ffffc900016f4000
[  141.421193] RIP: 0010:[<ffffffff81579ee1>]  [<ffffffff81579ee1>] fence_remove_callback+0x11/0x60
[  141.421229] RSP: 0018:ffffc900016f7d30  EFLAGS: 00010202
[  141.421250] RAX: ffff8801c049fc00 RBX: ffff8801d4d8dc00 RCX: 0000000000000000
[  141.421278] RDX: 0000000000000001 RSI: ffff8801c049fcc0 RDI: 0000000000000000
[  141.421307] RBP: ffffc900016f7d48 R08: 0000000000000000 R09: 0000000000000000
[  141.421334] R10: 00000020ed512a30 R11: 0000000000000001 R12: 0000000000000000
[  141.421362] R13: ffff880209ba4ba0 R14: ffff880209ba4c58 R15: ffff8801c055cc60
[  141.421390] FS:  0000000000000000(0000) GS:ffff88021ef80000(0000) knlGS:0000000000000000
[  141.421421] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  141.421443] CR2: 0000000000000030 CR3: 000000020b554000 CR4: 00000000003406e0
[  141.421471] Stack:
[  141.421480]  ffff8801d4d8dc00 ffff880209ba4c48 ffff880209ba4ba0 ffffc900016f7d78
[  141.421513]  ffffffffa0697920 ffff880209ba0000 0000000000000000 ffff880209ba2770
[  141.421549]  ffff880209ba4b08 ffffc900016f7df0 ffffffffa05ce2ae ffffffffa0509eb7
[  141.421583] Call Trace:
[  141.421628]  [<ffffffffa0697920>] amd_sched_hw_job_reset+0x50/0xb0 [amdgpu]
[  141.421676]  [<ffffffffa05ce2ae>] amdgpu_gpu_reset+0x8e/0x690 [amdgpu]
[  141.421712]  [<ffffffffa0509eb7>] ? drm_printk+0x97/0xa0 [drm]
[  141.421770]  [<ffffffffa0698156>] amdgpu_job_timedout+0x46/0x50 [amdgpu]
[  141.421829]  [<ffffffffa0696a07>] amd_sched_job_timedout+0x17/0x20 [amdgpu]
[  141.421859]  [<ffffffff81095493>] process_one_work+0x153/0x3f0
[  141.421884]  [<ffffffff81095c5b>] worker_thread+0x12b/0x4b0
[  141.421907]  [<ffffffff81095b30>] ? rescuer_thread+0x350/0x350
[  141.421931]  [<ffffffff8109b423>] kthread+0xd3/0xf0
[  141.421951]  [<ffffffff8109b350>] ? kthread_park+0x60/0x60
[  141.421975]  [<ffffffff817e1ee5>] ret_from_fork+0x25/0x30
[  141.421996] Code: ac 81 e8 a3 1f b0 ff 48 c7 c0 ea ff ff ff e9 48 ff ff ff 0f 1f 80 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 49 89 fc 53 <48> 8b 7f 30 48 89 f3 e8 73 7c 26 00 48 8b 13 48 39 d3 41 0f 95
[  141.422156] RIP  [<ffffffff81579ee1>] fence_remove_callback+0x11/0x60
[  141.422183]  RSP <ffffc900016f7d30>
[  141.422197] CR2: 0000000000000030
[  141.433483] ---[ end trace bc0949bf7ddd6d4b ]---

if the job is reset twice, then the parent could be NULL.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index acd882a188bc..5fd12db0fc58 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -387,7 +387,9 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
 
 	spin_lock(&sched->job_list_lock);
 	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
-		if (dma_fence_remove_callback(s_job->s_fence->parent, &s_job->s_fence->cb)) {
+		if (s_job->s_fence->parent &&
+		    dma_fence_remove_callback(s_job->s_fence->parent,
+					      &s_job->s_fence->cb)) {
 			dma_fence_put(s_job->s_fence->parent);
 			s_job->s_fence->parent = NULL;
 		}
-- 
cgit v1.2.3


From cca7ecb32b5920f05bb940cbe01dde19c0125620 Mon Sep 17 00:00:00 2001
From: Alex Xie <AlexBin.Xie@amd.com>
Date: Wed, 26 Apr 2017 13:31:01 -0400
Subject: drm/amdgpu: Fix use of interruptible waiting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Either in cgs functions or for callers of cgs functions:
1. The signal interrupt can affect the expected behaviour
2. There is no good mechanism to handle the corresponding error
3. There is no chance of deadlock in these single BO waiting
4. There is no clear benefit for interruptible waiting
5. Future caller of these functions might have same issue.

Signed-off-by: Alex Xie <AlexBin.Xie@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 1c7e6c28f93a..013f5f14dd75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -89,7 +89,7 @@ static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem,
 			       AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, &bo);
 	if (ret)
 		return ret;
-	ret = amdgpu_bo_reserve(bo, false);
+	ret = amdgpu_bo_reserve(bo, true);
 	if (unlikely(ret != 0))
 		return ret;
 
@@ -107,7 +107,7 @@ static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, cgs_handle_t km
 	struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle;
 
 	if (obj) {
-		int r = amdgpu_bo_reserve(obj, false);
+		int r = amdgpu_bo_reserve(obj, true);
 		if (likely(r == 0)) {
 			amdgpu_bo_unpin(obj);
 			amdgpu_bo_unreserve(obj);
@@ -215,7 +215,7 @@ static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h
 	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
 
 	if (obj) {
-		int r = amdgpu_bo_reserve(obj, false);
+		int r = amdgpu_bo_reserve(obj, true);
 		if (likely(r == 0)) {
 			amdgpu_bo_kunmap(obj);
 			amdgpu_bo_unpin(obj);
@@ -239,7 +239,7 @@ static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h
 	min_offset = obj->placements[0].fpfn << PAGE_SHIFT;
 	max_offset = obj->placements[0].lpfn << PAGE_SHIFT;
 
-	r = amdgpu_bo_reserve(obj, false);
+	r = amdgpu_bo_reserve(obj, true);
 	if (unlikely(r != 0))
 		return r;
 	r = amdgpu_bo_pin_restricted(obj, obj->prefered_domains,
@@ -252,7 +252,7 @@ static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t
 {
 	int r;
 	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-	r = amdgpu_bo_reserve(obj, false);
+	r = amdgpu_bo_reserve(obj, true);
 	if (unlikely(r != 0))
 		return r;
 	r = amdgpu_bo_unpin(obj);
@@ -265,7 +265,7 @@ static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h
 {
 	int r;
 	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-	r = amdgpu_bo_reserve(obj, false);
+	r = amdgpu_bo_reserve(obj, true);
 	if (unlikely(r != 0))
 		return r;
 	r = amdgpu_bo_kmap(obj, map);
@@ -277,7 +277,7 @@ static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t
 {
 	int r;
 	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-	r = amdgpu_bo_reserve(obj, false);
+	r = amdgpu_bo_reserve(obj, true);
 	if (unlikely(r != 0))
 		return r;
 	amdgpu_bo_kunmap(obj);
-- 
cgit v1.2.3


From 12d39245f6bea9dfddc29d75225a1d5ffc290186 Mon Sep 17 00:00:00 2001
From: Alex Xie <AlexBin.Xie@amd.com>
Date: Tue, 25 Apr 2017 17:09:24 -0400
Subject: drm/amdgpu: Fix use of interruptible waiting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is no good mechanism to handle the corresponding error.
When signal interrupt happens, unpin is not called.
As a result, inside AMDGPU, the statistic of pin size will be wrong.

Signed-off-by: Alex Xie <AlexBin.Xie@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 96926a221bd5..d46773b8f7e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -169,7 +169,7 @@ static void amdgpu_flip_cleanup_unpin(struct amdgpu_flip_work *work,
 void amdgpu_crtc_cleanup_flip_ctx(struct amdgpu_flip_work *work,
 				  struct amdgpu_bo *new_abo)
 {
-	if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) {
+	if (unlikely(amdgpu_bo_reserve(new_abo, true) != 0)) {
 		DRM_ERROR("failed to reserve new abo in error path\n");
 		amdgpu_flip_work_cleanup(work);
 		return;
-- 
cgit v1.2.3


From effa290caa9fa23d13b4531fcf55142a107a3b2a Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 21 Apr 2017 17:26:07 +0800
Subject: drm/amd/powerplay: correct UlvOffsetVid on Vega10.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 278def7380e7..5bb18a9a1056 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -1436,9 +1436,7 @@ static int vega10_populate_ulv_state(struct pp_hwmgr *hwmgr)
 			(struct phm_ppt_v2_information *)(hwmgr->pptable);
 
 	data->smc_state_table.pp_table.UlvOffsetVid =
-			(uint8_t)(table_info->us_ulv_voltage_offset *
-					VOLTAGE_VID_OFFSET_SCALE2 /
-					VOLTAGE_VID_OFFSET_SCALE1);
+			(uint8_t)table_info->us_ulv_voltage_offset;
 
 	data->smc_state_table.pp_table.UlvSmnclkDid =
 			(uint8_t)(table_info->us_ulv_smnclk_did);
@@ -2342,6 +2340,7 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
 			(uint8_t)(table_info->uc_vce_dpm_voltage_mode);
 	pp_table->Mp0DpmVoltageMode =
 			(uint8_t)(table_info->uc_mp0_dpm_voltage_mode);
+
 	pp_table->DisplayDpmVoltageMode =
 			(uint8_t)(table_info->uc_dcef_dpm_voltage_mode);
 
-- 
cgit v1.2.3


From 97782cc93f4440ba2bc111b8c84644b304c56676 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 21 Apr 2017 18:33:05 +0800
Subject: drm/amd/powerplay: disable cks by default on vega10.

run gpu test auto reboot when enable cks right now.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 5bb18a9a1056..be2b602fb8b4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -124,7 +124,7 @@ static void vega10_set_default_registry_data(struct pp_hwmgr *hwmgr)
 	}
 
 	data->registry_data.clock_stretcher_support =
-			hwmgr->feature_mask & PP_CLOCK_STRETCH_MASK ? true : false;
+			hwmgr->feature_mask & PP_CLOCK_STRETCH_MASK ? false : true;
 
 	data->registry_data.disable_water_mark = 0;
 
-- 
cgit v1.2.3


From 00c4855ef874e41d6b9f9b19d1d1e04bc70f0b31 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 21 Apr 2017 18:52:12 +0800
Subject: drm/amd/powerplay: refine set pcie dpm default table on vega10.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index be2b602fb8b4..5e3e89be9fb6 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -1182,29 +1182,9 @@ static int vega10_setup_default_pcie_table(struct pp_hwmgr *hwmgr)
 		else
 			pcie_table->lclk[i] =
 					bios_pcie_table->entries[i].pcie_sclk;
-
-		pcie_table->count++;
 	}
 
-	if (data->registry_data.pcieSpeedOverride)
-		pcie_table->pcie_gen[i] = data->registry_data.pcieSpeedOverride;
-	else
-		pcie_table->pcie_gen[i] =
-			bios_pcie_table->entries[bios_pcie_table->count - 1].gen_speed;
-
-	if (data->registry_data.pcieLaneOverride)
-		pcie_table->pcie_lane[i] = data->registry_data.pcieLaneOverride;
-	else
-		pcie_table->pcie_lane[i] =
-			bios_pcie_table->entries[bios_pcie_table->count - 1].lane_width;
-
-	if (data->registry_data.pcieClockOverride)
-		pcie_table->lclk[i] = data->registry_data.pcieClockOverride;
-	else
-		pcie_table->lclk[i] =
-			bios_pcie_table->entries[bios_pcie_table->count - 1].pcie_sclk;
-
-	pcie_table->count++;
+	pcie_table->count = NUM_LINK_LEVELS;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 2c55b16bf0e1492ba662d884c81d324538cafce1 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 27 Apr 2017 17:13:39 +0200
Subject: drm/amdgpu: remove unused and mostly unimplemented CGS functions v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Those functions are all unused and some not even implemented.

v2: keep cgs_get_pci_resource, it is used by the ACP driver.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c  | 198 -----------------------
 drivers/gpu/drm/amd/include/cgs_common.h | 270 -------------------------------
 2 files changed, 468 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 013f5f14dd75..c6dba1eaefbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -42,82 +42,6 @@ struct amdgpu_cgs_device {
 	struct amdgpu_device *adev =					\
 		((struct amdgpu_cgs_device *)cgs_device)->adev
 
-static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type,
-				   uint64_t *mc_start, uint64_t *mc_size,
-				   uint64_t *mem_size)
-{
-	CGS_FUNC_ADEV;
-	switch(type) {
-	case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB:
-	case CGS_GPU_MEM_TYPE__VISIBLE_FB:
-		*mc_start = 0;
-		*mc_size = adev->mc.visible_vram_size;
-		*mem_size = adev->mc.visible_vram_size - adev->vram_pin_size;
-		break;
-	case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
-	case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
-		*mc_start = adev->mc.visible_vram_size;
-		*mc_size = adev->mc.real_vram_size - adev->mc.visible_vram_size;
-		*mem_size = *mc_size;
-		break;
-	case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
-	case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
-		*mc_start = adev->mc.gtt_start;
-		*mc_size = adev->mc.gtt_size;
-		*mem_size = adev->mc.gtt_size - adev->gart_pin_size;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem,
-				uint64_t size,
-				uint64_t min_offset, uint64_t max_offset,
-				cgs_handle_t *kmem_handle, uint64_t *mcaddr)
-{
-	CGS_FUNC_ADEV;
-	int ret;
-	struct amdgpu_bo *bo;
-	struct page *kmem_page = vmalloc_to_page(kmem);
-	int npages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT;
-
-	struct sg_table *sg = drm_prime_pages_to_sg(&kmem_page, npages);
-	ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false,
-			       AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, &bo);
-	if (ret)
-		return ret;
-	ret = amdgpu_bo_reserve(bo, true);
-	if (unlikely(ret != 0))
-		return ret;
-
-	/* pin buffer into GTT */
-	ret = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_GTT,
-				       min_offset, max_offset, mcaddr);
-	amdgpu_bo_unreserve(bo);
-
-	*kmem_handle = (cgs_handle_t)bo;
-	return ret;
-}
-
-static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, cgs_handle_t kmem_handle)
-{
-	struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle;
-
-	if (obj) {
-		int r = amdgpu_bo_reserve(obj, true);
-		if (likely(r == 0)) {
-			amdgpu_bo_unpin(obj);
-			amdgpu_bo_unreserve(obj);
-		}
-		amdgpu_bo_unref(&obj);
-
-	}
-	return 0;
-}
-
 static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 				    enum cgs_gpu_mem_type type,
 				    uint64_t size, uint64_t align,
@@ -349,62 +273,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
 	WARN(1, "Invalid indirect register space");
 }
 
-static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device *cgs_device, unsigned addr)
-{
-	CGS_FUNC_ADEV;
-	uint8_t val;
-	int ret = pci_read_config_byte(adev->pdev, addr, &val);
-	if (WARN(ret, "pci_read_config_byte error"))
-		return 0;
-	return val;
-}
-
-static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device *cgs_device, unsigned addr)
-{
-	CGS_FUNC_ADEV;
-	uint16_t val;
-	int ret = pci_read_config_word(adev->pdev, addr, &val);
-	if (WARN(ret, "pci_read_config_word error"))
-		return 0;
-	return val;
-}
-
-static uint32_t amdgpu_cgs_read_pci_config_dword(struct cgs_device *cgs_device,
-						 unsigned addr)
-{
-	CGS_FUNC_ADEV;
-	uint32_t val;
-	int ret = pci_read_config_dword(adev->pdev, addr, &val);
-	if (WARN(ret, "pci_read_config_dword error"))
-		return 0;
-	return val;
-}
-
-static void amdgpu_cgs_write_pci_config_byte(struct cgs_device *cgs_device, unsigned addr,
-					     uint8_t value)
-{
-	CGS_FUNC_ADEV;
-	int ret = pci_write_config_byte(adev->pdev, addr, value);
-	WARN(ret, "pci_write_config_byte error");
-}
-
-static void amdgpu_cgs_write_pci_config_word(struct cgs_device *cgs_device, unsigned addr,
-					     uint16_t value)
-{
-	CGS_FUNC_ADEV;
-	int ret = pci_write_config_word(adev->pdev, addr, value);
-	WARN(ret, "pci_write_config_word error");
-}
-
-static void amdgpu_cgs_write_pci_config_dword(struct cgs_device *cgs_device, unsigned addr,
-					      uint32_t value)
-{
-	CGS_FUNC_ADEV;
-	int ret = pci_write_config_dword(adev->pdev, addr, value);
-	WARN(ret, "pci_write_config_dword error");
-}
-
-
 static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device,
 				       enum cgs_resource_type resource_type,
 				       uint64_t size,
@@ -477,56 +345,6 @@ static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigne
 		adev->mode_info.atom_context, table, args);
 }
 
-static int amdgpu_cgs_create_pm_request(struct cgs_device *cgs_device, cgs_handle_t *request)
-{
-	/* TODO */
-	return 0;
-}
-
-static int amdgpu_cgs_destroy_pm_request(struct cgs_device *cgs_device, cgs_handle_t request)
-{
-	/* TODO */
-	return 0;
-}
-
-static int amdgpu_cgs_set_pm_request(struct cgs_device *cgs_device, cgs_handle_t request,
-				     int active)
-{
-	/* TODO */
-	return 0;
-}
-
-static int amdgpu_cgs_pm_request_clock(struct cgs_device *cgs_device, cgs_handle_t request,
-				       enum cgs_clock clock, unsigned freq)
-{
-	/* TODO */
-	return 0;
-}
-
-static int amdgpu_cgs_pm_request_engine(struct cgs_device *cgs_device, cgs_handle_t request,
-					enum cgs_engine engine, int powered)
-{
-	/* TODO */
-	return 0;
-}
-
-
-
-static int amdgpu_cgs_pm_query_clock_limits(struct cgs_device *cgs_device,
-					    enum cgs_clock clock,
-					    struct cgs_clock_limits *limits)
-{
-	/* TODO */
-	return 0;
-}
-
-static int amdgpu_cgs_set_camera_voltages(struct cgs_device *cgs_device, uint32_t mask,
-					  const uint32_t *voltages)
-{
-	DRM_ERROR("not implemented");
-	return -EPERM;
-}
-
 struct cgs_irq_params {
 	unsigned src_id;
 	cgs_irq_source_set_func_t set;
@@ -1269,9 +1087,6 @@ static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
 }
 
 static const struct cgs_ops amdgpu_cgs_ops = {
-	.gpu_mem_info = amdgpu_cgs_gpu_mem_info,
-	.gmap_kmem = amdgpu_cgs_gmap_kmem,
-	.gunmap_kmem = amdgpu_cgs_gunmap_kmem,
 	.alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem,
 	.free_gpu_mem = amdgpu_cgs_free_gpu_mem,
 	.gmap_gpu_mem = amdgpu_cgs_gmap_gpu_mem,
@@ -1282,23 +1097,10 @@ static const struct cgs_ops amdgpu_cgs_ops = {
 	.write_register = amdgpu_cgs_write_register,
 	.read_ind_register = amdgpu_cgs_read_ind_register,
 	.write_ind_register = amdgpu_cgs_write_ind_register,
-	.read_pci_config_byte = amdgpu_cgs_read_pci_config_byte,
-	.read_pci_config_word = amdgpu_cgs_read_pci_config_word,
-	.read_pci_config_dword = amdgpu_cgs_read_pci_config_dword,
-	.write_pci_config_byte = amdgpu_cgs_write_pci_config_byte,
-	.write_pci_config_word = amdgpu_cgs_write_pci_config_word,
-	.write_pci_config_dword = amdgpu_cgs_write_pci_config_dword,
 	.get_pci_resource = amdgpu_cgs_get_pci_resource,
 	.atom_get_data_table = amdgpu_cgs_atom_get_data_table,
 	.atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs,
 	.atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table,
-	.create_pm_request = amdgpu_cgs_create_pm_request,
-	.destroy_pm_request = amdgpu_cgs_destroy_pm_request,
-	.set_pm_request = amdgpu_cgs_set_pm_request,
-	.pm_request_clock = amdgpu_cgs_pm_request_clock,
-	.pm_request_engine = amdgpu_cgs_pm_request_engine,
-	.pm_query_clock_limits = amdgpu_cgs_pm_query_clock_limits,
-	.set_camera_voltages = amdgpu_cgs_set_camera_voltages,
 	.get_firmware_info = amdgpu_cgs_get_firmware_info,
 	.rel_firmware = amdgpu_cgs_rel_firmware,
 	.set_powergating_state = amdgpu_cgs_set_powergating_state,
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index 17b9d41f3e87..0a94f749e3c0 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -53,20 +53,6 @@ enum cgs_ind_reg {
 	CGS_IND_REG__AUDIO_ENDPT
 };
 
-/**
- * enum cgs_clock - Clocks controlled by the SMU
- */
-enum cgs_clock {
-	CGS_CLOCK__SCLK,
-	CGS_CLOCK__MCLK,
-	CGS_CLOCK__VCLK,
-	CGS_CLOCK__DCLK,
-	CGS_CLOCK__ECLK,
-	CGS_CLOCK__ACLK,
-	CGS_CLOCK__ICLK,
-	/* ... */
-};
-
 /**
  * enum cgs_engine - Engines that can be statically power-gated
  */
@@ -81,15 +67,6 @@ enum cgs_engine {
 	/* ... */
 };
 
-/**
- * enum cgs_voltage_planes - Voltage planes for external camera HW
- */
-enum cgs_voltage_planes {
-	CGS_VOLTAGE_PLANE__SENSOR0,
-	CGS_VOLTAGE_PLANE__SENSOR1,
-	/* ... */
-};
-
 /*
  * enum cgs_ucode_id - Firmware types for different IPs
  */
@@ -146,17 +123,6 @@ enum cgs_resource_type {
 	CGS_RESOURCE_TYPE_ROM,
 };
 
-/**
- * struct cgs_clock_limits - Clock limits
- *
- * Clocks are specified in 10KHz units.
- */
-struct cgs_clock_limits {
-	unsigned min;		/**< Minimum supported frequency */
-	unsigned max;		/**< Maxumim supported frequency */
-	unsigned sustainable;	/**< Thermally sustainable frequency */
-};
-
 /**
  * struct cgs_firmware_info - Firmware information
  */
@@ -220,54 +186,6 @@ struct cgs_acpi_method_info {
 	uint32_t padding[9];
 };
 
-/**
- * cgs_gpu_mem_info() - Return information about memory heaps
- * @cgs_device: opaque device handle
- * @type:	memory type
- * @mc_start:	Start MC address of the heap (output)
- * @mc_size:	MC address space size (output)
- * @mem_size:	maximum amount of memory available for allocation (output)
- *
- * This function returns information about memory heaps. The type
- * parameter is used to select the memory heap. The mc_start and
- * mc_size for GART heaps may be bigger than the memory available for
- * allocation.
- *
- * mc_start and mc_size are undefined for non-contiguous FB memory
- * types, since buffers allocated with these types may or may not be
- * GART mapped.
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_gpu_mem_info_t)(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type,
-				  uint64_t *mc_start, uint64_t *mc_size,
-				  uint64_t *mem_size);
-
-/**
- * cgs_gmap_kmem() - map kernel memory to GART aperture
- * @cgs_device:	opaque device handle
- * @kmem:	pointer to kernel memory
- * @size:	size to map
- * @min_offset: minimum offset from start of GART aperture
- * @max_offset: maximum offset from start of GART aperture
- * @kmem_handle: kernel memory handle (output)
- * @mcaddr:	MC address (output)
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_gmap_kmem_t)(struct cgs_device *cgs_device, void *kmem, uint64_t size,
-			       uint64_t min_offset, uint64_t max_offset,
-			       cgs_handle_t *kmem_handle, uint64_t *mcaddr);
-
-/**
- * cgs_gunmap_kmem() - unmap kernel memory
- * @cgs_device:	opaque device handle
- * @kmem_handle: kernel memory handle returned by gmap_kmem
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_gunmap_kmem_t)(struct cgs_device *cgs_device, cgs_handle_t kmem_handle);
-
 /**
  * cgs_alloc_gpu_mem() - Allocate GPU memory
  * @cgs_device:	opaque device handle
@@ -391,62 +309,6 @@ typedef uint32_t (*cgs_read_ind_register_t)(struct cgs_device *cgs_device, enum
 typedef void (*cgs_write_ind_register_t)(struct cgs_device *cgs_device, enum cgs_ind_reg space,
 					 unsigned index, uint32_t value);
 
-/**
- * cgs_read_pci_config_byte() - Read byte from PCI configuration space
- * @cgs_device:	opaque device handle
- * @addr:	address
- *
- * Return:  Value read
- */
-typedef uint8_t (*cgs_read_pci_config_byte_t)(struct cgs_device *cgs_device, unsigned addr);
-
-/**
- * cgs_read_pci_config_word() - Read word from PCI configuration space
- * @cgs_device:	opaque device handle
- * @addr:	address, must be word-aligned
- *
- * Return:  Value read
- */
-typedef uint16_t (*cgs_read_pci_config_word_t)(struct cgs_device *cgs_device, unsigned addr);
-
-/**
- * cgs_read_pci_config_dword() - Read dword from PCI configuration space
- * @cgs_device:	opaque device handle
- * @addr:	address, must be dword-aligned
- *
- * Return:  Value read
- */
-typedef uint32_t (*cgs_read_pci_config_dword_t)(struct cgs_device *cgs_device,
-						unsigned addr);
-
-/**
- * cgs_write_pci_config_byte() - Write byte to PCI configuration space
- * @cgs_device:	opaque device handle
- * @addr:	address
- * @value:	value to write
- */
-typedef void (*cgs_write_pci_config_byte_t)(struct cgs_device *cgs_device, unsigned addr,
-					    uint8_t value);
-
-/**
- * cgs_write_pci_config_word() - Write byte to PCI configuration space
- * @cgs_device:	opaque device handle
- * @addr:	address, must be word-aligned
- * @value:	value to write
- */
-typedef void (*cgs_write_pci_config_word_t)(struct cgs_device *cgs_device, unsigned addr,
-					    uint16_t value);
-
-/**
- * cgs_write_pci_config_dword() - Write byte to PCI configuration space
- * @cgs_device:	opaque device handle
- * @addr:	address, must be dword-aligned
- * @value:	value to write
- */
-typedef void (*cgs_write_pci_config_dword_t)(struct cgs_device *cgs_device, unsigned addr,
-					     uint32_t value);
-
-
 /**
  * cgs_get_pci_resource() - provide access to a device resource (PCI BAR)
  * @cgs_device:	opaque device handle
@@ -500,87 +362,6 @@ typedef int (*cgs_atom_get_cmd_table_revs_t)(struct cgs_device *cgs_device, unsi
 typedef int (*cgs_atom_exec_cmd_table_t)(struct cgs_device *cgs_device,
 					 unsigned table, void *args);
 
-/**
- * cgs_create_pm_request() - Create a power management request
- * @cgs_device:	opaque device handle
- * @request:	handle of created PM request (output)
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_create_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t *request);
-
-/**
- * cgs_destroy_pm_request() - Destroy a power management request
- * @cgs_device:	opaque device handle
- * @request:	handle of created PM request
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_destroy_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t request);
-
-/**
- * cgs_set_pm_request() - Activate or deactiveate a PM request
- * @cgs_device:	opaque device handle
- * @request:	PM request handle
- * @active:	0 = deactivate, non-0 = activate
- *
- * While a PM request is active, its minimum clock requests are taken
- * into account as the requested engines are powered up. When the
- * request is inactive, the engines may be powered down and clocks may
- * be lower, depending on other PM requests by other driver
- * components.
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_set_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t request,
-				    int active);
-
-/**
- * cgs_pm_request_clock() - Request a minimum frequency for a specific clock
- * @cgs_device:	opaque device handle
- * @request:	PM request handle
- * @clock:	which clock?
- * @freq:	requested min. frequency in 10KHz units (0 to clear request)
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_pm_request_clock_t)(struct cgs_device *cgs_device, cgs_handle_t request,
-				      enum cgs_clock clock, unsigned freq);
-
-/**
- * cgs_pm_request_engine() - Request an engine to be powered up
- * @cgs_device:	opaque device handle
- * @request:	PM request handle
- * @engine:	which engine?
- * @powered:	0 = powered down, non-0 = powered up
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_pm_request_engine_t)(struct cgs_device *cgs_device, cgs_handle_t request,
-				       enum cgs_engine engine, int powered);
-
-/**
- * cgs_pm_query_clock_limits() - Query clock frequency limits
- * @cgs_device:	opaque device handle
- * @clock:	which clock?
- * @limits:	clock limits
- *
- * Return:  0 on success, -errno otherwise
- */
-typedef int (*cgs_pm_query_clock_limits_t)(struct cgs_device *cgs_device,
-					   enum cgs_clock clock,
-					   struct cgs_clock_limits *limits);
-
-/**
- * cgs_set_camera_voltages() - Apply specific voltages to PMIC voltage planes
- * @cgs_device:	opaque device handle
- * @mask:	bitmask of voltages to change (1<<CGS_VOLTAGE_PLANE__xyz|...)
- * @voltages:	pointer to array of voltage values in 1mV units
- *
- * Return: 0 on success, -errno otherwise
- */
-typedef int (*cgs_set_camera_voltages_t)(struct cgs_device *cgs_device, uint32_t mask,
-					 const uint32_t *voltages);
 /**
  * cgs_get_firmware_info - Get the firmware information from core driver
  * @cgs_device: opaque device handle
@@ -627,9 +408,6 @@ typedef int (*cgs_enter_safe_mode)(struct cgs_device *cgs_device, bool en);
 
 struct cgs_ops {
 	/* memory management calls (similar to KFD interface) */
-	cgs_gpu_mem_info_t gpu_mem_info;
-	cgs_gmap_kmem_t gmap_kmem;
-	cgs_gunmap_kmem_t gunmap_kmem;
 	cgs_alloc_gpu_mem_t alloc_gpu_mem;
 	cgs_free_gpu_mem_t free_gpu_mem;
 	cgs_gmap_gpu_mem_t gmap_gpu_mem;
@@ -641,27 +419,12 @@ struct cgs_ops {
 	cgs_write_register_t write_register;
 	cgs_read_ind_register_t read_ind_register;
 	cgs_write_ind_register_t write_ind_register;
-	/* PCI configuration space access */
-	cgs_read_pci_config_byte_t read_pci_config_byte;
-	cgs_read_pci_config_word_t read_pci_config_word;
-	cgs_read_pci_config_dword_t read_pci_config_dword;
-	cgs_write_pci_config_byte_t write_pci_config_byte;
-	cgs_write_pci_config_word_t write_pci_config_word;
-	cgs_write_pci_config_dword_t write_pci_config_dword;
 	/* PCI resources */
 	cgs_get_pci_resource_t get_pci_resource;
 	/* ATOM BIOS */
 	cgs_atom_get_data_table_t atom_get_data_table;
 	cgs_atom_get_cmd_table_revs_t atom_get_cmd_table_revs;
 	cgs_atom_exec_cmd_table_t atom_exec_cmd_table;
-	/* Power management */
-	cgs_create_pm_request_t create_pm_request;
-	cgs_destroy_pm_request_t destroy_pm_request;
-	cgs_set_pm_request_t set_pm_request;
-	cgs_pm_request_clock_t pm_request_clock;
-	cgs_pm_request_engine_t pm_request_engine;
-	cgs_pm_query_clock_limits_t pm_query_clock_limits;
-	cgs_set_camera_voltages_t set_camera_voltages;
 	/* Firmware Info */
 	cgs_get_firmware_info get_firmware_info;
 	cgs_rel_firmware rel_firmware;
@@ -696,12 +459,6 @@ struct cgs_device
 #define CGS_OS_CALL(func,dev,...) \
 	(((struct cgs_device *)dev)->os_ops->func(dev, ##__VA_ARGS__))
 
-#define cgs_gpu_mem_info(dev,type,mc_start,mc_size,mem_size)		\
-	CGS_CALL(gpu_mem_info,dev,type,mc_start,mc_size,mem_size)
-#define cgs_gmap_kmem(dev,kmem,size,min_off,max_off,kmem_handle,mcaddr)	\
-	CGS_CALL(gmap_kmem,dev,kmem,size,min_off,max_off,kmem_handle,mcaddr)
-#define cgs_gunmap_kmem(dev,kmem_handle)	\
-	CGS_CALL(gunmap_kmem,dev,keme_handle)
 #define cgs_alloc_gpu_mem(dev,type,size,align,min_off,max_off,handle)	\
 	CGS_CALL(alloc_gpu_mem,dev,type,size,align,min_off,max_off,handle)
 #define cgs_free_gpu_mem(dev,handle)		\
@@ -724,19 +481,6 @@ struct cgs_device
 #define cgs_write_ind_register(dev,space,index,value)		\
 	CGS_CALL(write_ind_register,dev,space,index,value)
 
-#define cgs_read_pci_config_byte(dev,addr)	\
-	CGS_CALL(read_pci_config_byte,dev,addr)
-#define cgs_read_pci_config_word(dev,addr)	\
-	CGS_CALL(read_pci_config_word,dev,addr)
-#define cgs_read_pci_config_dword(dev,addr)		\
-	CGS_CALL(read_pci_config_dword,dev,addr)
-#define cgs_write_pci_config_byte(dev,addr,value)	\
-	CGS_CALL(write_pci_config_byte,dev,addr,value)
-#define cgs_write_pci_config_word(dev,addr,value)	\
-	CGS_CALL(write_pci_config_word,dev,addr,value)
-#define cgs_write_pci_config_dword(dev,addr,value)	\
-	CGS_CALL(write_pci_config_dword,dev,addr,value)
-
 #define cgs_atom_get_data_table(dev,table,size,frev,crev)	\
 	CGS_CALL(atom_get_data_table,dev,table,size,frev,crev)
 #define cgs_atom_get_cmd_table_revs(dev,table,frev,crev)	\
@@ -744,20 +488,6 @@ struct cgs_device
 #define cgs_atom_exec_cmd_table(dev,table,args)		\
 	CGS_CALL(atom_exec_cmd_table,dev,table,args)
 
-#define cgs_create_pm_request(dev,request)	\
-	CGS_CALL(create_pm_request,dev,request)
-#define cgs_destroy_pm_request(dev,request)		\
-	CGS_CALL(destroy_pm_request,dev,request)
-#define cgs_set_pm_request(dev,request,active)		\
-	CGS_CALL(set_pm_request,dev,request,active)
-#define cgs_pm_request_clock(dev,request,clock,freq)		\
-	CGS_CALL(pm_request_clock,dev,request,clock,freq)
-#define cgs_pm_request_engine(dev,request,engine,powered)	\
-	CGS_CALL(pm_request_engine,dev,request,engine,powered)
-#define cgs_pm_query_clock_limits(dev,clock,limits)		\
-	CGS_CALL(pm_query_clock_limits,dev,clock,limits)
-#define cgs_set_camera_voltages(dev,mask,voltages)	\
-	CGS_CALL(set_camera_voltages,dev,mask,voltages)
 #define cgs_get_firmware_info(dev, type, info)	\
 	CGS_CALL(get_firmware_info, dev, type, info)
 #define cgs_rel_firmware(dev, type)	\
-- 
cgit v1.2.3


From 408bfe7c3c5d036947b509356f494dc6b46025ff Mon Sep 17 00:00:00 2001
From: Junwei Zhang <Jerry.Zhang@amd.com>
Date: Thu, 27 Apr 2017 11:12:07 +0800
Subject: drm/amdgpu: export more gpu info for gfx9

v2: 64-bit aligned for gpu info
v3: squash in wave_front_fix

Signed-off-by: Ken Wang <Qingqing.Wang@amd.com>
Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Qiang Yu <Qiang.Yu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 11 +++++++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  3 +++
 include/uapi/drm/amdgpu_drm.h           | 19 +++++++++++++++++++
 4 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a4fc54c70f30..c9f935710d40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -967,6 +967,9 @@ struct amdgpu_gfx_config {
 	unsigned mc_arb_ramcfg;
 	unsigned gb_addr_config;
 	unsigned num_rbs;
+	unsigned gs_vgt_table_depth;
+	unsigned gs_prim_buffer_depth;
+	unsigned max_gs_waves_per_vgt;
 
 	uint32_t tile_mode_array[32];
 	uint32_t macrotile_mode_array[16];
@@ -981,6 +984,7 @@ struct amdgpu_gfx_config {
 struct amdgpu_cu_info {
 	uint32_t number; /* total active CU number */
 	uint32_t ao_cu_mask;
+	uint32_t wave_front_size;
 	uint32_t bitmap[4][4];
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 832be632478f..21f616cdb279 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -546,10 +546,21 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 
 		if (amdgpu_ngg) {
 			dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr;
+			dev_info.prim_buf_size = adev->gfx.ngg.buf[PRIM].size;
 			dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr;
+			dev_info.pos_buf_size = adev->gfx.ngg.buf[POS].size;
 			dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr;
+			dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[CNTL].size;
 			dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr;
+			dev_info.param_buf_size = adev->gfx.ngg.buf[PARAM].size;
 		}
+		dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size;
+		dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs;
+		dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
+		dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches;
+		dev_info.gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth;
+		dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth;
+		dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_waves_per_vgt;
 
 		return copy_to_user(out, &dev_info,
 				    min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9a9cb90e2f5f..210d21c085f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -785,6 +785,9 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+		adev->gfx.config.gs_vgt_table_depth = 32;
+		adev->gfx.config.gs_prim_buffer_depth = 1792;
+		adev->gfx.config.max_gs_waves_per_vgt = 32;
 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	default:
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 95260e5043af..6c249e5cfb09 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -767,6 +767,25 @@ struct drm_amdgpu_info_device {
 	__u64 cntl_sb_buf_gpu_addr;
 	/* NGG Parameter Cache */
 	__u64 param_buf_gpu_addr;
+	__u32 prim_buf_size;
+	__u32 pos_buf_size;
+	__u32 cntl_sb_buf_size;
+	__u32 param_buf_size;
+	/* wavefront size*/
+	__u32 wave_front_size;
+	/* shader visible vgprs*/
+	__u32 num_shader_visible_vgprs;
+	/* CU per shader array*/
+	__u32 num_cu_per_sh;
+	/* number of tcc blocks*/
+	__u32 num_tcc_blocks;
+	/* gs vgt table depth*/
+	__u32 gs_vgt_table_depth;
+	/* gs primitive buffer depth*/
+	__u32 gs_prim_buffer_depth;
+	/* max gs wavefront per vgt*/
+	__u32 max_gs_waves_per_vgt;
+	__u32 _pad1;
 };
 
 struct drm_amdgpu_info_hw_ip {
-- 
cgit v1.2.3


From 44eb8c1b33619abc35cba0266ff497c843bd108d Mon Sep 17 00:00:00 2001
From: Junwei Zhang <Jerry.Zhang@amd.com>
Date: Thu, 27 Apr 2017 16:27:43 +0800
Subject: drm/amdgpu: bump version for exporting gpu info for gfx9

Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index bb47fb3ce341..eb1345627501 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -64,9 +64,10 @@
  * - 3.12.0 - Add query for double offchip LDS buffers
  * - 3.13.0 - Add PRT support
  * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality
+ * - 3.15.0 - Export more gpu info for gfx9
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	14
+#define KMS_DRIVER_MINOR	15
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
-- 
cgit v1.2.3


From 10e709cb296c98424c03408d23e3addeddcd4088 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Thu, 27 Apr 2017 15:13:52 +0800
Subject: drm/amdgpu: fix deadlock of reservation between cs and gpu reset v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

the case could happen when gpu reset:
1. when gpu reset, cs can be continue until sw queue is full, then push job will wait with holding pd reservation.
2. gpu_reset routine will also need pd reservation to restore page table from their shadow.
3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs releases reservation.

v2: handle amdgpu_cs_submit error path.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Monk Liu <monk.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ec71b9320561..4e6b9501ab0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1074,6 +1074,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
 	amdgpu_job_free_resources(job);
+	amdgpu_cs_parser_fini(p, 0, true);
 
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
@@ -1129,7 +1130,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		goto out;
 
 	r = amdgpu_cs_submit(&parser, cs);
+	if (r)
+		goto out;
 
+	return 0;
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
 	return r;
-- 
cgit v1.2.3


From 1dfc41d44c20dc3efbd0139d7f991d13c6daa875 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Thu, 27 Apr 2017 15:46:35 +0800
Subject: drm/amd/powerplay: add disable_smc_ctf callback in hwmgr.

export disablesmcctf to eventmgr.
need to disable temperature alert when s3/s4.
otherwise, when resume back,enable temperature
alert will fail.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c | 10 ++++++++++
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c      |  1 +
 drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h   |  2 +-
 drivers/gpu/drm/amd/powerplay/inc/hwmgr.h             |  1 +
 4 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 23bba2c8b18e..fcc722ea7649 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -501,3 +501,13 @@ int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_i
 
 	return hwmgr->hwmgr_func->get_max_high_clocks(hwmgr, clocks);
 }
+
+int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr)
+{
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (hwmgr->hwmgr_func->disable_smc_firmware_ctf == NULL)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->disable_smc_firmware_ctf(hwmgr);
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 8f663ab56a80..4ad84530c88a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -4695,6 +4695,7 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
 	.release_firmware = smu7_release_firmware,
 	.set_power_profile_state = smu7_set_power_profile_state,
 	.avfs_control = smu7_avfs_control,
+	.disable_smc_firmware_ctf = smu7_thermal_disable_alert,
 };
 
 uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock,
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 5345b50761f4..a1ebe1014492 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -431,6 +431,6 @@ extern int phm_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
 		struct pp_display_clock_request *clock);
 
 extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
-
+extern int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr);
 #endif /* _HARDWARE_MANAGER_H_ */
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 320225dd3328..98d38c31f6a6 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -373,6 +373,7 @@ struct pp_hwmgr_func {
 	int (*set_power_profile_state)(struct pp_hwmgr *hwmgr,
 			struct amd_pp_profile *request);
 	int (*avfs_control)(struct pp_hwmgr *hwmgr, bool enable);
+	int (*disable_smc_firmware_ctf)(struct pp_hwmgr *hwmgr);
 };
 
 struct pp_table_func {
-- 
cgit v1.2.3


From f8dc9476d9fe25b982e642a733967d7b5fbe5ae3 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 28 Apr 2017 12:54:26 +0800
Subject: drm/amd/powerplay: complete disable_smc_firmware_ctf_tasks.

Disable ctf in eventmgr to fix S3/S4 support.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c | 2 +-
 drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c     | 5 +++++
 drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h     | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c
index 9ef2d90e2886..b82c43af59ab 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c
@@ -219,7 +219,7 @@ const pem_event_action notify_smu_suspend_tasks[] = {
 };
 
 const pem_event_action disable_smc_firmware_ctf_tasks[] = {
-	/* PEM_Task_DisableSMCFirmwareCTF,*/
+	pem_task_disable_smc_firmware_ctf,
 	NULL
 };
 
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
index e04216ec7ee1..8c4ebaae1e0c 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
@@ -173,6 +173,11 @@ int pem_task_stop_asic_block_usage(struct pp_eventmgr *eventmgr, struct pem_even
 	return 0;
 }
 
+int pem_task_disable_smc_firmware_ctf(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data)
+{
+	return phm_disable_smc_firmware_ctf(eventmgr->hwmgr);
+}
+
 int pem_task_setup_asic(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data)
 {
 	return phm_setup_asic(eventmgr->hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h
index 6c6297e3b598..37e7ca5a58e0 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h
@@ -84,5 +84,6 @@ int pem_task_update_allowed_performance_levels(struct pp_eventmgr *eventmgr, str
 /*thermal */
 int pem_task_initialize_thermal_controller(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data);
 int pem_task_uninitialize_thermal_controller(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data);
+int pem_task_disable_smc_firmware_ctf(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data);
 
 #endif /* _EVENT_TASKS_H_ */
-- 
cgit v1.2.3


From 8b9242eddd51f17b8306d6c96172fd68ef1106c6 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Thu, 27 Apr 2017 15:48:56 +0800
Subject: drm/amd/powerplay: implement stop dpm task for vega10.

Add functions to disable dpm for S3/S4.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 97 ++++++++++++++++++++++
 .../gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c | 23 +++++
 .../gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h |  1 +
 .../gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c   |  2 +-
 .../gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h   |  1 +
 5 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 5e3e89be9fb6..68eae529df50 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -2420,6 +2420,26 @@ static int vega10_enable_thermal_protection(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int vega10_disable_thermal_protection(struct pp_hwmgr *hwmgr)
+{
+	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
+
+	if (data->smu_features[GNLD_THERMAL].supported) {
+		if (!data->smu_features[GNLD_THERMAL].enabled)
+			pr_info("THERMAL Feature Already disabled!");
+
+		PP_ASSERT_WITH_CODE(
+				!vega10_enable_smc_features(hwmgr->smumgr,
+				false,
+				data->smu_features[GNLD_THERMAL].smu_feature_bitmap),
+				"disable THERMAL Feature Failed!",
+				return -1);
+		data->smu_features[GNLD_THERMAL].enabled = false;
+	}
+
+	return 0;
+}
+
 static int vega10_enable_vrhot_feature(struct pp_hwmgr *hwmgr)
 {
 	struct vega10_hwmgr *data =
@@ -2498,6 +2518,37 @@ static int vega10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int vega10_stop_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap)
+{
+	struct vega10_hwmgr *data =
+			(struct vega10_hwmgr *)(hwmgr->backend);
+	uint32_t i, feature_mask = 0;
+
+
+	if(data->smu_features[GNLD_LED_DISPLAY].supported == true){
+		PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
+				true, data->smu_features[GNLD_LED_DISPLAY].smu_feature_bitmap),
+		"Attempt to Enable LED DPM feature Failed!", return -EINVAL);
+		data->smu_features[GNLD_LED_DISPLAY].enabled = true;
+	}
+
+	for (i = 0; i < GNLD_DPM_MAX; i++) {
+		if (data->smu_features[i].smu_feature_bitmap & bitmap) {
+			if (data->smu_features[i].supported) {
+				if (data->smu_features[i].enabled) {
+					feature_mask |= data->smu_features[i].
+							smu_feature_bitmap;
+					data->smu_features[i].enabled = false;
+				}
+			}
+		}
+	}
+
+	vega10_enable_smc_features(hwmgr->smumgr, false, feature_mask);
+
+	return 0;
+}
+
 /**
  * @brief Tell SMC to enabled the supported DPMs.
  *
@@ -4356,11 +4407,55 @@ vega10_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmg
 	return is_update_required;
 }
 
+static int vega10_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
+{
+	int tmp_result, result = 0;
+
+	tmp_result = (vega10_is_dpm_running(hwmgr)) ? 0 : -1;
+	PP_ASSERT_WITH_CODE(tmp_result == 0,
+			"DPM is not running right now, no need to disable DPM!",
+			return 0);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_ThermalController))
+		vega10_disable_thermal_protection(hwmgr);
+
+	tmp_result = vega10_disable_power_containment(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable power containment!", result = tmp_result);
+
+	tmp_result = vega10_avfs_enable(hwmgr, false);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable AVFS!", result = tmp_result);
+
+	tmp_result = vega10_stop_dpm(hwmgr, SMC_DPM_FEATURES);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to stop DPM!", result = tmp_result);
+
+	return result;
+}
+
+static int vega10_power_off_asic(struct pp_hwmgr *hwmgr)
+{
+	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
+	int result;
+
+	result = vega10_disable_dpm_tasks(hwmgr);
+	PP_ASSERT_WITH_CODE((0 == result),
+			"[disable_dpm_tasks] Failed to disable DPM!",
+			);
+	data->water_marks_bitmap &= ~(WaterMarksLoaded);
+
+	return result;
+}
+
+
 static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
 	.backend_init = vega10_hwmgr_backend_init,
 	.backend_fini = vega10_hwmgr_backend_fini,
 	.asic_setup = vega10_setup_asic_task,
 	.dynamic_state_management_enable = vega10_enable_dpm_tasks,
+	.dynamic_state_management_disable = vega10_disable_dpm_tasks,
 	.get_num_of_pp_table_entries =
 			vega10_get_number_of_powerplay_table_entries,
 	.get_power_state_size = vega10_get_power_state_size,
@@ -4400,6 +4495,8 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
 	.check_states_equal = vega10_check_states_equal,
 	.check_smc_update_required_for_display_configuration =
 			vega10_check_smc_update_required_for_display_configuration,
+	.power_off_asic = vega10_power_off_asic,
+	.disable_smc_firmware_ctf = vega10_thermal_disable_alert,
 };
 
 int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
index f1e244cd2370..692f752fbb27 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
@@ -113,6 +113,29 @@ int vega10_enable_power_containment(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+int vega10_disable_power_containment(struct pp_hwmgr *hwmgr)
+{
+	struct vega10_hwmgr *data =
+			(struct vega10_hwmgr *)(hwmgr->backend);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_PowerContainment)) {
+		if (data->smu_features[GNLD_PPT].supported)
+			PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
+					false, data->smu_features[GNLD_PPT].smu_feature_bitmap),
+					"Attempt to disable PPT feature Failed!",
+					data->smu_features[GNLD_PPT].supported = false);
+
+		if (data->smu_features[GNLD_TDC].supported)
+			PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
+					false, data->smu_features[GNLD_TDC].smu_feature_bitmap),
+					"Attempt to disable PPT feature Failed!",
+					data->smu_features[GNLD_TDC].supported = false);
+	}
+
+	return 0;
+}
+
 static int vega10_set_overdrive_target_percentage(struct pp_hwmgr *hwmgr,
 		uint32_t adjust_percent)
 {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h
index d9662bf4a4b4..9ecaa27c0bb5 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h
@@ -60,6 +60,7 @@ int vega10_enable_smc_cac(struct pp_hwmgr *hwmgr);
 int vega10_enable_power_containment(struct pp_hwmgr *hwmgr);
 int vega10_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n);
 int vega10_power_control_set_level(struct pp_hwmgr *hwmgr);
+int vega10_disable_power_containment(struct pp_hwmgr *hwmgr);
 
 #endif  /* _VEGA10_POWERTUNE_H_ */
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
index f4d77b62e1ba..7062ec8cc4ac 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
@@ -501,7 +501,7 @@ static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr)
 * Disable thermal alerts on the RV770 thermal controller.
 * @param    hwmgr The address of the hardware manager.
 */
-static int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr)
+int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr)
 {
 	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h
index 8036808ec421..70c1d22b3d7d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h
@@ -78,6 +78,7 @@ extern int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr,
 		uint32_t *speed);
 extern int vega10_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr);
 extern uint32_t smu7_get_xclk(struct pp_hwmgr *hwmgr);
+extern int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr);
 
 #endif
 
-- 
cgit v1.2.3


From c81a1a74037f61c578f6b8218b079aa434e300b4 Mon Sep 17 00:00:00 2001
From: Michel Dänzer <michel.daenzer@amd.com>
Date: Fri, 28 Apr 2017 17:28:14 +0900
Subject: drm/amdgpu: Make amdgpu_bo_reserve use uninterruptible waits for
 cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some of these paths probably cannot be interrupted by a signal anyway.
Those that can would fail to clean up things if they actually got
interrupted.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c      |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c      |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c      |  6 +++---
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c      |  6 +++---
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c       |  6 +++---
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c       |  6 +++---
 drivers/gpu/drm/amd/amdgpu/dce_virtual.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c       |  6 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c       | 10 +++++-----
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c       |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c       |  8 ++++----
 16 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index d46773b8f7e8..21125a9452b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -123,7 +123,7 @@ static void amdgpu_unpin_work_func(struct work_struct *__work)
 	int r;
 
 	/* unpin of the old buffer */
-	r = amdgpu_bo_reserve(work->old_abo, false);
+	r = amdgpu_bo_reserve(work->old_abo, true);
 	if (likely(r == 0)) {
 		r = amdgpu_bo_unpin(work->old_abo);
 		if (unlikely(r != 0)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index a48142d930c6..236d9950221b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -112,7 +112,7 @@ static void amdgpufb_destroy_pinned_object(struct drm_gem_object *gobj)
 	struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
 	int ret;
 
-	ret = amdgpu_bo_reserve(abo, false);
+	ret = amdgpu_bo_reserve(abo, true);
 	if (likely(ret == 0)) {
 		amdgpu_bo_kunmap(abo);
 		amdgpu_bo_unpin(abo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 6d691abe889c..e7406ce7093c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -183,7 +183,7 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
 	if (adev->gart.robj == NULL) {
 		return;
 	}
-	r = amdgpu_bo_reserve(adev->gart.robj, false);
+	r = amdgpu_bo_reserve(adev->gart.robj, true);
 	if (likely(r == 0)) {
 		amdgpu_bo_kunmap(adev->gart.robj);
 		amdgpu_bo_unpin(adev->gart.robj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 21f616cdb279..4f2b6679f72f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -821,7 +821,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 
 	if (amdgpu_sriov_vf(adev)) {
 		/* TODO: how to handle reserve failure */
-		BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false));
+		BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
 		amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va);
 		fpriv->vm.csa_bo_va = NULL;
 		amdgpu_bo_unreserve(adev->virt.csa_obj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 3826d5aea0a6..6bdc866570ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -113,7 +113,7 @@ void amdgpu_gem_prime_unpin(struct drm_gem_object *obj)
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
 	int ret = 0;
 
-	ret = amdgpu_bo_reserve(bo, false);
+	ret = amdgpu_bo_reserve(bo, true);
 	if (unlikely(ret != 0))
 		return;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index de9f919ae336..5ca75a456ad2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -130,7 +130,7 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
-	r = amdgpu_bo_reserve(sa_manager->bo, false);
+	r = amdgpu_bo_reserve(sa_manager->bo, true);
 	if (!r) {
 		amdgpu_bo_kunmap(sa_manager->bo);
 		amdgpu_bo_unpin(sa_manager->bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 24ca251f82ca..7bb1bf76319a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1198,7 +1198,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 		return;
 	amdgpu_ttm_debugfs_fini(adev);
 	if (adev->stollen_vga_memory) {
-		r = amdgpu_bo_reserve(adev->stollen_vga_memory, false);
+		r = amdgpu_bo_reserve(adev->stollen_vga_memory, true);
 		if (r == 0) {
 			amdgpu_bo_unpin(adev->stollen_vga_memory);
 			amdgpu_bo_unreserve(adev->stollen_vga_memory);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index ba98d35340a3..0cdeb6a2e4a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2230,7 +2230,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (!atomic && fb && fb != crtc->primary->fb) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
 		amdgpu_bo_unpin(abo);
@@ -2589,7 +2589,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
 unpin:
 	if (amdgpu_crtc->cursor_bo) {
 		struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-		ret = amdgpu_bo_reserve(aobj, false);
+		ret = amdgpu_bo_reserve(aobj, true);
 		if (likely(ret == 0)) {
 			amdgpu_bo_unpin(aobj);
 			amdgpu_bo_unreserve(aobj);
@@ -2720,7 +2720,7 @@ static void dce_v10_0_crtc_disable(struct drm_crtc *crtc)
 
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
 		else {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index e59bc42df18c..773654a19749 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2214,7 +2214,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (!atomic && fb && fb != crtc->primary->fb) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
 		amdgpu_bo_unpin(abo);
@@ -2609,7 +2609,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
 unpin:
 	if (amdgpu_crtc->cursor_bo) {
 		struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-		ret = amdgpu_bo_reserve(aobj, false);
+		ret = amdgpu_bo_reserve(aobj, true);
 		if (likely(ret == 0)) {
 			amdgpu_bo_unpin(aobj);
 			amdgpu_bo_unreserve(aobj);
@@ -2740,7 +2740,7 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc)
 
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
 		else {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index e146d252aa30..1f3552967ba3 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -1645,7 +1645,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (!atomic && fb && fb != crtc->primary->fb) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
 		amdgpu_bo_unpin(abo);
@@ -1962,7 +1962,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc,
 unpin:
 	if (amdgpu_crtc->cursor_bo) {
 		struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-		ret = amdgpu_bo_reserve(aobj, false);
+		ret = amdgpu_bo_reserve(aobj, true);
 		if (likely(ret == 0)) {
 			amdgpu_bo_unpin(aobj);
 			amdgpu_bo_unreserve(aobj);
@@ -2088,7 +2088,7 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc)
 
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
 		else {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 6df7a28e8aac..3c558c170e5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2089,7 +2089,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (!atomic && fb && fb != crtc->primary->fb) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
 		amdgpu_bo_unpin(abo);
@@ -2440,7 +2440,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
 unpin:
 	if (amdgpu_crtc->cursor_bo) {
 		struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-		ret = amdgpu_bo_reserve(aobj, false);
+		ret = amdgpu_bo_reserve(aobj, true);
 		if (likely(ret == 0)) {
 			amdgpu_bo_unpin(aobj);
 			amdgpu_bo_unreserve(aobj);
@@ -2571,7 +2571,7 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc)
 
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
 		else {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index 81a24b6b4846..f1b479b6ac98 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -248,7 +248,7 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc)
 
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
-		r = amdgpu_bo_reserve(abo, false);
+		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
 		else {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 6a429e927297..a125f9d44577 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -2437,7 +2437,7 @@ static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
 	int r;
 
 	if (adev->gfx.rlc.save_restore_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj);
@@ -2448,7 +2448,7 @@ static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
 	}
 
 	if (adev->gfx.rlc.clear_state_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
@@ -2459,7 +2459,7 @@ static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
 	}
 
 	if (adev->gfx.rlc.cp_table_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index eca022eaff44..ee2f2139e2eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2792,7 +2792,7 @@ static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 
 		if (ring->mqd_obj) {
-			r = amdgpu_bo_reserve(ring->mqd_obj, false);
+			r = amdgpu_bo_reserve(ring->mqd_obj, true);
 			if (unlikely(r != 0))
 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
 
@@ -2810,7 +2810,7 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
 	int r;
 
 	if (adev->gfx.mec.hpd_eop_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
@@ -3359,7 +3359,7 @@ static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
 
 	/* save restore block */
 	if (adev->gfx.rlc.save_restore_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj);
@@ -3371,7 +3371,7 @@ static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
 
 	/* clear state block */
 	if (adev->gfx.rlc.clear_state_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
@@ -3383,7 +3383,7 @@ static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
 
 	/* clear state block */
 	if (adev->gfx.rlc.cp_table_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 0115c5f65c01..703bd55d6075 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1239,7 +1239,7 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
 
 	/* clear state block */
 	if (adev->gfx.rlc.clear_state_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
@@ -1250,7 +1250,7 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
 
 	/* jump table block */
 	if (adev->gfx.rlc.cp_table_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
@@ -1363,7 +1363,7 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
 	int r;
 
 	if (adev->gfx.mec.hpd_eop_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
@@ -1490,7 +1490,7 @@ static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
 
 	memset(hpd, 0, MEC_HPD_SIZE);
 
-	r = amdgpu_bo_reserve(kiq->eop_obj, false);
+	r = amdgpu_bo_reserve(kiq->eop_obj, true);
 	if (unlikely(r != 0))
 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
 	amdgpu_bo_kunmap(kiq->eop_obj);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 210d21c085f2..4073fd24dd13 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -453,7 +453,7 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 	int r;
 
 	if (adev->gfx.mec.hpd_eop_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
@@ -463,7 +463,7 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 		adev->gfx.mec.hpd_eop_obj = NULL;
 	}
 	if (adev->gfx.mec.mec_fw_obj) {
-		r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
+		r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, true);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj);
@@ -599,7 +599,7 @@ static int gfx_v9_0_kiq_init(struct amdgpu_device *adev)
 
 	memset(hpd, 0, MEC_HPD_SIZE);
 
-	r = amdgpu_bo_reserve(kiq->eop_obj, false);
+	r = amdgpu_bo_reserve(kiq->eop_obj, true);
 	if (unlikely(r != 0))
 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
 	amdgpu_bo_kunmap(kiq->eop_obj);
@@ -1786,7 +1786,7 @@ static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 
 		if (ring->mqd_obj) {
-			r = amdgpu_bo_reserve(ring->mqd_obj, false);
+			r = amdgpu_bo_reserve(ring->mqd_obj, true);
 			if (unlikely(r != 0))
 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
 
-- 
cgit v1.2.3


From 8e12188400668046870087fc278404a11c853061 Mon Sep 17 00:00:00 2001
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Date: Fri, 28 Apr 2017 01:16:15 +0800
Subject: iommu/vt-d: Don't print the failure message when booting non-kdump
 kernel

When booting a new non-kdump kernel, we have below failure message:

[    0.004000] DMAR-IR: IRQ remapping was enabled on dmar2 but we are not in kdump mode
[    0.004000] DMAR-IR: Failed to copy IR table for dmar2 from previous kernel
[    0.004000] DMAR-IR: IRQ remapping was enabled on dmar1 but we are not in kdump mode
[    0.004000] DMAR-IR: Failed to copy IR table for dmar1 from previous kernel
[    0.004000] DMAR-IR: IRQ remapping was enabled on dmar0 but we are not in kdump mode
[    0.004000] DMAR-IR: Failed to copy IR table for dmar0 from previous kernel
[    0.004000] DMAR-IR: IRQ remapping was enabled on dmar3 but we are not in kdump mode
[    0.004000] DMAR-IR: Failed to copy IR table for dmar3 from previous kernel

For non-kdump case, we no need to copy IR table from previous kernel
so it's nonthing actually failed. To be less alarming or misleading,
do not print "DMAR-IR: Failed to copy IR table for dmar[0-9] from
previous kernel" messages when booting non-kdump kernel.

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel_irq_remapping.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index ac596928f6b4..a190cbd76ef7 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -408,14 +408,6 @@ static int iommu_load_old_irte(struct intel_iommu *iommu)
 	size_t size;
 	u64 irta;
 
-	if (!is_kdump_kernel()) {
-		pr_warn("IRQ remapping was enabled on %s but we are not in kdump mode\n",
-			iommu->name);
-		clear_ir_pre_enabled(iommu);
-		iommu_disable_irq_remapping(iommu);
-		return -EINVAL;
-	}
-
 	/* Check whether the old ir-table has the same size as ours */
 	irta = dmar_readq(iommu->reg + DMAR_IRTA_REG);
 	if ((irta & INTR_REMAP_TABLE_REG_SIZE_MASK)
@@ -567,7 +559,12 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
 	init_ir_status(iommu);
 
 	if (ir_pre_enabled(iommu)) {
-		if (iommu_load_old_irte(iommu))
+		if (!is_kdump_kernel()) {
+			pr_warn("IRQ remapping was enabled on %s but we are not in kdump mode\n",
+				iommu->name);
+			clear_ir_pre_enabled(iommu);
+			iommu_disable_irq_remapping(iommu);
+		} else if (iommu_load_old_irte(iommu))
 			pr_err("Failed to copy IR table for %s from previous kernel\n",
 			       iommu->name);
 		else
-- 
cgit v1.2.3


From 4486740da2e1cb307f58e0e62f94c92ec7c101a1 Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@intel.com>
Date: Sun, 23 Apr 2017 07:15:20 -0700
Subject: powercap: intel_rapl: Add support for Gemini Lake

Gemini Lake RAPL support is similar to Goldmont.

Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Acked-by: Pan, Jacob jun <jacob.jun.pan@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/intel_rapl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 9a25110c4a46..9ddad0815ba9 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -1164,6 +1164,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
 	RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD,	rapl_defaults_tng),
 	RAPL_CPU(INTEL_FAM6_ATOM_MOOREFIELD,	rapl_defaults_ann),
 	RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT,	rapl_defaults_core),
+	RAPL_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE,	rapl_defaults_core),
 	RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON,	rapl_defaults_core),
 
 	RAPL_CPU(INTEL_FAM6_XEON_PHI_KNL,	rapl_defaults_hsw_server),
-- 
cgit v1.2.3


From d49f2dedf33b8e7752ec66ac2b3b5bf4d7210943 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Fri, 28 Apr 2017 16:59:49 +0100
Subject: ACPI/IORT: Fix CONFIG_IOMMU_API dependency

The IOMMU probe deferral IORT rework had to add code in
iort_iommu_configure() and iort_iommu_xlate() that requires
the IOMMU_API to be selected in order to compile and work.

Stub out the pieces of code that depend on CONFIG_IOMMU_API
to be selected to prevent compilation failures such as:

drivers/acpi/arm64/iort.c: In function 'iort_iommu_xlate':
drivers/acpi/arm64/iort.c:647:22: error: 'struct iommu_fwspec' has no
member named 'ops'

by wrapping the code in static inline functions that provide a NOP
implementation when CONFIG_IOMMU_API is not selected.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/acpi/arm64/iort.c | 44 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index e7b1940ff13b..a629e83bff24 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -536,6 +536,33 @@ static inline bool iort_iommu_driver_enabled(u8 type)
 	}
 }
 
+#ifdef CONFIG_IOMMU_API
+static inline
+const struct iommu_ops *iort_fwspec_iommu_ops(struct iommu_fwspec *fwspec)
+{
+	return (fwspec && fwspec->ops) ? fwspec->ops : NULL;
+}
+
+static inline
+int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev)
+{
+	int err = 0;
+
+	if (!IS_ERR_OR_NULL(ops) && ops->add_device && dev->bus &&
+	    !dev->iommu_group)
+		err = ops->add_device(dev);
+
+	return err;
+}
+#else
+static inline
+const struct iommu_ops *iort_fwspec_iommu_ops(struct iommu_fwspec *fwspec)
+{ return NULL; }
+static inline
+int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev)
+{ return 0; }
+#endif
+
 static const struct iommu_ops *iort_iommu_xlate(struct device *dev,
 					struct acpi_iort_node *node,
 					u32 streamid)
@@ -543,14 +570,14 @@ static const struct iommu_ops *iort_iommu_xlate(struct device *dev,
 	const struct iommu_ops *ops = NULL;
 	int ret = -ENODEV;
 	struct fwnode_handle *iort_fwnode;
-	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
 
 	/*
 	 * If we already translated the fwspec there
 	 * is nothing left to do, return the iommu_ops.
 	 */
-	if (fwspec && fwspec->ops)
-		return fwspec->ops;
+	ops = iort_fwspec_iommu_ops(dev->iommu_fwspec);
+	if (ops)
+		return ops;
 
 	if (node) {
 		iort_fwnode = iort_get_fwnode(node);
@@ -611,6 +638,7 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
 	struct acpi_iort_node *node, *parent;
 	const struct iommu_ops *ops = NULL;
 	u32 streamid = 0;
+	int err;
 
 	if (dev_is_pci(dev)) {
 		struct pci_bus *bus = to_pci_dev(dev)->bus;
@@ -654,13 +682,9 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
 	 * If we have reason to believe the IOMMU driver missed the initial
 	 * add_device callback for dev, replay it to get things in order.
 	 */
-	if (!IS_ERR_OR_NULL(ops) && ops->add_device &&
-	    dev->bus && !dev->iommu_group) {
-		int err = ops->add_device(dev);
-
-		if (err)
-			ops = ERR_PTR(err);
-	}
+	err = iort_add_device_replay(ops, dev);
+	if (err)
+		ops = ERR_PTR(err);
 
 	return ops;
 }
-- 
cgit v1.2.3


From 26b37b946a5c2658dbc37dd5d6df40aaa9685d70 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Fri, 15 May 2015 02:00:02 +0300
Subject: arm: dma-mapping: Don't override dma_ops in arch_setup_dma_ops()

The arch_setup_dma_ops() function is in charge of setting dma_ops with a
call to set_dma_ops(). set_dma_ops() is also called from

- highbank and mvebu bus notifiers
- dmabounce (to be replaced with swiotlb)
- arm_iommu_attach_device

(arm_iommu_attach_device is itself called from IOMMU and bus master
device drivers)

To allow the arch_setup_dma_ops() call to be moved from device add time
to device probe time we must ensure that dma_ops already setup by any of
the above callers will not be overriden.

Aftering replacing dmabounce with swiotlb, converting IOMMU drivers to
of_xlate and taking care of highbank and mvebu, the workaround should be
removed.

Signed-off-by: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Tested-by: Ralph Sennhauser <ralph.sennhauser@gmail.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/arm/mm/dma-mapping.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 63eabb06f9f1..ff131928be50 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2390,6 +2390,15 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	const struct dma_map_ops *dma_ops;
 
 	dev->archdata.dma_coherent = coherent;
+
+	/*
+	 * Don't override the dma_ops if they have already been set. Ideally
+	 * this should be the only location where dma_ops are set, remove this
+	 * check when all other callers of set_dma_ops will have disappeared.
+	 */
+	if (dev->dma_ops)
+		return;
+
 	if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu))
 		dma_ops = arm_get_iommu_dma_map_ops(coherent);
 	else
-- 
cgit v1.2.3


From 461a6946b1f93f6720577fb06aa78e8cbd9291c9 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 26 Apr 2017 15:46:20 +0200
Subject: iommu: Remove pci.h include from trace/events/iommu.h

The include file does not need any PCI specifics, so remove
that include. Also fix the places that relied on it.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/arm64/mm/dma-mapping.c       | 1 +
 drivers/infiniband/hw/qedr/main.c | 1 +
 drivers/iommu/fsl_pamu.h          | 1 +
 drivers/iommu/rockchip-iommu.c    | 1 +
 drivers/iommu/tegra-smmu.c        | 1 +
 include/linux/dma-iommu.h         | 1 +
 include/trace/events/iommu.h      | 1 -
 7 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 81cdb2e844ed..982f85b81624 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -28,6 +28,7 @@
 #include <linux/dma-contiguous.h>
 #include <linux/vmalloc.h>
 #include <linux/swiotlb.h>
+#include <linux/pci.h>
 
 #include <asm/cacheflush.h>
 
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index b9b47e5cc8b3..33033624cd9b 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -35,6 +35,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <linux/netdevice.h>
 #include <linux/iommu.h>
+#include <linux/pci.h>
 #include <net/addrconf.h>
 #include <linux/qed/qede_roce.h>
 #include <linux/qed/qed_chain.h>
diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h
index aab723f91f12..c3434f29c967 100644
--- a/drivers/iommu/fsl_pamu.h
+++ b/drivers/iommu/fsl_pamu.h
@@ -20,6 +20,7 @@
 #define __FSL_PAMU_H
 
 #include <linux/iommu.h>
+#include <linux/pci.h>
 
 #include <asm/fsl_pamu_stash.h>
 
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 9afcbf79f0b0..0ba303a184dd 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -8,6 +8,7 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/dma-iommu.h>
+#include <linux/dma-mapping.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 9305964250ac..eeb19f560a05 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -15,6 +15,7 @@
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/dma-mapping.h>
 
 #include <soc/tegra/ahb.h>
 #include <soc/tegra/mc.h>
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 5725c94b1f12..abd946569515 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -20,6 +20,7 @@
 #include <asm/errno.h>
 
 #ifdef CONFIG_IOMMU_DMA
+#include <linux/dma-mapping.h>
 #include <linux/iommu.h>
 #include <linux/msi.h>
 
diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h
index 2c7befb10f13..99254ed89212 100644
--- a/include/trace/events/iommu.h
+++ b/include/trace/events/iommu.h
@@ -11,7 +11,6 @@
 #define _TRACE_IOMMU_H
 
 #include <linux/tracepoint.h>
-#include <linux/pci.h>
 
 struct device;
 
-- 
cgit v1.2.3


From 208480bb273e15f42711bd47f70dc0fbfa2570b8 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 26 Apr 2017 15:49:57 +0200
Subject: iommu: Remove trace-events include from iommu.h

It is not needed there anymore. All places needing it are
fixed.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/media/platform/mtk-vpu/mtk_vpu.c | 1 +
 include/linux/iommu.h                    | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.c b/drivers/media/platform/mtk-vpu/mtk_vpu.c
index 463b69c934be..e011c8dc0198 100644
--- a/drivers/media/platform/mtk-vpu/mtk_vpu.c
+++ b/drivers/media/platform/mtk-vpu/mtk_vpu.c
@@ -23,6 +23,7 @@
 #include <linux/of_reserved_mem.h>
 #include <linux/sched.h>
 #include <linux/sizes.h>
+#include <linux/dma-mapping.h>
 
 #include "mtk_vpu.h"
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index abaa0ca848bc..dda8717545e9 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -26,8 +26,6 @@
 #include <linux/err.h>
 #include <linux/of.h>
 
-#include <trace/events/iommu.h>
-
 #define IOMMU_READ	(1 << 0)
 #define IOMMU_WRITE	(1 << 1)
 #define IOMMU_CACHE	(1 << 2) /* DMA cache coherency */
-- 
cgit v1.2.3


From 37f8aa16da8e9239092429e7a6dadf352897a92d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sat, 29 Apr 2017 00:02:37 -0400
Subject: pNFS/flexfiles: Fix up the ff_layout_write_pagelist failure path

If the attempt to write through pNFS fails, we need to use the same
failure semantics as for the read path: If the FF_FLAGS_NO_IO_THRU_MDS
flag is set or we have sufficient valid DSes, then we must retry through
pNFS

Fixes: d67ae825a59d ("pnfs/flexfiles: Add the FlexFile Layout Driver")
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c | 11 ++++++++---
 fs/nfs/pnfs.c                          | 14 +++++++++++++-
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index f23b63eb356e..4e8bf02a62f9 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1802,16 +1802,16 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 
 	ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
 	if (!ds)
-		return PNFS_NOT_ATTEMPTED;
+		goto out_failed;
 
 	ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
 						   hdr->inode);
 	if (IS_ERR(ds_clnt))
-		return PNFS_NOT_ATTEMPTED;
+		goto out_failed;
 
 	ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
 	if (!ds_cred)
-		return PNFS_NOT_ATTEMPTED;
+		goto out_failed;
 
 	vers = nfs4_ff_layout_ds_version(lseg, idx);
 
@@ -1841,6 +1841,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 			  sync, RPC_TASK_SOFTCONN);
 	put_rpccred(ds_cred);
 	return PNFS_ATTEMPTED;
+
+out_failed:
+	if (ff_layout_avoid_mds_available_ds(lseg))
+		return PNFS_TRY_AGAIN;
+	return PNFS_NOT_ATTEMPTED;
 }
 
 static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e45b3ffeda08..6c2e4c73684c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2292,8 +2292,20 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc,
 	enum pnfs_try_status trypnfs;
 
 	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
-	if (trypnfs == PNFS_NOT_ATTEMPTED)
+	switch (trypnfs) {
+	case PNFS_NOT_ATTEMPTED:
 		pnfs_write_through_mds(desc, hdr);
+	case PNFS_ATTEMPTED:
+		break;
+	case PNFS_TRY_AGAIN:
+		/* cleanup hdr and prepare to redo pnfs */
+		if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+			struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
+			list_splice_init(&hdr->pages, &mirror->pg_list);
+			mirror->pg_recoalesce = 1;
+		}
+		hdr->mds_ops->rpc_release(hdr);
+	}
 }
 
 static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
-- 
cgit v1.2.3


From 722f0b891198ab1f5f7b020e32c989a517ed0577 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sat, 29 Apr 2017 10:27:18 -0400
Subject: pNFS: Don't send COMMITs to the DSes if the server invalidated our
 layout

If the layout was invalidated, then assume we should requeue all the
pending writes for the DS in question.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs_nfs.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index dc4111d08a27..7697ac0ff81a 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -217,6 +217,13 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
 	for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
 		if (list_empty(&bucket->committing))
 			continue;
+		/*
+		 * If the layout segment is invalid, then let
+		 * pnfs_generic_retry_commit() clean up the bucket.
+		 */
+		if (!pnfs_is_valid_lseg(bucket->clseg) &&
+		    !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags))
+			continue;
 		data = nfs_commitdata_alloc(false);
 		if (!data)
 			break;
-- 
cgit v1.2.3


From 1f18b82c3437a398562e748c22fcd2f71c2e230c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sat, 29 Apr 2017 10:10:17 -0400
Subject: pNFS: Ensure we commit the layout if it has been invalidated

If the layout is being invalidated on the server, then we must
invoke nfs_commit_inode() to ensure any commits to the DS get
cleared out.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs42proc.c | 1 +
 fs/nfs/nfs4proc.c  | 1 +
 fs/nfs/pnfs.c      | 3 +++
 3 files changed, 5 insertions(+)

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index c81c61971625..87f5b7b971ca 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -379,6 +379,7 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
 			pnfs_mark_layout_stateid_invalid(lo, &head);
 			spin_unlock(&inode->i_lock);
 			pnfs_free_lseg_list(&head);
+			nfs_commit_inode(inode, 0);
 		} else
 			spin_unlock(&inode->i_lock);
 		break;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c2b82caa9068..158c3d52146f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8330,6 +8330,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 		 */
 		pnfs_mark_layout_stateid_invalid(lo, &head);
 		spin_unlock(&inode->i_lock);
+		nfs_commit_inode(inode, 0);
 		pnfs_free_lseg_list(&head);
 		status = -EAGAIN;
 		goto out;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6c2e4c73684c..140ecd7d350f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -727,6 +727,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
 		spin_unlock(&nfsi->vfs_inode.i_lock);
 		pnfs_free_lseg_list(&tmp_list);
+		nfs_commit_inode(&nfsi->vfs_inode, 0);
 		pnfs_put_layout_hdr(lo);
 	} else
 		spin_unlock(&nfsi->vfs_inode.i_lock);
@@ -1989,6 +1990,8 @@ out_forget:
 	spin_unlock(&ino->i_lock);
 	lseg->pls_layout = lo;
 	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+	if (!pnfs_layout_is_valid(lo))
+		nfs_commit_inode(ino, 0);
 	return ERR_PTR(-EAGAIN);
 }
 
-- 
cgit v1.2.3


From fd615f69a18a9d4aa5ef02a1dc83f319f75da8e7 Mon Sep 17 00:00:00 2001
From: LiuHailong <liu.hailong6@zte.com.cn>
Date: Tue, 7 Feb 2017 10:35:52 +0800
Subject: powerpc/64e: Fix hang when debugging programs with relocated kernel

Debug interrupts can be taken during interrupt entry, since interrupt
entry does not automatically turn them off.  The kernel will check
whether the faulting instruction is between [interrupt_base_book3e,
__end_interrupts], and if so clear MSR[DE] and return.

However, when the kernel is built with CONFIG_RELOCATABLE, it can't use
LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) and
LOAD_REG_IMMEDIATE(r15,__end_interrupts), as they ignore relocation.
Thus, if the kernel is actually running at a different address than it
was built at, the address comparison will fail, and the exception entry
code will hang at kernel_dbg_exc.

r2(toc) is also not usable here, as r2 still holds data from the
interrupted context, so LOAD_REG_ADDR() doesn't work either.  So we use
the *name@got* to get the EV of two labels directly.

Test programs test.c shows as follows:
int main(int argc, char *argv[])
{
	if (access("/proc/sys/kernel/perf_event_paranoid", F_OK) == -1)
		printf("Kernel doesn't have perf_event support\n");
}

Steps to reproduce the bug, for example:
 1) ./gdb ./test
 2) (gdb) b access
 3) (gdb) r
 4) (gdb) s

Signed-off-by: Liu Hailong <liu.hailong6@zte.com.cn>
Signed-off-by: Jiang Xuexin <jiang.xuexin@zte.com.cn>
Reviewed-by: Jiang Biao <jiang.biao2@zte.com.cn>
Reviewed-by: Liu Song <liu.song11@zte.com.cn>
Reviewed-by: Huang Jian <huang.jian@zte.com.cn>
[scottwood: cleaned up commit message, and specified bad behavior
 as a hang rather than an oops to correspond to mainline kernel behavior]
Fixes: 1cb6e0649248 ("powerpc/book3e: support CONFIG_RELOCATABLE")
Cc: <stable@vger.kernel.org> # 4.4.x-
Signed-off-by: Scott Wood <oss@buserror.net>
---
 arch/powerpc/kernel/exceptions-64e.S | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 45b453e4d0c8..acd8ca76233e 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -735,8 +735,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	andis.	r15,r14,(DBSR_IC|DBSR_BT)@h
 	beq+	1f
 
+#ifdef CONFIG_RELOCATABLE
+	ld	r15,PACATOC(r13)
+	ld	r14,interrupt_base_book3e@got(r15)
+	ld	r15,__end_interrupts@got(r15)
+#else
 	LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
 	LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+#endif
 	cmpld	cr0,r10,r14
 	cmpld	cr1,r10,r15
 	blt+	cr0,1f
@@ -799,8 +805,14 @@ kernel_dbg_exc:
 	andis.	r15,r14,(DBSR_IC|DBSR_BT)@h
 	beq+	1f
 
+#ifdef CONFIG_RELOCATABLE
+	ld	r15,PACATOC(r13)
+	ld	r14,interrupt_base_book3e@got(r15)
+	ld	r15,__end_interrupts@got(r15)
+#else
 	LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
 	LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+#endif
 	cmpld	cr0,r10,r14
 	cmpld	cr1,r10,r15
 	blt+	cr0,1f
-- 
cgit v1.2.3


From 8b8642af15ed14b9a7a34d3401afbcc274533e13 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 7 Feb 2017 10:05:09 +0100
Subject: net: ethernet: ucc_geth: fix MEM_PART_MURAM mode

Since commit 5093bb965a163 ("powerpc/QE: switch to the cpm_muram
implementation"), muram area is not part of immrbar mapping anymore
so immrbar_virt_to_phys() is not usable anymore.

Fixes: 5093bb965a163 ("powerpc/QE: switch to the cpm_muram implementation")
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Li Yang <pku.leo@gmail.com>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 drivers/net/ethernet/freescale/ucc_geth.c | 8 +++-----
 include/soc/fsl/qe/qe.h                   | 1 +
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 3f7ae9f64cd8..f77ba9fa257b 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -2594,11 +2594,10 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 		} else if (ugeth->ug_info->uf_info.bd_mem_part ==
 			   MEM_PART_MURAM) {
 			out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base,
-				 (u32) immrbar_virt_to_phys(ugeth->
-							    p_tx_bd_ring[i]));
+				 (u32)qe_muram_dma(ugeth->p_tx_bd_ring[i]));
 			out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].
 				 last_bd_completed_address,
-				 (u32) immrbar_virt_to_phys(endOfRing));
+				 (u32)qe_muram_dma(endOfRing));
 		}
 	}
 
@@ -2844,8 +2843,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 		} else if (ugeth->ug_info->uf_info.bd_mem_part ==
 			   MEM_PART_MURAM) {
 			out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr,
-				 (u32) immrbar_virt_to_phys(ugeth->
-							    p_rx_bd_ring[i]));
+				 (u32)qe_muram_dma(ugeth->p_rx_bd_ring[i]));
 		}
 		/* rest of fields handled by QE */
 	}
diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h
index 70339d7958c0..0cd4c11479b1 100644
--- a/include/soc/fsl/qe/qe.h
+++ b/include/soc/fsl/qe/qe.h
@@ -243,6 +243,7 @@ static inline int qe_alive_during_sleep(void)
 #define qe_muram_free cpm_muram_free
 #define qe_muram_addr cpm_muram_addr
 #define qe_muram_offset cpm_muram_offset
+#define qe_muram_dma cpm_muram_dma
 
 #define qe_setbits32(_addr, _v) iowrite32be(ioread32be(_addr) |  (_v), (_addr))
 #define qe_clrbits32(_addr, _v) iowrite32be(ioread32be(_addr) & ~(_v), (_addr))
-- 
cgit v1.2.3


From b54ea82f01282253c85eb7e2fd2b6c96f7a027d8 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 7 Feb 2017 10:05:11 +0100
Subject: soc/fsl/qe: get rid of immrbar_virt_to_phys()

immrbar_virt_to_phys() is not used anymore

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Acked-by: Li Yang <pku.leo@gmail.com>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 drivers/soc/fsl/qe/qe.c       |  4 +---
 include/soc/fsl/qe/immap_qe.h | 19 -------------------
 2 files changed, 1 insertion(+), 22 deletions(-)

diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index ade168f5328e..d9c04f588f7f 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -66,7 +66,7 @@ static unsigned int qe_num_of_snum;
 
 static phys_addr_t qebase = -1;
 
-phys_addr_t get_qe_base(void)
+static phys_addr_t get_qe_base(void)
 {
 	struct device_node *qe;
 	int ret;
@@ -90,8 +90,6 @@ phys_addr_t get_qe_base(void)
 	return qebase;
 }
 
-EXPORT_SYMBOL(get_qe_base);
-
 void qe_reset(void)
 {
 	if (qe_immr == NULL)
diff --git a/include/soc/fsl/qe/immap_qe.h b/include/soc/fsl/qe/immap_qe.h
index c76ef30b05ba..7baaabd5ec2c 100644
--- a/include/soc/fsl/qe/immap_qe.h
+++ b/include/soc/fsl/qe/immap_qe.h
@@ -464,25 +464,6 @@ struct qe_immap {
 } __attribute__ ((packed));
 
 extern struct qe_immap __iomem *qe_immr;
-extern phys_addr_t get_qe_base(void);
-
-/*
- * Returns the offset within the QE address space of the given pointer.
- *
- * Note that the QE does not support 36-bit physical addresses, so if
- * get_qe_base() returns a number above 4GB, the caller will probably fail.
- */
-static inline phys_addr_t immrbar_virt_to_phys(void *address)
-{
-	void *q = (void *)qe_immr;
-
-	/* Is it a MURAM address? */
-	if ((address >= q) && (address < (q + QE_IMMAP_SIZE)))
-		return get_qe_base() + (address - q);
-
-	/* It's an address returned by kmalloc */
-	return virt_to_phys(address);
-}
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_IMMAP_QE_H */
-- 
cgit v1.2.3


From 2ccf80b7566cc035d903dd0ac5d7ebd25c2c1060 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@keymile.com>
Date: Fri, 17 Feb 2017 11:29:45 +0100
Subject: soc/fsl/qe: round brg_freq to 1kHz granularity

Because of integer computation rounding in u-boot (that sets the QE
brg-frequency DTS prop), the clk value is 99999999 Hz even though it is
100 MHz.

When setting brg clks that are exact divisors of 100 MHz, this small
differnce plays a role and can result in lower clks to be output (for
instance 20 MHz - divide by 5 - results in 16.666 MHz - divide by 6).

This patch fixes that by "forcing" the brg_clk to the nearest kHz when
the difference is below 2 integer rounding errors (i.e. 4).

Signed-off-by: Valentin Longchamp <valentin.longchamp@keymile.com>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 drivers/soc/fsl/qe/qe.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index d9c04f588f7f..31a094573a9d 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -161,11 +161,15 @@ EXPORT_SYMBOL(qe_issue_cmd);
  */
 static unsigned int brg_clk = 0;
 
+#define CLK_GRAN	(1000)
+#define CLK_GRAN_LIMIT	(5)
+
 unsigned int qe_get_brg_clk(void)
 {
 	struct device_node *qe;
 	int size;
 	const u32 *prop;
+	unsigned int mod;
 
 	if (brg_clk)
 		return brg_clk;
@@ -183,6 +187,15 @@ unsigned int qe_get_brg_clk(void)
 
 	of_node_put(qe);
 
+	/* round this if near to a multiple of CLK_GRAN */
+	mod = brg_clk % CLK_GRAN;
+	if (mod) {
+		if (mod < CLK_GRAN_LIMIT)
+			brg_clk -= mod;
+		else if (mod > (CLK_GRAN - CLK_GRAN_LIMIT))
+			brg_clk += CLK_GRAN - mod;
+	}
+
 	return brg_clk;
 }
 EXPORT_SYMBOL(qe_get_brg_clk);
-- 
cgit v1.2.3


From e5c5c8d23fef2a976afe724f30e34da31be09769 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@keymile.com>
Date: Fri, 17 Feb 2017 11:29:46 +0100
Subject: soc/fsl/qe: only apply QE_General4 workaround on affected SoCs

The QE_General4 workaround is only valid for the MPC832x and MPC836x
SoCs. The other SoCs that embed a QUICC engine are not affected by this
hardware bug and thus can use the computed divisors (this was
successfully tested on the T1040).

Similalry to what was done in commit 8ce795cb0c6b ("i2c: mpc: assign the
correct prescaler from SVR") in order to avoid changes in
the device tree nodes of the QE (with maybe a variant of the compatible
property), the PVR reg is read out to find out if the workaround must be
applied or not.

Signed-off-by: Valentin Longchamp <valentin.longchamp@keymile.com>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 drivers/soc/fsl/qe/qe.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index 31a094573a9d..2ef6fc6487c1 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -200,6 +200,9 @@ unsigned int qe_get_brg_clk(void)
 }
 EXPORT_SYMBOL(qe_get_brg_clk);
 
+#define PVR_VER_836x	0x8083
+#define PVR_VER_832x	0x8084
+
 /* Program the BRG to the given sampling rate and multiplier
  *
  * @brg: the BRG, QE_BRG1 - QE_BRG16
@@ -226,8 +229,9 @@ int qe_setbrg(enum qe_clock brg, unsigned int rate, unsigned int multiplier)
 	/* Errata QE_General4, which affects some MPC832x and MPC836x SOCs, says
 	   that the BRG divisor must be even if you're not using divide-by-16
 	   mode. */
-	if (!div16 && (divisor & 1) && (divisor > 3))
-		divisor++;
+	if (pvr_version_is(PVR_VER_836x) || pvr_version_is(PVR_VER_832x))
+		if (!div16 && (divisor & 1) && (divisor > 3))
+			divisor++;
 
 	tempval = ((divisor - 1) << QE_BRGC_DIVISOR_SHIFT) |
 		QE_BRGC_ENABLE | div16;
-- 
cgit v1.2.3


From 4ba251626ff1ac3447f9a05ecfcec7f85b6c8a91 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@keymile.com>
Date: Fri, 17 Feb 2017 11:29:47 +0100
Subject: soc/fsl/qe: add EXPORT_SYMBOL for the 2 qe_tdm functions

This allows to build the fsl_ucc_hdlc driver as a module.

Signed-off-by: Valentin Longchamp <valentin.longchamp@keymile.com>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 drivers/soc/fsl/qe/qe_tdm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/soc/fsl/qe/qe_tdm.c b/drivers/soc/fsl/qe/qe_tdm.c
index a1048b44e6b9..f744c214f680 100644
--- a/drivers/soc/fsl/qe/qe_tdm.c
+++ b/drivers/soc/fsl/qe/qe_tdm.c
@@ -177,6 +177,7 @@ err_miss_siram_property:
 	devm_iounmap(&pdev->dev, utdm->si_regs);
 	return ret;
 }
+EXPORT_SYMBOL(ucc_of_parse_tdm);
 
 void ucc_tdm_init(struct ucc_tdm *utdm, struct ucc_tdm_info *ut_info)
 {
@@ -274,3 +275,4 @@ void ucc_tdm_init(struct ucc_tdm *utdm, struct ucc_tdm_info *ut_info)
 		break;
 	}
 }
+EXPORT_SYMBOL(ucc_tdm_init);
-- 
cgit v1.2.3


From e21c7316d8ddcf1fd679591d1427e937999a7cf5 Mon Sep 17 00:00:00 2001
From: Roy Pledge <roy.pledge@nxp.com>
Date: Mon, 17 Apr 2017 16:55:19 -0400
Subject: soc/fsl/qbman: Disable IRQs for deferred QBMan work

Work for Congestion State Notifications (CSCN) and Message Ring (MR)
handling is handled via the workqueue mechanism. This requires the
driver to disable those IRQs before scheduling the work and re-enabling
it once the work is completed so that the interrupt doesn't continually
fire.

Signed-off-by: Roy Pledge <roy.pledge@nxp.com>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 drivers/soc/fsl/qbman/qman.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c
index 6f509f68085e..2827a65aa25a 100644
--- a/drivers/soc/fsl/qbman/qman.c
+++ b/drivers/soc/fsl/qbman/qman.c
@@ -1344,6 +1344,7 @@ static void qm_congestion_task(struct work_struct *work)
 	if (!qm_mc_result_timeout(&p->p, &mcr)) {
 		spin_unlock(&p->cgr_lock);
 		dev_crit(p->config->dev, "QUERYCONGESTION timeout\n");
+		qman_p_irqsource_add(p, QM_PIRQ_CSCI);
 		return;
 	}
 	/* mask out the ones I'm not interested in */
@@ -1358,6 +1359,7 @@ static void qm_congestion_task(struct work_struct *work)
 		if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid))
 			cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid));
 	spin_unlock(&p->cgr_lock);
+	qman_p_irqsource_add(p, QM_PIRQ_CSCI);
 }
 
 static void qm_mr_process_task(struct work_struct *work)
@@ -1417,12 +1419,14 @@ static void qm_mr_process_task(struct work_struct *work)
 	}
 
 	qm_mr_cci_consume(&p->p, num);
+	qman_p_irqsource_add(p, QM_PIRQ_MRI);
 	preempt_enable();
 }
 
 static u32 __poll_portal_slow(struct qman_portal *p, u32 is)
 {
 	if (is & QM_PIRQ_CSCI) {
+		qman_p_irqsource_remove(p, QM_PIRQ_CSCI);
 		queue_work_on(smp_processor_id(), qm_portal_wq,
 			      &p->congestion_work);
 	}
@@ -1434,6 +1438,7 @@ static u32 __poll_portal_slow(struct qman_portal *p, u32 is)
 	}
 
 	if (is & QM_PIRQ_MRI) {
+		qman_p_irqsource_remove(p, QM_PIRQ_MRI);
 		queue_work_on(smp_processor_id(), qm_portal_wq,
 			      &p->mr_work);
 	}
-- 
cgit v1.2.3


From 6cf62a3b97e78ba41d31390e59a1ddc98a9e3622 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 14 Apr 2017 18:52:33 +0200
Subject: power: supply: bq24190_charger: Add disable-reset device-property

Allow platform-code to disable the reset on probe and suspend/resume
by setting a "disable-reset" boolean device property on the device.

There are several reasons why the platform-code may want to disable
the reset on probe and suspend/resume:

1) Resetting the charger should never be necessary it should always have
sane values programmed. If it is running with invalid values while we
are not running (system turned off or suspended) there is a big problem
as that may lead to overcharging the battery.

2) The reset in suspend() is meant to put the charger back into default
mode, but this is not necessary and not a good idea. If the charger has
been programmed with a higher max charge_current / charge_voltage then
putting it back in default-mode will reset those to the safe power-on
defaults, leading to slower charging, or charging to a lower voltage
(and thus not using the full capacity) while suspended which is
undesirable. Reprogramming the max charge_current / charge_voltage
after the reset will not help here as that will put the charger back
in host mode and start the i2c watchdog if the host then does not do
anything for 40s (iow if we're suspended for more then 40s) the watchdog
expires resetting the device to default-mode, including resetting all
the registers to there safe power-on defaults. So the only way to keep
using custom charge settings while suspending is to keep the charger in
its normal running state with the i2c watchdog disabled. This is fine
as the charger will still automatically switch from constant current
to constant voltage and stop charging when the battery is full.

3) Besides never being necessary resetting the charger also causes
problems on systems where the charge voltage limit is set higher then the
reset value, if this is the case and the charger is reset while charging
and the battery voltage is between the 2 voltages, then about half the
time the charger gets confused and claims to be charging (REG08 contains
0x64) but in reality the charger has decoupled itself from VBUS (Q1 off)
and is drawing 0A from VBUS, leaving the system running from the battery.

This last problem is happening on a GPD-win mini PC with a bq24292i
charger chip combined with a max17047 fuel-gauge and a LiHV battery.
I've checked and TI does not list any errata for the bq24292i which
could explain this (there are no errata at all).

Cc: Liam Breck <kernel@networkimprov.net>
Cc: Tony Lindgren <tony@atomide.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Liam Breck <kernel@networkimprov.net>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/bq24190_charger.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c
index bd9e5c3d8cc2..2d379a2f99f1 100644
--- a/drivers/power/supply/bq24190_charger.c
+++ b/drivers/power/supply/bq24190_charger.c
@@ -533,6 +533,9 @@ static int bq24190_register_reset(struct bq24190_dev_info *bdi)
 	int ret, limit = 100;
 	u8 v;
 
+	if (device_property_read_bool(bdi->dev, "disable-reset"))
+		return 0;
+
 	/* Reset the registers */
 	ret = bq24190_write_mask(bdi, BQ24190_REG_POC,
 			BQ24190_REG_POC_RESET_MASK,
-- 
cgit v1.2.3


From 3fb319c2cdcd10f775f4d4a05a7d12fa1a5679c1 Mon Sep 17 00:00:00 2001
From: "H. Nikolaus Schaller" <hns@goldelico.com>
Date: Fri, 14 Apr 2017 20:25:56 +0200
Subject: power: supply: twl4030-charger: add writable INPUT_CURRENT_LIMIT
 property

Currently, the twl4030 charger defines its own max_current by directly
creating sysfs nodes. It should use the input_current_limit property
which is e.g. used by the bq24257 driver.

This patch adds the input_current_property with the same semantics as
the max_current property. The code to manage the max_current property
is removed by a separate patch.

Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/twl4030_charger.c | 59 ++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c
index 990ff3d218bc..0b84f336fe39 100644
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -922,6 +922,28 @@ static int twl4030_bci_get_property(struct power_supply *psy,
 			twl4030_bci_state_to_status(state) !=
 				POWER_SUPPLY_STATUS_NOT_CHARGING;
 		break;
+	case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+		val->intval = -1;
+		if (psy->desc->type != POWER_SUPPLY_TYPE_USB) {
+			if (!bci->ac_is_active)
+				val->intval = bci->ac_cur;
+		} else {
+			if (bci->ac_is_active)
+				val->intval = bci->usb_cur_target;
+		}
+		if (val->intval < 0) {
+			u8 bcictl1;
+
+			val->intval = twl4030bci_read_adc_val(TWL4030_BCIIREF1);
+			if (val->intval < 0)
+				return val->intval;
+			ret = twl4030_bci_read(TWL4030_BCICTL1, &bcictl1);
+			if (ret < 0)
+				return ret;
+			val->intval = regval2ua(val->intval, bcictl1 &
+							TWL4030_CGAIN);
+		}
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -929,11 +951,44 @@ static int twl4030_bci_get_property(struct power_supply *psy,
 	return 0;
 }
 
+static int twl4030_bci_set_property(struct power_supply *psy,
+				    enum power_supply_property psp,
+				    const union power_supply_propval *val)
+{
+	struct twl4030_bci *bci = dev_get_drvdata(psy->dev.parent);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+		if (psy->desc->type == POWER_SUPPLY_TYPE_USB)
+			bci->usb_cur_target = val->intval;
+		else
+			bci->ac_cur = val->intval;
+		twl4030_charger_update_current(bci);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int twl4030_bci_property_is_writeable(struct power_supply *psy,
+				      enum power_supply_property psp)
+{
+	switch (psp) {
+	case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static enum power_supply_property twl4030_charger_props[] = {
 	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_ONLINE,
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT,
 };
 
 #ifdef CONFIG_OF
@@ -970,6 +1025,8 @@ static const struct power_supply_desc twl4030_bci_ac_desc = {
 	.properties	= twl4030_charger_props,
 	.num_properties	= ARRAY_SIZE(twl4030_charger_props),
 	.get_property	= twl4030_bci_get_property,
+	.set_property	= twl4030_bci_set_property,
+	.property_is_writeable	= twl4030_bci_property_is_writeable,
 };
 
 static const struct power_supply_desc twl4030_bci_usb_desc = {
@@ -978,6 +1035,8 @@ static const struct power_supply_desc twl4030_bci_usb_desc = {
 	.properties	= twl4030_charger_props,
 	.num_properties	= ARRAY_SIZE(twl4030_charger_props),
 	.get_property	= twl4030_bci_get_property,
+	.set_property	= twl4030_bci_set_property,
+	.property_is_writeable	= twl4030_bci_property_is_writeable,
 };
 
 static int twl4030_bci_probe(struct platform_device *pdev)
-- 
cgit v1.2.3


From 19a2ee69c20255157738402b6fdbcf5d708dad80 Mon Sep 17 00:00:00 2001
From: "H. Nikolaus Schaller" <hns@goldelico.com>
Date: Fri, 14 Apr 2017 21:29:38 +0200
Subject: power: supply: twl4030-charger: don't check if battery is present

We can't assume that the battery is or stays present after probing
on devices with replaceable battery.

On some devices (e.g. GTA04 or OpenPanodra) it can be removed
and even be hot swapped by the user while device continues to operate
through external AC or USB power (as long as system power consumption
remains below ca. 500mA as provided by USB). Under certain conditions
it is possible to boot without battery.

So it makes no sense to check for this situation during probe and make
the charger driver (and its status reports) completely non-operational if
the battery can be inserted later.

Tested on: GTA04 and OpenPandora.

Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/twl4030_charger.c | 36 ----------------------------------
 1 file changed, 36 deletions(-)

diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c
index 0b84f336fe39..abd10f6861ae 100644
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -205,35 +205,6 @@ static int twl4030bci_read_adc_val(u8 reg)
 	return temp | val;
 }
 
-/*
- * Check if Battery Pack was present
- */
-static int twl4030_is_battery_present(struct twl4030_bci *bci)
-{
-	int ret;
-	u8 val = 0;
-
-	/* Battery presence in Main charge? */
-	ret = twl_i2c_read_u8(TWL_MODULE_MAIN_CHARGE, &val, TWL4030_BCIMFSTS3);
-	if (ret)
-		return ret;
-	if (val & TWL4030_BATSTSMCHG)
-		return 0;
-
-	/*
-	 * OK, It could be that bootloader did not enable main charger,
-	 * pre-charge is h/w auto. So, Battery presence in Pre-charge?
-	 */
-	ret = twl_i2c_read_u8(TWL4030_MODULE_PRECHARGE, &val,
-			      TWL4030_BCIMFSTS1);
-	if (ret)
-		return ret;
-	if (val & TWL4030_BATSTSPCHG)
-		return 0;
-
-	return -ENODEV;
-}
-
 /*
  * TI provided formulas:
  * CGAIN == 0: ICHG = (BCIICHG * 1.7) / (2^10 - 1) - 0.85
@@ -1068,13 +1039,6 @@ static int twl4030_bci_probe(struct platform_device *pdev)
 	bci->irq_chg = platform_get_irq(pdev, 0);
 	bci->irq_bci = platform_get_irq(pdev, 1);
 
-	/* Only proceed further *IF* battery is physically present */
-	ret = twl4030_is_battery_present(bci);
-	if  (ret) {
-		dev_crit(&pdev->dev, "Battery was not detected:%d\n", ret);
-		return ret;
-	}
-
 	platform_set_drvdata(pdev, bci);
 
 	bci->ac = devm_power_supply_register(&pdev->dev, &twl4030_bci_ac_desc,
-- 
cgit v1.2.3


From 2848e039c562a5278cd2ebc5b005c0f1a381b34e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 16 Apr 2017 17:30:31 +0200
Subject: power: supply: Make power_supply_am_i_supplied return -ENODEV if
 there are no suppliers

It is sensible to assume that the hardware actually always has a
way of charging the battery so when power_supply_am_i_supplied does not
find any suppliers, that does not mean that there are none, but simply
that no power_supply-drivers are registered / bound for any suppliers for
the supply calling power_supply_am_i_supplied.

At which point a fuel-gauge driver calling power_supply_am_i_supplied()
cannot determine whether the battery is being charged or not.

Allow a caller of power_supply_am_i_supplied to differentiate between
there not being any suppliers, vs no suppliers being online by returning
-ENODEV if there are no suppliers matching supplied_to / supplied_from,
which allows fuel-gauge drivers to return POWER_SUPPLY_STATUS_UNKNOWN
rather then POWER_SUPPLY_STATUS_DISCHARGING if there are no suppliers.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/power_supply_core.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index 1e0960b646e8..7ec7c7c202bd 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -280,13 +280,19 @@ static inline int power_supply_check_supplies(struct power_supply *psy)
 }
 #endif
 
-static int __power_supply_am_i_supplied(struct device *dev, void *data)
+struct psy_am_i_supplied_data {
+	struct power_supply *psy;
+	unsigned int count;
+};
+
+static int __power_supply_am_i_supplied(struct device *dev, void *_data)
 {
 	union power_supply_propval ret = {0,};
-	struct power_supply *psy = data;
 	struct power_supply *epsy = dev_get_drvdata(dev);
+	struct psy_am_i_supplied_data *data = _data;
 
-	if (__power_supply_is_supplied_by(epsy, psy))
+	data->count++;
+	if (__power_supply_is_supplied_by(epsy, data->psy))
 		if (!epsy->desc->get_property(epsy, POWER_SUPPLY_PROP_ONLINE,
 					&ret))
 			return ret.intval;
@@ -296,12 +302,16 @@ static int __power_supply_am_i_supplied(struct device *dev, void *data)
 
 int power_supply_am_i_supplied(struct power_supply *psy)
 {
+	struct psy_am_i_supplied_data data = { psy, 0 };
 	int error;
 
-	error = class_for_each_device(power_supply_class, NULL, psy,
+	error = class_for_each_device(power_supply_class, NULL, &data,
 				      __power_supply_am_i_supplied);
 
-	dev_dbg(&psy->dev, "%s %d\n", __func__, error);
+	dev_dbg(&psy->dev, "%s count %u err %d\n", __func__, data.count, error);
+
+	if (data.count == 0)
+		return -ENODEV;
 
 	return error;
 }
-- 
cgit v1.2.3


From 1eb2869b3c90f4e7971515ddbe92f9e0d18827f5 Mon Sep 17 00:00:00 2001
From: Liam Breck <kernel@networkimprov.net>
Date: Sun, 16 Apr 2017 16:06:26 -0700
Subject: power: supply: bq24190_charger: Deprecate battery class and replicate
 its features in charger

The driver was registering two classes, bq24190-battery & -charger.
Because the power supply framework cannot surface features from multiple
drivers in a single class, a fuel gauge driver would create a third class,
which some power management utilities cannot see.

Deprecate the -battery class for future removal and replicate its features
in -charger. Set /sys/class...-charger/online = pg_stat && !batfet_disable.
If device_property "omit-battery-class" is set, don't register -battery.

Cc: Tony Lindgren <tony@atomide.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Liam Breck <kernel@networkimprov.net>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/bq24190_charger.c | 146 +++++++++++++++++++++++++--------
 1 file changed, 111 insertions(+), 35 deletions(-)

diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c
index 2d379a2f99f1..d5a707e14526 100644
--- a/drivers/power/supply/bq24190_charger.c
+++ b/drivers/power/supply/bq24190_charger.c
@@ -662,22 +662,25 @@ static int bq24190_charger_get_health(struct bq24190_dev_info *bdi,
 	v = bdi->f_reg;
 	mutex_unlock(&bdi->f_reg_lock);
 
-	if (v & BQ24190_REG_F_BOOST_FAULT_MASK) {
-		/*
-		 * This could be over-current or over-voltage but there's
-		 * no way to tell which.  Return 'OVERVOLTAGE' since there
-		 * isn't an 'OVERCURRENT' value defined that we can return
-		 * even if it was over-current.
-		 */
-		health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
-	} else {
-		v &= BQ24190_REG_F_CHRG_FAULT_MASK;
-		v >>= BQ24190_REG_F_CHRG_FAULT_SHIFT;
-
-		switch (v) {
-		case 0x0: /* Normal */
-			health = POWER_SUPPLY_HEALTH_GOOD;
+	if (v & BQ24190_REG_F_NTC_FAULT_MASK) {
+		switch (v >> BQ24190_REG_F_NTC_FAULT_SHIFT & 0x7) {
+		case 0x1: /* TS1  Cold */
+		case 0x3: /* TS2  Cold */
+		case 0x5: /* Both Cold */
+			health = POWER_SUPPLY_HEALTH_COLD;
 			break;
+		case 0x2: /* TS1  Hot */
+		case 0x4: /* TS2  Hot */
+		case 0x6: /* Both Hot */
+			health = POWER_SUPPLY_HEALTH_OVERHEAT;
+			break;
+		default:
+			health = POWER_SUPPLY_HEALTH_UNKNOWN;
+		}
+	} else if (v & BQ24190_REG_F_BAT_FAULT_MASK) {
+		health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+	} else if (v & BQ24190_REG_F_CHRG_FAULT_MASK) {
+		switch (v >> BQ24190_REG_F_CHRG_FAULT_SHIFT & 0x3) {
 		case 0x1: /* Input Fault (VBUS OVP or VBAT<VBUS<3.8V) */
 			/*
 			 * This could be over-voltage or under-voltage
@@ -694,9 +697,19 @@ static int bq24190_charger_get_health(struct bq24190_dev_info *bdi,
 		case 0x3: /* Charge Safety Timer Expiration */
 			health = POWER_SUPPLY_HEALTH_SAFETY_TIMER_EXPIRE;
 			break;
-		default:
-			health = POWER_SUPPLY_HEALTH_UNKNOWN;
+		default:  /* prevent compiler warning */
+			health = -1;
 		}
+	} else if (v & BQ24190_REG_F_BOOST_FAULT_MASK) {
+		/*
+		 * This could be over-current or over-voltage but there's
+		 * no way to tell which.  Return 'OVERVOLTAGE' since there
+		 * isn't an 'OVERCURRENT' value defined that we can return
+		 * even if it was over-current.
+		 */
+		health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+	} else {
+		health = POWER_SUPPLY_HEALTH_GOOD;
 	}
 
 	val->intval = health;
@@ -707,19 +720,59 @@ static int bq24190_charger_get_health(struct bq24190_dev_info *bdi,
 static int bq24190_charger_get_online(struct bq24190_dev_info *bdi,
 		union power_supply_propval *val)
 {
-	u8 v;
+	u8 pg_stat, batfet_disable;
 	int ret;
 
 	ret = bq24190_read_mask(bdi, BQ24190_REG_SS,
 			BQ24190_REG_SS_PG_STAT_MASK,
-			BQ24190_REG_SS_PG_STAT_SHIFT, &v);
+			BQ24190_REG_SS_PG_STAT_SHIFT, &pg_stat);
 	if (ret < 0)
 		return ret;
 
-	val->intval = v;
+	ret = bq24190_read_mask(bdi, BQ24190_REG_MOC,
+			BQ24190_REG_MOC_BATFET_DISABLE_MASK,
+			BQ24190_REG_MOC_BATFET_DISABLE_SHIFT, &batfet_disable);
+	if (ret < 0)
+		return ret;
+
+	val->intval = pg_stat && !batfet_disable;
+
 	return 0;
 }
 
+static int bq24190_battery_set_online(struct bq24190_dev_info *bdi,
+				      const union power_supply_propval *val);
+static int bq24190_battery_get_status(struct bq24190_dev_info *bdi,
+				      union power_supply_propval *val);
+static int bq24190_battery_get_temp_alert_max(struct bq24190_dev_info *bdi,
+					      union power_supply_propval *val);
+static int bq24190_battery_set_temp_alert_max(struct bq24190_dev_info *bdi,
+					      const union power_supply_propval *val);
+
+static int bq24190_charger_set_online(struct bq24190_dev_info *bdi,
+				      const union power_supply_propval *val)
+{
+	return bq24190_battery_set_online(bdi, val);
+}
+
+static int bq24190_charger_get_status(struct bq24190_dev_info *bdi,
+				      union power_supply_propval *val)
+{
+	return bq24190_battery_get_status(bdi, val);
+}
+
+static int bq24190_charger_get_temp_alert_max(struct bq24190_dev_info *bdi,
+					      union power_supply_propval *val)
+{
+	return bq24190_battery_get_temp_alert_max(bdi, val);
+}
+
+static int bq24190_charger_set_temp_alert_max(struct bq24190_dev_info *bdi,
+					      const union power_supply_propval *val)
+{
+	return bq24190_battery_set_temp_alert_max(bdi, val);
+}
+
 static int bq24190_charger_get_current(struct bq24190_dev_info *bdi,
 		union power_supply_propval *val)
 {
@@ -834,6 +887,12 @@ static int bq24190_charger_get_property(struct power_supply *psy,
 	case POWER_SUPPLY_PROP_ONLINE:
 		ret = bq24190_charger_get_online(bdi, val);
 		break;
+	case POWER_SUPPLY_PROP_STATUS:
+		ret = bq24190_charger_get_status(bdi, val);
+		break;
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
+		ret =  bq24190_charger_get_temp_alert_max(bdi, val);
+		break;
 	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
 		ret = bq24190_charger_get_current(bdi, val);
 		break;
@@ -882,6 +941,12 @@ static int bq24190_charger_set_property(struct power_supply *psy,
 		return ret;
 
 	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		ret = bq24190_charger_set_online(bdi, val);
+		break;
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
+		ret = bq24190_charger_set_temp_alert_max(bdi, val);
+		break;
 	case POWER_SUPPLY_PROP_CHARGE_TYPE:
 		ret = bq24190_charger_set_charge_type(bdi, val);
 		break;
@@ -907,6 +972,8 @@ static int bq24190_charger_property_is_writeable(struct power_supply *psy,
 	int ret;
 
 	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
 	case POWER_SUPPLY_PROP_CHARGE_TYPE:
 	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
 	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
@@ -923,6 +990,8 @@ static enum power_supply_property bq24190_charger_properties[] = {
 	POWER_SUPPLY_PROP_CHARGE_TYPE,
 	POWER_SUPPLY_PROP_HEALTH,
 	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_TEMP_ALERT_MAX,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE,
@@ -1096,6 +1165,7 @@ static int bq24190_battery_get_property(struct power_supply *psy,
 	struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy);
 	int ret;
 
+	dev_warn(bdi->dev, "warning: /sys/class/power_supply/bq24190-battery is deprecated\n");
 	dev_dbg(bdi->dev, "prop: %d\n", psp);
 
 	ret = pm_runtime_get_sync(bdi->dev);
@@ -1141,6 +1211,7 @@ static int bq24190_battery_set_property(struct power_supply *psy,
 	struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy);
 	int ret;
 
+	dev_warn(bdi->dev, "warning: /sys/class/power_supply/bq24190-battery is deprecated\n");
 	dev_dbg(bdi->dev, "prop: %d\n", psp);
 
 	ret = pm_runtime_get_sync(bdi->dev);
@@ -1269,9 +1340,9 @@ static void bq24190_check_status(struct bq24190_dev_info *bdi)
 		bdi->ss_reg = ss_reg;
 	}
 
-	if (alert_charger)
+	if (alert_charger || alert_battery)
 		power_supply_changed(bdi->charger);
-	if (alert_battery)
+	if (alert_battery && bdi->battery)
 		power_supply_changed(bdi->battery);
 
 	dev_dbg(bdi->dev, "ss_reg: 0x%02x, f_reg: 0x%02x\n", ss_reg, f_reg);
@@ -1476,19 +1547,23 @@ static int bq24190_probe(struct i2c_client *client,
 		goto out_pmrt;
 	}
 
-	battery_cfg.drv_data = bdi;
-	bdi->battery = power_supply_register(dev, &bq24190_battery_desc,
-						&battery_cfg);
-	if (IS_ERR(bdi->battery)) {
-		dev_err(dev, "Can't register battery\n");
-		ret = PTR_ERR(bdi->battery);
-		goto out_charger;
+	/* the battery class is deprecated and will be removed. */
+	/* in the interim, this property hides it.              */
+	if (!device_property_read_bool(dev, "omit-battery-class")) {
+		battery_cfg.drv_data = bdi;
+		bdi->battery = power_supply_register(dev, &bq24190_battery_desc,
+						     &battery_cfg);
+		if (IS_ERR(bdi->battery)) {
+			dev_err(dev, "Can't register battery\n");
+			ret = PTR_ERR(bdi->battery);
+			goto out_charger;
+		}
 	}
 
 	ret = bq24190_sysfs_create_group(bdi);
 	if (ret) {
 		dev_err(dev, "Can't create sysfs entries\n");
-		goto out_battery;
+		goto out_charger;
 	}
 
 	bdi->initialized = true;
@@ -1526,10 +1601,9 @@ static int bq24190_probe(struct i2c_client *client,
 out_sysfs:
 	bq24190_sysfs_remove_group(bdi);
 
-out_battery:
-	power_supply_unregister(bdi->battery);
-
 out_charger:
+	if (!IS_ERR_OR_NULL(bdi->battery))
+		power_supply_unregister(bdi->battery);
 	power_supply_unregister(bdi->charger);
 
 out_pmrt:
@@ -1552,7 +1626,8 @@ static int bq24190_remove(struct i2c_client *client)
 
 	bq24190_register_reset(bdi);
 	bq24190_sysfs_remove_group(bdi);
-	power_supply_unregister(bdi->battery);
+	if (bdi->battery)
+		power_supply_unregister(bdi->battery);
 	power_supply_unregister(bdi->charger);
 	if (error >= 0)
 		pm_runtime_put_sync(bdi->dev);
@@ -1639,7 +1714,8 @@ static __maybe_unused int bq24190_pm_resume(struct device *dev)
 
 	/* Things may have changed while suspended so alert upper layer */
 	power_supply_changed(bdi->charger);
-	power_supply_changed(bdi->battery);
+	if (bdi->battery)
+		power_supply_changed(bdi->battery);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 7221675c70d147f2ae6f044697dff046f3805224 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@free-electrons.com>
Date: Tue, 18 Apr 2017 09:34:16 +0200
Subject: dt-bindings: power: supply: add AXP20X/AXP22X battery DT binding

The X-Powers AXP20X and AXP22X PMICs can have a battery as power supply.

This patch adds the DT binding documentation for the battery power
supply which gets various data from the PMIC, such as the battery status
(charging, discharging, full, dead), current max limit, current current,
battery capacity (in percentage), voltage max and min limits, current
voltage and battery capacity (in Ah).

Signed-off-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 .../bindings/power/supply/axp20x_battery.txt         | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/power/supply/axp20x_battery.txt

diff --git a/Documentation/devicetree/bindings/power/supply/axp20x_battery.txt b/Documentation/devicetree/bindings/power/supply/axp20x_battery.txt
new file mode 100644
index 000000000000..c24886676a60
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/axp20x_battery.txt
@@ -0,0 +1,20 @@
+AXP20x and AXP22x battery power supply
+
+Required Properties:
+ - compatible, one of:
+			"x-powers,axp209-battery-power-supply"
+			"x-powers,axp221-battery-power-supply"
+
+This node is a subnode of the axp20x/axp22x PMIC.
+
+The AXP20X and AXP22X can read the battery voltage, charge and discharge
+currents of the battery by reading ADC channels from the AXP20X/AXP22X
+ADC.
+
+Example:
+
+&axp209 {
+	battery_power_supply: battery-power-supply {
+		compatible = "x-powers,axp209-battery-power-supply";
+	}
+};
-- 
cgit v1.2.3


From 46c202b5f25fb6fbd4af60ded133fa745b3601b3 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@free-electrons.com>
Date: Tue, 18 Apr 2017 09:34:17 +0200
Subject: power: supply: add battery driver for AXP20X and AXP22X PMICs

The X-Powers AXP20X and AXP22X PMICs can have a battery as power supply.

This patch adds the battery power supply driver to get various data from
the PMIC, such as the battery status (charging, discharging, full,
dead), current max limit, current current, battery capacity (in
percentage), voltage max and min limits, current voltage and battery
capacity (in Ah).

This battery driver uses the AXP20X/AXP22X ADC driver as PMIC data
provider.

Signed-off-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Acked-by: Jonathan Cameron <jic23@kernel.org>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/Kconfig          |  12 +
 drivers/power/supply/Makefile         |   1 +
 drivers/power/supply/axp20x_battery.c | 502 ++++++++++++++++++++++++++++++++++
 3 files changed, 515 insertions(+)
 create mode 100644 drivers/power/supply/axp20x_battery.c

diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig
index da922756149f..4b718e90b3ca 100644
--- a/drivers/power/supply/Kconfig
+++ b/drivers/power/supply/Kconfig
@@ -238,6 +238,18 @@ config CHARGER_AXP20X
 	  This driver can also be built as a module. If so, the module will be
 	  called axp20x_ac_power.
 
+config BATTERY_AXP20X
+	tristate "X-Powers AXP20X battery driver"
+	depends on MFD_AXP20X
+	depends on AXP20X_ADC
+	depends on IIO
+	help
+	  Say Y here to enable support for X-Powers AXP20X PMICs' battery power
+	  supply.
+
+	  This driver can also be built as a module. If so, the module will be
+	  called axp20x_battery.
+
 config AXP288_CHARGER
 	tristate "X-Powers AXP288 Charger"
 	depends on MFD_AXP20X && EXTCON_AXP288
diff --git a/drivers/power/supply/Makefile b/drivers/power/supply/Makefile
index 39fc733e6cc4..a39126d7a6ce 100644
--- a/drivers/power/supply/Makefile
+++ b/drivers/power/supply/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_TEST_POWER)	+= test_power.o
 
 obj-$(CONFIG_BATTERY_88PM860X)	+= 88pm860x_battery.o
 obj-$(CONFIG_BATTERY_ACT8945A)	+= act8945a_charger.o
+obj-$(CONFIG_BATTERY_AXP20X)	+= axp20x_battery.o
 obj-$(CONFIG_CHARGER_AXP20X)	+= axp20x_ac_power.o
 obj-$(CONFIG_BATTERY_DS2760)	+= ds2760_battery.o
 obj-$(CONFIG_BATTERY_DS2780)	+= ds2780_battery.o
diff --git a/drivers/power/supply/axp20x_battery.c b/drivers/power/supply/axp20x_battery.c
new file mode 100644
index 000000000000..5d29b2eab8fc
--- /dev/null
+++ b/drivers/power/supply/axp20x_battery.c
@@ -0,0 +1,502 @@
+/*
+ * Battery power supply driver for X-Powers AXP20X and AXP22X PMICs
+ *
+ * Copyright 2016 Free Electrons NextThing Co.
+ *	Quentin Schulz <quentin.schulz@free-electrons.com>
+ *
+ * This driver is based on a previous upstreaming attempt by:
+ *	Bruno Prémont <bonbons@linux-vserver.org>
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/iio/iio.h>
+#include <linux/iio/consumer.h>
+#include <linux/mfd/axp20x.h>
+
+#define AXP20X_PWR_STATUS_BAT_CHARGING	BIT(2)
+
+#define AXP20X_PWR_OP_BATT_PRESENT	BIT(5)
+#define AXP20X_PWR_OP_BATT_ACTIVATED	BIT(3)
+
+#define AXP209_FG_PERCENT		GENMASK(6, 0)
+#define AXP22X_FG_VALID			BIT(7)
+
+#define AXP20X_CHRG_CTRL1_TGT_VOLT	GENMASK(6, 5)
+#define AXP20X_CHRG_CTRL1_TGT_4_1V	(0 << 5)
+#define AXP20X_CHRG_CTRL1_TGT_4_15V	(1 << 5)
+#define AXP20X_CHRG_CTRL1_TGT_4_2V	(2 << 5)
+#define AXP20X_CHRG_CTRL1_TGT_4_36V	(3 << 5)
+
+#define AXP22X_CHRG_CTRL1_TGT_4_22V	(1 << 5)
+#define AXP22X_CHRG_CTRL1_TGT_4_24V	(3 << 5)
+
+#define AXP20X_CHRG_CTRL1_TGT_CURR	GENMASK(3, 0)
+
+#define AXP20X_V_OFF_MASK		GENMASK(2, 0)
+
+struct axp20x_batt_ps {
+	struct regmap *regmap;
+	struct power_supply *batt;
+	struct device *dev;
+	struct iio_channel *batt_chrg_i;
+	struct iio_channel *batt_dischrg_i;
+	struct iio_channel *batt_v;
+	u8 axp_id;
+};
+
+static int axp20x_battery_get_max_voltage(struct axp20x_batt_ps *axp20x_batt,
+					  int *val)
+{
+	int ret, reg;
+
+	ret = regmap_read(axp20x_batt->regmap, AXP20X_CHRG_CTRL1, &reg);
+	if (ret)
+		return ret;
+
+	switch (reg & AXP20X_CHRG_CTRL1_TGT_VOLT) {
+	case AXP20X_CHRG_CTRL1_TGT_4_1V:
+		*val = 4100000;
+		break;
+	case AXP20X_CHRG_CTRL1_TGT_4_15V:
+		*val = 4150000;
+		break;
+	case AXP20X_CHRG_CTRL1_TGT_4_2V:
+		*val = 4200000;
+		break;
+	case AXP20X_CHRG_CTRL1_TGT_4_36V:
+		*val = 4360000;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int axp22x_battery_get_max_voltage(struct axp20x_batt_ps *axp20x_batt,
+					  int *val)
+{
+	int ret, reg;
+
+	ret = regmap_read(axp20x_batt->regmap, AXP20X_CHRG_CTRL1, &reg);
+	if (ret)
+		return ret;
+
+	switch (reg & AXP20X_CHRG_CTRL1_TGT_VOLT) {
+	case AXP20X_CHRG_CTRL1_TGT_4_1V:
+		*val = 4100000;
+		break;
+	case AXP20X_CHRG_CTRL1_TGT_4_2V:
+		*val = 4200000;
+		break;
+	case AXP22X_CHRG_CTRL1_TGT_4_22V:
+		*val = 4220000;
+		break;
+	case AXP22X_CHRG_CTRL1_TGT_4_24V:
+		*val = 4240000;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void raw_to_constant_charge_current(struct axp20x_batt_ps *axp, int *val)
+{
+	if (axp->axp_id == AXP209_ID)
+		*val = *val * 100000 + 300000;
+	else
+		*val = *val * 150000 + 300000;
+}
+
+static int axp20x_get_constant_charge_current(struct axp20x_batt_ps *axp,
+					      int *val)
+{
+	int ret;
+
+	ret = regmap_read(axp->regmap, AXP20X_CHRG_CTRL1, val);
+	if (ret)
+		return ret;
+
+	*val &= AXP20X_CHRG_CTRL1_TGT_CURR;
+
+	raw_to_constant_charge_current(axp, val);
+
+	return 0;
+}
+
+static int axp20x_battery_get_prop(struct power_supply *psy,
+				   enum power_supply_property psp,
+				   union power_supply_propval *val)
+{
+	struct axp20x_batt_ps *axp20x_batt = power_supply_get_drvdata(psy);
+	struct iio_channel *chan;
+	int ret = 0, reg, val1;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_PRESENT:
+	case POWER_SUPPLY_PROP_ONLINE:
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_PWR_OP_MODE,
+				  &reg);
+		if (ret)
+			return ret;
+
+		val->intval = !!(reg & AXP20X_PWR_OP_BATT_PRESENT);
+		break;
+
+	case POWER_SUPPLY_PROP_STATUS:
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_PWR_INPUT_STATUS,
+				  &reg);
+		if (ret)
+			return ret;
+
+		if (reg & AXP20X_PWR_STATUS_BAT_CHARGING) {
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+			return 0;
+		}
+
+		ret = iio_read_channel_processed(axp20x_batt->batt_dischrg_i,
+						 &val1);
+		if (ret)
+			return ret;
+
+		if (val1) {
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+			return 0;
+		}
+
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_FG_RES, &val1);
+		if (ret)
+			return ret;
+
+		/*
+		 * Fuel Gauge data takes 7 bits but the stored value seems to be
+		 * directly the raw percentage without any scaling to 7 bits.
+		 */
+		if ((val1 & AXP209_FG_PERCENT) == 100)
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		else
+			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		break;
+
+	case POWER_SUPPLY_PROP_HEALTH:
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_PWR_OP_MODE,
+				  &val1);
+		if (ret)
+			return ret;
+
+		if (val1 & AXP20X_PWR_OP_BATT_ACTIVATED) {
+			val->intval = POWER_SUPPLY_HEALTH_DEAD;
+			return 0;
+		}
+
+		val->intval = POWER_SUPPLY_HEALTH_GOOD;
+		break;
+
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+		ret = axp20x_get_constant_charge_current(axp20x_batt,
+							 &val->intval);
+		if (ret)
+			return ret;
+		break;
+
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
+		val->intval = AXP20X_CHRG_CTRL1_TGT_CURR;
+		raw_to_constant_charge_current(axp20x_batt, &val->intval);
+
+		break;
+
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_PWR_INPUT_STATUS,
+				  &reg);
+		if (ret)
+			return ret;
+
+		if (reg & AXP20X_PWR_STATUS_BAT_CHARGING)
+			chan = axp20x_batt->batt_chrg_i;
+		else
+			chan = axp20x_batt->batt_dischrg_i;
+
+		ret = iio_read_channel_processed(chan, &val->intval);
+		if (ret)
+			return ret;
+
+		/* IIO framework gives mA but Power Supply framework gives uA */
+		val->intval *= 1000;
+		break;
+
+	case POWER_SUPPLY_PROP_CAPACITY:
+		/* When no battery is present, return capacity is 100% */
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_PWR_OP_MODE,
+				  &reg);
+		if (ret)
+			return ret;
+
+		if (!(reg & AXP20X_PWR_OP_BATT_PRESENT)) {
+			val->intval = 100;
+			return 0;
+		}
+
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_FG_RES, &reg);
+		if (ret)
+			return ret;
+
+		if (axp20x_batt->axp_id == AXP221_ID &&
+		    !(reg & AXP22X_FG_VALID))
+			return -EINVAL;
+
+		/*
+		 * Fuel Gauge data takes 7 bits but the stored value seems to be
+		 * directly the raw percentage without any scaling to 7 bits.
+		 */
+		val->intval = reg & AXP209_FG_PERCENT;
+		break;
+
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		if (axp20x_batt->axp_id == AXP209_ID)
+			return axp20x_battery_get_max_voltage(axp20x_batt,
+							      &val->intval);
+		return axp22x_battery_get_max_voltage(axp20x_batt,
+						      &val->intval);
+
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		ret = regmap_read(axp20x_batt->regmap, AXP20X_V_OFF, &reg);
+		if (ret)
+			return ret;
+
+		val->intval = 2600000 + 100000 * (reg & AXP20X_V_OFF_MASK);
+		break;
+
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		ret = iio_read_channel_processed(axp20x_batt->batt_v,
+						 &val->intval);
+		if (ret)
+			return ret;
+
+		/* IIO framework gives mV but Power Supply framework gives uV */
+		val->intval *= 1000;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int axp20x_battery_set_max_voltage(struct axp20x_batt_ps *axp20x_batt,
+					  int val)
+{
+	switch (val) {
+	case 4100000:
+		val = AXP20X_CHRG_CTRL1_TGT_4_1V;
+		break;
+
+	case 4150000:
+		if (axp20x_batt->axp_id == AXP221_ID)
+			return -EINVAL;
+
+		val = AXP20X_CHRG_CTRL1_TGT_4_15V;
+		break;
+
+	case 4200000:
+		val = AXP20X_CHRG_CTRL1_TGT_4_2V;
+		break;
+
+	default:
+		/*
+		 * AXP20x max voltage can be set to 4.36V and AXP22X max voltage
+		 * can be set to 4.22V and 4.24V, but these voltages are too
+		 * high for Lithium based batteries (AXP PMICs are supposed to
+		 * be used with these kinds of battery).
+		 */
+		return -EINVAL;
+	}
+
+	return regmap_update_bits(axp20x_batt->regmap, AXP20X_CHRG_CTRL1,
+				  AXP20X_CHRG_CTRL1_TGT_VOLT, val);
+}
+
+static int axp20x_set_constant_charge_current(struct axp20x_batt_ps *axp_batt,
+					      int charge_current)
+{
+	if (axp_batt->axp_id == AXP209_ID)
+		charge_current = (charge_current - 300000) / 100000;
+	else
+		charge_current = (charge_current - 300000) / 150000;
+
+	if (charge_current > AXP20X_CHRG_CTRL1_TGT_CURR || charge_current < 0)
+		return -EINVAL;
+
+	return regmap_update_bits(axp_batt->regmap, AXP20X_CHRG_CTRL1,
+				  AXP20X_CHRG_CTRL1_TGT_CURR, charge_current);
+}
+
+static int axp20x_set_voltage_min_design(struct axp20x_batt_ps *axp_batt,
+					 int min_voltage)
+{
+	int val1 = (min_voltage - 2600000) / 100000;
+
+	if (val1 < 0 || val1 > AXP20X_V_OFF_MASK)
+		return -EINVAL;
+
+	return regmap_update_bits(axp_batt->regmap, AXP20X_V_OFF,
+				  AXP20X_V_OFF_MASK, val1);
+}
+
+static int axp20x_battery_set_prop(struct power_supply *psy,
+				   enum power_supply_property psp,
+				   const union power_supply_propval *val)
+{
+	struct axp20x_batt_ps *axp20x_batt = power_supply_get_drvdata(psy);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		return axp20x_set_voltage_min_design(axp20x_batt, val->intval);
+
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		return axp20x_battery_set_max_voltage(axp20x_batt, val->intval);
+
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+		return axp20x_set_constant_charge_current(axp20x_batt,
+							  val->intval);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static enum power_supply_property axp20x_battery_props[] = {
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT,
+	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX,
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_CAPACITY,
+};
+
+static int axp20x_battery_prop_writeable(struct power_supply *psy,
+					 enum power_supply_property psp)
+{
+	return psp == POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN ||
+	       psp == POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN ||
+	       psp == POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT;
+}
+
+static const struct power_supply_desc axp20x_batt_ps_desc = {
+	.name = "axp20x-battery",
+	.type = POWER_SUPPLY_TYPE_BATTERY,
+	.properties = axp20x_battery_props,
+	.num_properties = ARRAY_SIZE(axp20x_battery_props),
+	.property_is_writeable = axp20x_battery_prop_writeable,
+	.get_property = axp20x_battery_get_prop,
+	.set_property = axp20x_battery_set_prop,
+};
+
+static const struct of_device_id axp20x_battery_ps_id[] = {
+	{
+		.compatible = "x-powers,axp209-battery-power-supply",
+		.data = (void *)AXP209_ID,
+	}, {
+		.compatible = "x-powers,axp221-battery-power-supply",
+		.data = (void *)AXP221_ID,
+	}, { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, axp20x_battery_ps_id);
+
+static int axp20x_power_probe(struct platform_device *pdev)
+{
+	struct axp20x_batt_ps *axp20x_batt;
+	struct power_supply_config psy_cfg = {};
+
+	if (!of_device_is_available(pdev->dev.of_node))
+		return -ENODEV;
+
+	axp20x_batt = devm_kzalloc(&pdev->dev, sizeof(*axp20x_batt),
+				   GFP_KERNEL);
+	if (!axp20x_batt)
+		return -ENOMEM;
+
+	axp20x_batt->dev = &pdev->dev;
+
+	axp20x_batt->batt_v = devm_iio_channel_get(&pdev->dev, "batt_v");
+	if (IS_ERR(axp20x_batt->batt_v)) {
+		if (PTR_ERR(axp20x_batt->batt_v) == -ENODEV)
+			return -EPROBE_DEFER;
+		return PTR_ERR(axp20x_batt->batt_v);
+	}
+
+	axp20x_batt->batt_chrg_i = devm_iio_channel_get(&pdev->dev,
+							"batt_chrg_i");
+	if (IS_ERR(axp20x_batt->batt_chrg_i)) {
+		if (PTR_ERR(axp20x_batt->batt_chrg_i) == -ENODEV)
+			return -EPROBE_DEFER;
+		return PTR_ERR(axp20x_batt->batt_chrg_i);
+	}
+
+	axp20x_batt->batt_dischrg_i = devm_iio_channel_get(&pdev->dev,
+							   "batt_dischrg_i");
+	if (IS_ERR(axp20x_batt->batt_dischrg_i)) {
+		if (PTR_ERR(axp20x_batt->batt_dischrg_i) == -ENODEV)
+			return -EPROBE_DEFER;
+		return PTR_ERR(axp20x_batt->batt_dischrg_i);
+	}
+
+	axp20x_batt->regmap = dev_get_regmap(pdev->dev.parent, NULL);
+	platform_set_drvdata(pdev, axp20x_batt);
+
+	psy_cfg.drv_data = axp20x_batt;
+	psy_cfg.of_node = pdev->dev.of_node;
+
+	axp20x_batt->axp_id = (uintptr_t)of_device_get_match_data(&pdev->dev);
+
+	axp20x_batt->batt = devm_power_supply_register(&pdev->dev,
+						       &axp20x_batt_ps_desc,
+						       &psy_cfg);
+	if (IS_ERR(axp20x_batt->batt)) {
+		dev_err(&pdev->dev, "failed to register power supply: %ld\n",
+			PTR_ERR(axp20x_batt->batt));
+		return PTR_ERR(axp20x_batt->batt);
+	}
+
+	return 0;
+}
+
+static struct platform_driver axp20x_batt_driver = {
+	.probe    = axp20x_power_probe,
+	.driver   = {
+		.name  = "axp20x-battery-power-supply",
+		.of_match_table = axp20x_battery_ps_id,
+	},
+};
+
+module_platform_driver(axp20x_batt_driver);
+
+MODULE_DESCRIPTION("Battery power supply driver for AXP20X and AXP22X PMICs");
+MODULE_AUTHOR("Quentin Schulz <quentin.schulz@free-electrons.com>");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From bb8fe8ea0067083e0452d5c67a4ab70ad72cc52f Mon Sep 17 00:00:00 2001
From: Ryosuke Saito <raitosyo@gmail.com>
Date: Fri, 21 Apr 2017 12:13:17 +0900
Subject: power: supply: sbs-battery: fix the sbs interrupt request

Since we use the default primary handler for the irq, IRQF_ONESHOT must
be set. Otherwise the request fails and the following errors are
displayed:

genirq: Threaded irq requested with handler=NULL and !ONESHOT for irq 129
sbs-battery 0-000b: Failed to request irq: -22

Signed-off-by: Ryosuke Saito <raitosyo@gmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/sbs-battery.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/supply/sbs-battery.c b/drivers/power/supply/sbs-battery.c
index 8bb2eb38eb1c..e07d570f504f 100644
--- a/drivers/power/supply/sbs-battery.c
+++ b/drivers/power/supply/sbs-battery.c
@@ -820,7 +820,7 @@ static int sbs_probe(struct i2c_client *client,
 	}
 
 	rc = devm_request_threaded_irq(&client->dev, irq, NULL, sbs_irq,
-		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 		dev_name(&client->dev), chip);
 	if (rc) {
 		dev_warn(&client->dev, "Failed to request irq: %d\n", rc);
-- 
cgit v1.2.3


From 756e142a4b3b25aac44eaa54ded125fce0e8fcc7 Mon Sep 17 00:00:00 2001
From: Rahul Bedarkar <rahulbedarkar89@gmail.com>
Date: Fri, 21 Apr 2017 16:37:57 +0530
Subject: power: supply: generic-adc-battery: use SIMPLE_DEV_PM_OPS helper
 macro

Replace ifdefs with SIMPLE_DEV_PM_OPS helper macro.

Signed-off-by: Rahul Bedarkar <rahulbedarkar89@gmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/generic-adc-battery.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/power/supply/generic-adc-battery.c b/drivers/power/supply/generic-adc-battery.c
index edb36bf781b0..37e523374fe0 100644
--- a/drivers/power/supply/generic-adc-battery.c
+++ b/drivers/power/supply/generic-adc-battery.c
@@ -383,8 +383,7 @@ static int gab_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int gab_suspend(struct device *dev)
+static int __maybe_unused gab_suspend(struct device *dev)
 {
 	struct gab *adc_bat = dev_get_drvdata(dev);
 
@@ -393,7 +392,7 @@ static int gab_suspend(struct device *dev)
 	return 0;
 }
 
-static int gab_resume(struct device *dev)
+static int __maybe_unused gab_resume(struct device *dev)
 {
 	struct gab *adc_bat = dev_get_drvdata(dev);
 	struct gab_platform_data *pdata = adc_bat->pdata;
@@ -407,20 +406,12 @@ static int gab_resume(struct device *dev)
 	return 0;
 }
 
-static const struct dev_pm_ops gab_pm_ops = {
-	.suspend        = gab_suspend,
-	.resume         = gab_resume,
-};
-
-#define GAB_PM_OPS       (&gab_pm_ops)
-#else
-#define GAB_PM_OPS       (NULL)
-#endif
+static SIMPLE_DEV_PM_OPS(gab_pm_ops, gab_suspend, gab_resume);
 
 static struct platform_driver gab_driver = {
 	.driver		= {
 		.name	= "generic-adc-battery",
-		.pm	= GAB_PM_OPS
+		.pm	= &gab_pm_ops,
 	},
 	.probe		= gab_probe,
 	.remove		= gab_remove,
-- 
cgit v1.2.3


From 8b20839988f1ed5e534b270f3776709b640dc7e0 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Mon, 24 Apr 2017 16:22:08 +0800
Subject: power: supply: isp1704: Fix unchecked return value of devm_kzalloc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Function devm_kzalloc() will return a NULL pointer. However, in function
isp1704_charger_probe(), the return value of devm_kzalloc() is directly
used without validation. This may result in a bad memory access bug.

Fixes: 34a109610e2a ("isp1704_charger: Add DT support")
Signed-off-by: Pan Bian <bianpan2016@163.com>
Reviewed-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/isp1704_charger.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/power/supply/isp1704_charger.c b/drivers/power/supply/isp1704_charger.c
index 4cd6899b961e..95af5f305838 100644
--- a/drivers/power/supply/isp1704_charger.c
+++ b/drivers/power/supply/isp1704_charger.c
@@ -418,6 +418,10 @@ static int isp1704_charger_probe(struct platform_device *pdev)
 
 		pdata = devm_kzalloc(&pdev->dev,
 			sizeof(struct isp1704_charger_data), GFP_KERNEL);
+		if (!pdata) {
+			ret = -ENOMEM;
+			goto fail0;
+		}
 		pdata->enable_gpio = gpio;
 
 		dev_info(&pdev->dev, "init gpio %d\n", pdata->enable_gpio);
-- 
cgit v1.2.3


From 6254a6a94489c4be717f757bec7d3a372cba1b6e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 27 Apr 2017 21:11:48 +0200
Subject: power: supply: axp20x_usb_power: add IIO dependency

When CONFIG_IIO=m and the axp20x_usb_power driver is built-in, we get
a link time error:

drivers/power/built-in.o: In function `axp20x_usb_power_get_property':
undefined reference to `iio_read_channel_processed'
drivers/power/built-in.o: In function `axp20x_usb_power_probe':
undefined reference to `devm_iio_channel_get'
undefined reference to `devm_iio_channel_get'

This adds the same dependency that we already have for the AC power driver
to the USB power driver. For consistency, I'm also moving the two closer
together in the Kconfig file.

Fixes: 33863c938caa ("power: supply: axp20x_usb_power: use IIO channels when available")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/Kconfig | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig
index 4b718e90b3ca..86f40bf37c34 100644
--- a/drivers/power/supply/Kconfig
+++ b/drivers/power/supply/Kconfig
@@ -250,6 +250,14 @@ config BATTERY_AXP20X
 	  This driver can also be built as a module. If so, the module will be
 	  called axp20x_battery.
 
+config AXP20X_POWER
+	tristate "AXP20x power supply driver"
+	depends on MFD_AXP20X
+	depends on IIO
+	help
+	  This driver provides support for the power supply features of
+	  AXP20x PMIC.
+
 config AXP288_CHARGER
 	tristate "X-Powers AXP288 Charger"
 	depends on MFD_AXP20X && EXTCON_AXP288
@@ -553,11 +561,4 @@ config CHARGER_RT9455
 	help
 	  Say Y to enable support for Richtek RT9455 battery charger.
 
-config AXP20X_POWER
-	tristate "AXP20x power supply driver"
-	depends on MFD_AXP20X
-	help
-	  This driver provides support for the power supply features of
-	  AXP20x PMIC.
-
 endif # POWER_SUPPLY
-- 
cgit v1.2.3


From 58532e66261e8cee52ac0abbe33c502b6e76f380 Mon Sep 17 00:00:00 2001
From: Stefan Roese <sr@denx.de>
Date: Thu, 27 Apr 2017 14:21:40 +0200
Subject: dmaengine: dmatest: Select DMA_ENGINE_RAID as its needed for the
 slave_sg test

To enable usage of multiple SG buffers via the sg_buffers= module
parameter, lets select DMA_ENGINE_RAID via Kconfig when DMATEST is
configured. Otherwise the dmatest will "BUG" when more than 1
buffer (total of 2 for src + dst) is configured via sg_buffers.

Signed-off-by: Stefan Roese <sr@denx.de>
Cc: Kedareswara rao Appana <appanad@xilinx.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index d01d59812cf3..9be6736cb3d2 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -608,6 +608,7 @@ config ASYNC_TX_DMA
 config DMATEST
 	tristate "DMA Test client"
 	depends on DMA_ENGINE
+	select DMA_ENGINE_RAID
 	help
 	  Simple DMA test client. Say N unless you're debugging a
 	  DMA Device driver.
-- 
cgit v1.2.3


From fbfb8e1dcea0508f203535a92faea7a3ddf24558 Mon Sep 17 00:00:00 2001
From: Stefan Roese <sr@denx.de>
Date: Thu, 27 Apr 2017 14:21:41 +0200
Subject: dmaengine: dmatest: Add check for supported buffer count (sg_buffers)

When using dmatest with sg_buffers=128 I stumbled upon the problem, that
the "map_cnt" variable of "struct dmaengine_unmap_data" was set to 0.
"map_cnt" is an "u8" variable, resulting in an overrun when its
value is set to src_cnt + dst_cnt, to twice the sg_buffer value.

This patch adds a small check to dmatest, so that this confusing error
is detected and the test is aborted.

Signed-off-by: Stefan Roese <sr@denx.de>
Cc: Kedareswara rao Appana <appanad@xilinx.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/dmatest.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index d042b2b4965e..a07ef3d6b3ec 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -535,6 +535,13 @@ static int dmatest_func(void *data)
 
 		total_tests++;
 
+		/* Check if buffer count fits into map count variable (u8) */
+		if ((src_cnt + dst_cnt) >= 255) {
+			pr_err("too many buffers (%d of 255 supported)\n",
+			       src_cnt + dst_cnt);
+			break;
+		}
+
 		if (1 << align > params->buf_size) {
 			pr_err("%u-byte buffer too small for %d-byte alignment\n",
 			       params->buf_size, 1 << align);
-- 
cgit v1.2.3


From 7f232af35620fe68746cb3da582062de965b286c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 29 Apr 2017 22:56:49 +0100
Subject: power: supply: ab8500_charger: spelling: "prechage" -> "precharge"

trivial fix to spelling mistake in dev_error message.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/ab8500_charger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/supply/ab8500_charger.c b/drivers/power/supply/ab8500_charger.c
index 5cee9aa87aa3..4ebbcce45c48 100644
--- a/drivers/power/supply/ab8500_charger.c
+++ b/drivers/power/supply/ab8500_charger.c
@@ -3238,7 +3238,7 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di)
 			BUS_PP_PRECHG_CURRENT_MASK, 0);
 		if (ret) {
 			dev_err(di->dev,
-				"failed to setup usb power path prechage current\n");
+				"failed to setup usb power path precharge current\n");
 			goto out;
 		}
 	}
-- 
cgit v1.2.3


From c67c06939ee4a24fcad1235dd9632153533fdd4d Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 14 Apr 2017 20:32:47 +0200
Subject: power: supply: max17042_battery: Use sign_extend32 instead of DIY
 code

Use sign_extend32 to sign-extend variables where necessary instead of
DIY code.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/max17042_battery.c | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index da7a75f82489..790dfa9d0a2e 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -106,13 +106,7 @@ static int max17042_get_temperature(struct max17042_chip *chip, int *temp)
 	if (ret < 0)
 		return ret;
 
-	*temp = data;
-	/* The value is signed. */
-	if (*temp & 0x8000) {
-		*temp = (0x7fff & ~*temp) + 1;
-		*temp *= -1;
-	}
-
+	*temp = sign_extend32(data, 15);
 	/* The value is converted into deci-centigrade scale */
 	/* Units of LSB = 1 / 256 degree Celsius */
 	*temp = *temp * 10 / 256;
@@ -302,13 +296,7 @@ static int max17042_get_property(struct power_supply *psy,
 			if (ret < 0)
 				return ret;
 
-			val->intval = data;
-			if (val->intval & 0x8000) {
-				/* Negative */
-				val->intval = ~val->intval & 0x7fff;
-				val->intval++;
-				val->intval *= -1;
-			}
+			val->intval = sign_extend32(data, 15);
 			val->intval *= 1562500 / chip->pdata->r_sns;
 		} else {
 			return -EINVAL;
@@ -320,13 +308,7 @@ static int max17042_get_property(struct power_supply *psy,
 			if (ret < 0)
 				return ret;
 
-			val->intval = data;
-			if (val->intval & 0x8000) {
-				/* Negative */
-				val->intval = ~val->intval & 0x7fff;
-				val->intval++;
-				val->intval *= -1;
-			}
+			val->intval = sign_extend32(data, 15);
 			val->intval *= 1562500 / chip->pdata->r_sns;
 		} else {
 			return -EINVAL;
-- 
cgit v1.2.3


From 2814913c3136b41084a896c90062bd9b87672dff Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 14 Apr 2017 20:32:48 +0200
Subject: power: supply: max17047_battery: The temp alert values are 8-bit 2's
 complement

The temp alert values are 8-bit 2's complement, so sign-extend them
before reporting them back to the caller.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/max17042_battery.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index 790dfa9d0a2e..a51b2965cd5c 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -270,14 +270,14 @@ static int max17042_get_property(struct power_supply *psy,
 		if (ret < 0)
 			return ret;
 		/* LSB is Alert Minimum. In deci-centigrade */
-		val->intval = (data & 0xff) * 10;
+		val->intval = sign_extend32(data & 0xff, 7) * 10;
 		break;
 	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
 		ret = regmap_read(map, MAX17042_TALRT_Th, &data);
 		if (ret < 0)
 			return ret;
 		/* MSB is Alert Maximum. In deci-centigrade */
-		val->intval = (data >> 8) * 10;
+		val->intval = sign_extend32(data >> 8, 7) * 10;
 		break;
 	case POWER_SUPPLY_PROP_TEMP_MIN:
 		val->intval = chip->pdata->temp_min;
-- 
cgit v1.2.3


From 917362135b8a5c0680acf08807e9fc6179eb6c79 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 14 Apr 2017 20:32:49 +0200
Subject: power: supply: max17042_battery: Add default platform_data fallback
 data

Some x86 machines use a max17047 fuel-gauge and x86 might be missing
platform_data if not provided by SFI.

This commit adds default platform_data as fallback option so that the
driver can work on boards where no platform_data is provided.

Since not all boards have a thermistor hooked up, set temp_min to 0 and
change the health checks from temp <= temp_min to temp < temp_min to
not trigger on such boards (where temp reads 0).

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/max17042_battery.c | 60 +++++++++++++++++++++++++++++----
 include/linux/power/max17042_battery.h  |  6 +++-
 2 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index a51b2965cd5c..f0ff6e880ff2 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -150,12 +150,12 @@ static int max17042_get_battery_health(struct max17042_chip *chip, int *health)
 	if (ret < 0)
 		goto health_error;
 
-	if (temp <= chip->pdata->temp_min) {
+	if (temp < chip->pdata->temp_min) {
 		*health = POWER_SUPPLY_HEALTH_COLD;
 		goto out;
 	}
 
-	if (temp >= chip->pdata->temp_max) {
+	if (temp > chip->pdata->temp_max) {
 		*health = POWER_SUPPLY_HEALTH_OVERHEAT;
 		goto out;
 	}
@@ -772,8 +772,9 @@ static void max17042_init_worker(struct work_struct *work)
 
 #ifdef CONFIG_OF
 static struct max17042_platform_data *
-max17042_get_pdata(struct device *dev)
+max17042_get_pdata(struct max17042_chip *chip)
 {
+	struct device *dev = &chip->client->dev;
 	struct device_node *np = dev->of_node;
 	u32 prop;
 	struct max17042_platform_data *pdata;
@@ -806,10 +807,55 @@ max17042_get_pdata(struct device *dev)
 	return pdata;
 }
 #else
+static struct max17042_reg_data max17047_default_pdata_init_regs[] = {
+	/*
+	 * Some firmwares do not set FullSOCThr, Enable End-of-Charge Detection
+	 * when the voltage FG reports 95%, as recommended in the datasheet.
+	 */
+	{ MAX17047_FullSOCThr, MAX17042_BATTERY_FULL << 8 },
+};
+
 static struct max17042_platform_data *
-max17042_get_pdata(struct device *dev)
+max17042_get_pdata(struct max17042_chip *chip)
 {
-	return dev->platform_data;
+	struct device *dev = &chip->client->dev;
+	struct max17042_platform_data *pdata;
+	int ret, misc_cfg;
+
+	if (dev->platform_data)
+		return dev->platform_data;
+
+	/*
+	 * The MAX17047 gets used on x86 where we might not have pdata, assume
+	 * the firmware will already have initialized the fuel-gauge and provide
+	 * default values for the non init bits to make things work.
+	 */
+	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return pdata;
+
+	if (chip->chip_type != MAXIM_DEVICE_TYPE_MAX17042) {
+		pdata->init_data = max17047_default_pdata_init_regs;
+		pdata->num_init_data =
+			ARRAY_SIZE(max17047_default_pdata_init_regs);
+	}
+
+	ret = regmap_read(chip->regmap, MAX17042_MiscCFG, &misc_cfg);
+	if (ret < 0)
+		return NULL;
+
+	/* If bits 0-1 are set to 3 then only Voltage readings are used */
+	if ((misc_cfg & 0x3) == 0x3)
+		pdata->enable_current_sense = false;
+	else
+		pdata->enable_current_sense = true;
+
+	pdata->vmin = MAX17042_DEFAULT_VMIN;
+	pdata->vmax = MAX17042_DEFAULT_VMAX;
+	pdata->temp_min = MAX17042_DEFAULT_TEMP_MIN;
+	pdata->temp_max = MAX17042_DEFAULT_TEMP_MAX;
+
+	return pdata;
 }
 #endif
 
@@ -858,20 +904,20 @@ static int max17042_probe(struct i2c_client *client,
 		return -ENOMEM;
 
 	chip->client = client;
+	chip->chip_type = id->driver_data;
 	chip->regmap = devm_regmap_init_i2c(client, &max17042_regmap_config);
 	if (IS_ERR(chip->regmap)) {
 		dev_err(&client->dev, "Failed to initialize regmap\n");
 		return -EINVAL;
 	}
 
-	chip->pdata = max17042_get_pdata(&client->dev);
+	chip->pdata = max17042_get_pdata(chip);
 	if (!chip->pdata) {
 		dev_err(&client->dev, "no platform data provided\n");
 		return -EINVAL;
 	}
 
 	i2c_set_clientdata(client, chip);
-	chip->chip_type = id->driver_data;
 	psy_cfg.drv_data = chip;
 
 	/* When current is not measured,
diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h
index 522757ac9cd4..3489fb0f9099 100644
--- a/include/linux/power/max17042_battery.h
+++ b/include/linux/power/max17042_battery.h
@@ -24,8 +24,12 @@
 #define __MAX17042_BATTERY_H_
 
 #define MAX17042_STATUS_BattAbsent	(1 << 3)
-#define MAX17042_BATTERY_FULL	(100)
+#define MAX17042_BATTERY_FULL		(95)   /* Recommend. FullSOCThr value */
 #define MAX17042_DEFAULT_SNS_RESISTOR	(10000)
+#define MAX17042_DEFAULT_VMIN		(3000)
+#define MAX17042_DEFAULT_VMAX		(4500) /* LiHV cell max */
+#define MAX17042_DEFAULT_TEMP_MIN	(0)    /* For sys without temp sensor */
+#define MAX17042_DEFAULT_TEMP_MAX	(700)  /* 70 degrees Celcius */
 
 #define MAX17042_CHARACTERIZATION_DATA_SIZE 48
 
-- 
cgit v1.2.3


From a9df22c00d7c2c9c2944c62f1b819de6c214660f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 14 Apr 2017 20:32:51 +0200
Subject: power: supply: max17042_battery: Add support for the STATUS property

Userspace prefers the driver having a status property over having to guess
itself. Specifically this will properly make the GNOME3 UI (and likely
others) properly show discharging / charging / full status, instead
of always showing discharging as status.

Note that in the case there is no charger driver supplying the max17042,
then a status of unknown will get returned. At least upower treats
this the same as not having a status attribute, so in this case nothing
changes from a userspace pov.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/max17042_battery.c | 46 +++++++++++++++++++++++++++++++++
 include/linux/power/max17042_battery.h  |  3 +++
 2 files changed, 49 insertions(+)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index f0ff6e880ff2..62efe7eeb3f8 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -76,6 +76,7 @@ struct max17042_chip {
 };
 
 static enum power_supply_property max17042_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_PRESENT,
 	POWER_SUPPLY_PROP_CYCLE_COUNT,
 	POWER_SUPPLY_PROP_VOLTAGE_MAX,
@@ -113,6 +114,46 @@ static int max17042_get_temperature(struct max17042_chip *chip, int *temp)
 	return 0;
 }
 
+static int max17042_get_status(struct max17042_chip *chip, int *status)
+{
+	int ret, charge_full, charge_now;
+
+	ret = power_supply_am_i_supplied(chip->battery);
+	if (ret < 0) {
+		*status = POWER_SUPPLY_STATUS_UNKNOWN;
+		return 0;
+	}
+	if (ret == 0) {
+		*status = POWER_SUPPLY_STATUS_DISCHARGING;
+		return 0;
+	}
+
+	/*
+	 * The MAX170xx has builtin end-of-charge detection and will update
+	 * FullCAP to match RepCap when it detects end of charging.
+	 *
+	 * When this cycle the battery gets charged to a higher (calculated)
+	 * capacity then the previous cycle then FullCAP will get updated
+	 * contineously once end-of-charge detection kicks in, so allow the
+	 * 2 to differ a bit.
+	 */
+
+	ret = regmap_read(chip->regmap, MAX17042_FullCAP, &charge_full);
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_read(chip->regmap, MAX17042_RepCap, &charge_now);
+	if (ret < 0)
+		return ret;
+
+	if ((charge_full - charge_now) <= MAX17042_FULL_THRESHOLD)
+		*status = POWER_SUPPLY_STATUS_FULL;
+	else
+		*status = POWER_SUPPLY_STATUS_CHARGING;
+
+	return 0;
+}
+
 static int max17042_get_battery_health(struct max17042_chip *chip, int *health)
 {
 	int temp, vavg, vbatt, ret;
@@ -182,6 +223,11 @@ static int max17042_get_property(struct power_supply *psy,
 		return -EAGAIN;
 
 	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		ret = max17042_get_status(chip, &val->intval);
+		if (ret < 0)
+			return ret;
+		break;
 	case POWER_SUPPLY_PROP_PRESENT:
 		ret = regmap_read(map, MAX17042_STATUS, &data);
 		if (ret < 0)
diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h
index 3489fb0f9099..a7ed29baf44a 100644
--- a/include/linux/power/max17042_battery.h
+++ b/include/linux/power/max17042_battery.h
@@ -31,6 +31,9 @@
 #define MAX17042_DEFAULT_TEMP_MIN	(0)    /* For sys without temp sensor */
 #define MAX17042_DEFAULT_TEMP_MAX	(700)  /* 70 degrees Celcius */
 
+/* Consider RepCap which is less then 10 units below FullCAP full */
+#define MAX17042_FULL_THRESHOLD		10
+
 #define MAX17042_CHARACTERIZATION_DATA_SIZE 48
 
 enum max17042_register {
-- 
cgit v1.2.3


From dcdddda8fdd23048f2eaf54a18a33e9f225d8fb6 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 14 Apr 2017 20:32:52 +0200
Subject: power: supply: max17042_battery: Add external_power_changed callback

If our supplier changes status, chances are we've changed status too,
let any listeners know about this.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/max17042_battery.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index 62efe7eeb3f8..d355ee54b06d 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -429,6 +429,11 @@ static int max17042_property_is_writeable(struct power_supply *psy,
 	return ret;
 }
 
+static void max17042_external_power_changed(struct power_supply *psy)
+{
+	power_supply_changed(psy);
+}
+
 static int max17042_write_verify_reg(struct regmap *map, u8 reg, u32 value)
 {
 	int retries = 8;
@@ -917,6 +922,7 @@ static const struct power_supply_desc max17042_psy_desc = {
 	.get_property	= max17042_get_property,
 	.set_property	= max17042_set_property,
 	.property_is_writeable	= max17042_property_is_writeable,
+	.external_power_changed	= max17042_external_power_changed,
 	.properties	= max17042_battery_props,
 	.num_properties	= ARRAY_SIZE(max17042_battery_props),
 };
-- 
cgit v1.2.3


From ef7fcdaebfce56bd7f1ac3b916b5d587a3666f10 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 14 Apr 2017 20:32:53 +0200
Subject: power: supply: max17042_battery: Add support for the TECHNOLOGY
 attribute

The max17042 is intended for Li-Ion or Li-Po batteries, add a TECHNOLOGY
attribute to reflect this. Note this is hardcoded to Li-Ion as there is
no way to tell the difference, and Lithium-Ion Polymer batteries are
a sub-family of Lithium-Ion so Li-Ion technically is correct for both.

Using Li-Ion for both is already done by many drivers and is much
better then not providing any technology info at all.

Suggested-by: Wolfgang Wiedmeyer <wolfgit@wiedmeyer.de>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/max17042_battery.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index d355ee54b06d..254550038455 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -78,6 +78,7 @@ struct max17042_chip {
 static enum power_supply_property max17042_battery_props[] = {
 	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
 	POWER_SUPPLY_PROP_CYCLE_COUNT,
 	POWER_SUPPLY_PROP_VOLTAGE_MAX,
 	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
@@ -238,6 +239,9 @@ static int max17042_get_property(struct power_supply *psy,
 		else
 			val->intval = 1;
 		break;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
+		break;
 	case POWER_SUPPLY_PROP_CYCLE_COUNT:
 		ret = regmap_read(map, MAX17042_Cycles, &data);
 		if (ret < 0)
-- 
cgit v1.2.3


From 7bfc9397ff9690fbb0da44e1a0aeab953cb73b77 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 14 Apr 2017 20:32:54 +0200
Subject: power: supply: max17042_battery: Add support for the VOLT_MIN
 property

The info is there, so lets export it, like we already do for VOLT_MAX.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/max17042_battery.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index 254550038455..0e94349b3855 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -81,6 +81,7 @@ static enum power_supply_property max17042_battery_props[] = {
 	POWER_SUPPLY_PROP_TECHNOLOGY,
 	POWER_SUPPLY_PROP_CYCLE_COUNT,
 	POWER_SUPPLY_PROP_VOLTAGE_MAX,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN,
 	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 	POWER_SUPPLY_PROP_VOLTAGE_AVG,
@@ -257,6 +258,13 @@ static int max17042_get_property(struct power_supply *psy,
 		val->intval = data >> 8;
 		val->intval *= 20000; /* Units of LSB = 20mV */
 		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN:
+		ret = regmap_read(map, MAX17042_MinMaxVolt, &data);
+		if (ret < 0)
+			return ret;
+
+		val->intval = (data & 0xff) * 20000; /* Units of 20mV */
+		break;
 	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
 		if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042)
 			ret = regmap_read(map, MAX17042_V_empty, &data);
-- 
cgit v1.2.3


From d7d15fc67791c0e255f36345d02441c1716c2a1f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 14 Apr 2017 20:32:55 +0200
Subject: power: supply: max17042_battery: mAh readings depend on r_sns value

The PROP_CHARGE_FULL code was hardcoded for the default sense
resistor of 0.010 Ohm, make it use r_sns which contains the
actual sense resistor value in micro-Ohms instead.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/max17042_battery.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index 0e94349b3855..37e987f55833 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -220,6 +220,7 @@ static int max17042_get_property(struct power_supply *psy,
 	struct regmap *map = chip->regmap;
 	int ret;
 	u32 data;
+	u64 data64;
 
 	if (!chip->init_complete)
 		return -EAGAIN;
@@ -309,7 +310,9 @@ static int max17042_get_property(struct power_supply *psy,
 		if (ret < 0)
 			return ret;
 
-		val->intval = data * 1000 / 2;
+		data64 = data * 5000000ll;
+		do_div(data64, chip->pdata->r_sns);
+		val->intval = data64;
 		break;
 	case POWER_SUPPLY_PROP_CHARGE_COUNTER:
 		ret = regmap_read(map, MAX17042_QH, &data);
-- 
cgit v1.2.3


From 2e015412a320e49aab4beaeb6262e93fdc0a42f2 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 14 Apr 2017 20:32:56 +0200
Subject: power: supply: max17042_battery: Add support for the
 CHARGE_FULL_DESIGN property

The info is there, lets export it as a property.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/max17042_battery.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index 37e987f55833..3838f09e013c 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -87,6 +87,7 @@ static enum power_supply_property max17042_battery_props[] = {
 	POWER_SUPPLY_PROP_VOLTAGE_AVG,
 	POWER_SUPPLY_PROP_VOLTAGE_OCV,
 	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
 	POWER_SUPPLY_PROP_CHARGE_FULL,
 	POWER_SUPPLY_PROP_CHARGE_COUNTER,
 	POWER_SUPPLY_PROP_TEMP,
@@ -305,6 +306,15 @@ static int max17042_get_property(struct power_supply *psy,
 
 		val->intval = data >> 8;
 		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+		ret = regmap_read(map, MAX17042_DesignCap, &data);
+		if (ret < 0)
+			return ret;
+
+		data64 = data * 5000000ll;
+		do_div(data64, chip->pdata->r_sns);
+		val->intval = data64;
+		break;
 	case POWER_SUPPLY_PROP_CHARGE_FULL:
 		ret = regmap_read(map, MAX17042_FullCAP, &data);
 		if (ret < 0)
-- 
cgit v1.2.3


From 6d6b61eafca94146cedf7709974a7423df16a6f1 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 14 Apr 2017 20:32:57 +0200
Subject: power: supply: max17042_battery: Add support for the CHARGE_NOW
 property

At least upower prefers the more precise charge_now sysfs value over
capacity and the max17042 has the info, so lets export it.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/max17042_battery.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index 3838f09e013c..3e19797fec4b 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -89,6 +89,7 @@ static enum power_supply_property max17042_battery_props[] = {
 	POWER_SUPPLY_PROP_CAPACITY,
 	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
 	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
 	POWER_SUPPLY_PROP_CHARGE_COUNTER,
 	POWER_SUPPLY_PROP_TEMP,
 	POWER_SUPPLY_PROP_TEMP_ALERT_MIN,
@@ -320,6 +321,15 @@ static int max17042_get_property(struct power_supply *psy,
 		if (ret < 0)
 			return ret;
 
+		data64 = data * 5000000ll;
+		do_div(data64, chip->pdata->r_sns);
+		val->intval = data64;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+		ret = regmap_read(map, MAX17042_RepCap, &data);
+		if (ret < 0)
+			return ret;
+
 		data64 = data * 5000000ll;
 		do_div(data64, chip->pdata->r_sns);
 		val->intval = data64;
-- 
cgit v1.2.3


From adb69a3c4524693a32ba72533ee6d7a52e54989e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 14 Apr 2017 20:32:58 +0200
Subject: power: supply: max17042_battery: Add support for the SCOPE property

Add support for the SCOPE property, always return SCOPE_SYSTEM,
as the max170xx is used for the main battery on all known systems
with a max170xx.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/max17042_battery.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index 3e19797fec4b..aecaaa2b0586 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -97,6 +97,7 @@ static enum power_supply_property max17042_battery_props[] = {
 	POWER_SUPPLY_PROP_TEMP_MIN,
 	POWER_SUPPLY_PROP_TEMP_MAX,
 	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_SCOPE,
 	POWER_SUPPLY_PROP_CURRENT_NOW,
 	POWER_SUPPLY_PROP_CURRENT_AVG,
 };
@@ -371,6 +372,9 @@ static int max17042_get_property(struct power_supply *psy,
 		if (ret < 0)
 			return ret;
 		break;
+	case POWER_SUPPLY_PROP_SCOPE:
+		val->intval = POWER_SUPPLY_SCOPE_SYSTEM;
+		break;
 	case POWER_SUPPLY_PROP_CURRENT_NOW:
 		if (chip->pdata->enable_current_sense) {
 			ret = regmap_read(map, MAX17042_Current, &data);
-- 
cgit v1.2.3


From 633e8799ddc09431be2744c4a1efdbda13af2b0b Mon Sep 17 00:00:00 2001
From: Michael Trimarchi <michael@amarulasolutions.com>
Date: Tue, 25 Apr 2017 15:18:05 +0200
Subject: power: supply: pda_power: move from timer to delayed_work

This changed is needed to avoid locking problem during
boot as shown:

<5>[    8.824096] Registering SWP/SWPB emulation handler
<6>[    8.977294] clock: disabling unused clocks to save power
<3>[    9.108154] BUG: sleeping function called from invalid context at kernel_albert/kernel/mutex.c:269
<3>[    9.122894] in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: swapper/0
<4>[    9.130249] 3 locks held by swapper/0/1:
<4>[    9.134613]  #0:  (&__lockdep_no_validate__){......}, at: [<c0342430>] __driver_attach+0x58/0xa8
<4>[    9.144500]  #1:  (&__lockdep_no_validate__){......}, at: [<c0342440>] __driver_attach+0x68/0xa8
<4>[    9.154357]  #2:  (&polling_timer){......}, at: [<c0053770>] run_timer_softirq+0x108/0x3ec
<4>[    9.163726] Backtrace:
<4>[    9.166473] [<c001269c>] (dump_backtrace+0x0/0x114) from [<c067e5f0>] (dump_stack+0x20/0x24)
<4>[    9.175811]  r6:00203230 r5:0000010d r4:d782e000 r3:60000113
<4>[    9.182250] [<c067e5d0>] (dump_stack+0x0/0x24) from [<c007441c>] (__might_sleep+0x10c/0x128)
<4>[    9.191650] [<c0074310>] (__might_sleep+0x0/0x128) from [<c0688f60>] (mutex_lock_nested+0x34/0x36c)
<4>[    9.201660]  r5:c02d5350 r4:d79a0c64
<4>[    9.205688] [<c0688f2c>] (mutex_lock_nested+0x0/0x36c) from [<c02d5350>] (regulator_set_current_limit+0x30/0x118)
<4>[    9.217071] [<c02d5320>] (regulator_set_current_limit+0x0/0x118) from [<c0435ce0>] (update_charger+0x84/0xc4)
<4>[    9.228027]  r7:d782fb20 r6:00000101 r5:c1767e94 r4:00000000
<4>[    9.234436] [<c0435c5c>] (update_charger+0x0/0xc4) from [<c0435d40>] (psy_changed+0x20/0x48)
<4>[    9.243804]  r5:d782e000 r4:c1767e94
<4>[    9.247802] [<c0435d20>] (psy_changed+0x0/0x48) from [<c0435dec>] (polling_timer_func+0x84/0xb8)
<4>[    9.257537]  r4:c1767e94 r3:00000002
<4>[    9.261566] [<c0435d68>] (polling_timer_func+0x0/0xb8) from [<c00537e4>] (run_timer_softirq+0x17c/0x3ec)
<4>[    9.272033]  r4:c1767eb0 r3:00000000
<4>[    9.276062] [<c0053668>] (run_timer_softirq+0x0/0x3ec) from [<c004b000>] (__do_softirq+0xf0/0x298)
<4>[    9.286010] [<c004af10>] (__do_softirq+0x0/0x298) from [<c004b650>] (irq_exit+0x98/0xa0)
<4>[    9.295013] [<c004b5b8>] (irq_exit+0x0/0xa0) from [<c000edbc>] (handle_IRQ+0x60/0xc0)
<4>[    9.303680]  r4:c1194e98 r3:c00bc778
<4>[    9.307708] [<c000ed5c>] (handle_IRQ+0x0/0xc0) from [<c0008504>] (gic_handle_irq+0x34/0x68)
<4>[    9.316955]  r8:000ac383 r7:d782fc3c r6:d782fc08 r5:c11936c4 r4:e0802100
<4>[    9.324310] r3:c026ba48
<4>[    9.327301] [<c00084d0>] (gic_handle_irq+0x0/0x68) from [<c068c2c0>] (__irq_svc+0x40/0x74)
<4>[    9.336456] Exception stack(0xd782fc08 to 0xd782fc50)
<4>[    9.342041] fc00:                   d6e30e6c ac383627 00000000 ac383417 ea19c000 ea200000
<4>[    9.351104] fc20: beffffff 00000667 000ac383 d6e30670 d6e3066c d782fc94 d782fbe8 d782fc50
<4>[    9.360168] fc40: c026ba48 c001d1f0 00000113 ffffffff

Fixes: b2998049cfae ("[BATTERY] pda_power platform driver")
Signed-off-by: Michael Trimarchi <michael@amarulasolutions.com>
Signed-off-by: Anthony Brandon <anthony@amarulasolutions.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/pda_power.c | 49 ++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/drivers/power/supply/pda_power.c b/drivers/power/supply/pda_power.c
index dfe1ee89f7c7..922a86787c5c 100644
--- a/drivers/power/supply/pda_power.c
+++ b/drivers/power/supply/pda_power.c
@@ -30,9 +30,9 @@ static inline unsigned int get_irq_flags(struct resource *res)
 static struct device *dev;
 static struct pda_power_pdata *pdata;
 static struct resource *ac_irq, *usb_irq;
-static struct timer_list charger_timer;
-static struct timer_list supply_timer;
-static struct timer_list polling_timer;
+static struct delayed_work charger_work;
+static struct delayed_work polling_work;
+static struct delayed_work supply_work;
 static int polling;
 static struct power_supply *pda_psy_ac, *pda_psy_usb;
 
@@ -140,7 +140,7 @@ static void update_charger(void)
 	}
 }
 
-static void supply_timer_func(unsigned long unused)
+static void supply_work_func(struct work_struct *work)
 {
 	if (ac_status == PDA_PSY_TO_CHANGE) {
 		ac_status = new_ac_status;
@@ -161,11 +161,12 @@ static void psy_changed(void)
 	 * Okay, charger set. Now wait a bit before notifying supplicants,
 	 * charge power should stabilize.
 	 */
-	mod_timer(&supply_timer,
-		  jiffies + msecs_to_jiffies(pdata->wait_for_charger));
+	cancel_delayed_work(&supply_work);
+	schedule_delayed_work(&supply_work,
+			      msecs_to_jiffies(pdata->wait_for_charger));
 }
 
-static void charger_timer_func(unsigned long unused)
+static void charger_work_func(struct work_struct *work)
 {
 	update_status();
 	psy_changed();
@@ -184,13 +185,14 @@ static irqreturn_t power_changed_isr(int irq, void *power_supply)
 	 * Wait a bit before reading ac/usb line status and setting charger,
 	 * because ac/usb status readings may lag from irq.
 	 */
-	mod_timer(&charger_timer,
-		  jiffies + msecs_to_jiffies(pdata->wait_for_status));
+	cancel_delayed_work(&charger_work);
+	schedule_delayed_work(&charger_work,
+			      msecs_to_jiffies(pdata->wait_for_status));
 
 	return IRQ_HANDLED;
 }
 
-static void polling_timer_func(unsigned long unused)
+static void polling_work_func(struct work_struct *work)
 {
 	int changed = 0;
 
@@ -211,8 +213,9 @@ static void polling_timer_func(unsigned long unused)
 	if (changed)
 		psy_changed();
 
-	mod_timer(&polling_timer,
-		  jiffies + msecs_to_jiffies(pdata->polling_interval));
+	cancel_delayed_work(&polling_work);
+	schedule_delayed_work(&polling_work,
+			      msecs_to_jiffies(pdata->polling_interval));
 }
 
 #if IS_ENABLED(CONFIG_USB_PHY)
@@ -250,8 +253,9 @@ static int otg_handle_notification(struct notifier_block *nb,
 	 * Wait a bit before reading ac/usb line status and setting charger,
 	 * because ac/usb status readings may lag from irq.
 	 */
-	mod_timer(&charger_timer,
-		  jiffies + msecs_to_jiffies(pdata->wait_for_status));
+	cancel_delayed_work(&charger_work);
+	schedule_delayed_work(&charger_work,
+			      msecs_to_jiffies(pdata->wait_for_status));
 
 	return NOTIFY_OK;
 }
@@ -300,8 +304,8 @@ static int pda_power_probe(struct platform_device *pdev)
 	if (!pdata->ac_max_uA)
 		pdata->ac_max_uA = 500000;
 
-	setup_timer(&charger_timer, charger_timer_func, 0);
-	setup_timer(&supply_timer, supply_timer_func, 0);
+	INIT_DELAYED_WORK(&charger_work, charger_work_func);
+	INIT_DELAYED_WORK(&supply_work, supply_work_func);
 
 	ac_irq = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "ac");
 	usb_irq = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "usb");
@@ -385,9 +389,10 @@ static int pda_power_probe(struct platform_device *pdev)
 
 	if (polling) {
 		dev_dbg(dev, "will poll for status\n");
-		setup_timer(&polling_timer, polling_timer_func, 0);
-		mod_timer(&polling_timer,
-			  jiffies + msecs_to_jiffies(pdata->polling_interval));
+		INIT_DELAYED_WORK(&polling_work, polling_work_func);
+		cancel_delayed_work(&polling_work);
+		schedule_delayed_work(&polling_work,
+				      msecs_to_jiffies(pdata->polling_interval));
 	}
 
 	if (ac_irq || usb_irq)
@@ -433,9 +438,9 @@ static int pda_power_remove(struct platform_device *pdev)
 		free_irq(ac_irq->start, pda_psy_ac);
 
 	if (polling)
-		del_timer_sync(&polling_timer);
-	del_timer_sync(&charger_timer);
-	del_timer_sync(&supply_timer);
+		cancel_delayed_work_sync(&polling_work);
+	cancel_delayed_work_sync(&charger_work);
+	cancel_delayed_work_sync(&supply_work);
 
 	if (pdata->is_usb_online)
 		power_supply_unregister(pda_psy_usb);
-- 
cgit v1.2.3


From 4df2cce4722d35eb35bb9371e7c9a600a84558f9 Mon Sep 17 00:00:00 2001
From: Paul Kocialkowski <contact@paulk.fr>
Date: Tue, 25 Apr 2017 17:09:04 +0200
Subject: power: supply: sbs-battery: Don't ignore the first external power
 change

A mechanism to ignore the first external power change notification was
put in place years ago to ignore the power_supply_register notification.

However, this doesn't apply to the current situation anymore, as the
first notification is always the result of a legitimate power change.

This removes this deprecated mechanism, which puts back the driver's
state machine to a sane state (an ignored first notification previously
caused a charging/discharging status inversion).

Signed-off-by: Paul Kocialkowski <contact@paulk.fr>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/sbs-battery.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/power/supply/sbs-battery.c b/drivers/power/supply/sbs-battery.c
index e07d570f504f..7fd0f308189f 100644
--- a/drivers/power/supply/sbs-battery.c
+++ b/drivers/power/supply/sbs-battery.c
@@ -171,7 +171,6 @@ struct sbs_info {
 	u32				i2c_retry_count;
 	u32				poll_retry_count;
 	struct delayed_work		work;
-	int				ignore_changes;
 };
 
 static char model_name[I2C_SMBUS_BLOCK_MAX + 1];
@@ -694,11 +693,6 @@ static void sbs_external_power_changed(struct power_supply *psy)
 {
 	struct sbs_info *chip = power_supply_get_drvdata(psy);
 
-	if (chip->ignore_changes > 0) {
-		chip->ignore_changes--;
-		return;
-	}
-
 	/* cancel outstanding work */
 	cancel_delayed_work_sync(&chip->work);
 
@@ -775,10 +769,6 @@ static int sbs_probe(struct i2c_client *client,
 	chip->enable_detection = false;
 	psy_cfg.of_node = client->dev.of_node;
 	psy_cfg.drv_data = chip;
-	/* ignore first notification of external change, it is generated
-	 * from the power_supply_register call back
-	 */
-	chip->ignore_changes = 1;
 	chip->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
 
 	/* use pdata if available, fall back to DT properties,
-- 
cgit v1.2.3


From 7f93e1fa032bb5ee19b868b9649bc98c82553003 Mon Sep 17 00:00:00 2001
From: Paul Kocialkowski <contact@paulk.fr>
Date: Tue, 25 Apr 2017 17:09:05 +0200
Subject: power: supply: sbs-battery: Correct supply status with current draw

The status reported directly by the battery controller is not always
reliable and should be corrected based on the current draw information.

This implements such a correction with a dedicated function, called
where the supply status is retrieved.

Signed-off-by: Paul Kocialkowski <contact@paulk.fr>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/sbs-battery.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/power/supply/sbs-battery.c b/drivers/power/supply/sbs-battery.c
index 7fd0f308189f..171c701a2d49 100644
--- a/drivers/power/supply/sbs-battery.c
+++ b/drivers/power/supply/sbs-battery.c
@@ -295,6 +295,31 @@ static int sbs_write_word_data(struct i2c_client *client, u8 address,
 	return 0;
 }
 
+static int sbs_status_correct(struct i2c_client *client, int *intval)
+{
+	int ret;
+
+	ret = sbs_read_word_data(client, sbs_data[REG_CURRENT].addr);
+	if (ret < 0)
+		return ret;
+
+	ret = (s16)ret;
+
+	/* Not drawing current means full (cannot be not charging) */
+	if (ret == 0)
+		*intval = POWER_SUPPLY_STATUS_FULL;
+
+	if (*intval == POWER_SUPPLY_STATUS_FULL) {
+		/* Drawing or providing current when full */
+		if (ret > 0)
+			*intval = POWER_SUPPLY_STATUS_CHARGING;
+		else if (ret < 0)
+			*intval = POWER_SUPPLY_STATUS_DISCHARGING;
+	}
+
+	return 0;
+}
+
 static int sbs_get_battery_presence_and_health(
 	struct i2c_client *client, enum power_supply_property psp,
 	union power_supply_propval *val)
@@ -401,6 +426,8 @@ static int sbs_get_battery_property(struct i2c_client *client,
 		else
 			val->intval = POWER_SUPPLY_STATUS_CHARGING;
 
+		sbs_status_correct(client, &val->intval);
+
 		if (chip->poll_time == 0)
 			chip->last_state = val->intval;
 		else if (chip->last_state != val->intval) {
@@ -721,6 +748,8 @@ static void sbs_delayed_work(struct work_struct *work)
 	else
 		ret = POWER_SUPPLY_STATUS_CHARGING;
 
+	sbs_status_correct(chip->client, &ret);
+
 	if (chip->last_state != ret) {
 		chip->poll_time = 0;
 		power_supply_changed(chip->power_supply);
-- 
cgit v1.2.3


From e9574c14fd11ba254633342ff8ea58664c2938e1 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Thu, 27 Apr 2017 17:30:07 +0200
Subject: power: supply: avoid unused twl4030-madc.h

Avoid inclusion of unused twl4030-madc.h. This
will allow twl4030-madc.h to be merged into the
iio driver.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/rx51_battery.c         | 1 -
 drivers/power/supply/twl4030_madc_battery.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/power/supply/rx51_battery.c b/drivers/power/supply/rx51_battery.c
index af9383d23d12..5654708b1279 100644
--- a/drivers/power/supply/rx51_battery.c
+++ b/drivers/power/supply/rx51_battery.c
@@ -23,7 +23,6 @@
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/slab.h>
-#include <linux/i2c/twl4030-madc.h>
 #include <linux/iio/consumer.h>
 #include <linux/of.h>
 
diff --git a/drivers/power/supply/twl4030_madc_battery.c b/drivers/power/supply/twl4030_madc_battery.c
index f5817e422d64..4d41acb98576 100644
--- a/drivers/power/supply/twl4030_madc_battery.c
+++ b/drivers/power/supply/twl4030_madc_battery.c
@@ -17,7 +17,6 @@
 #include <linux/power_supply.h>
 #include <linux/slab.h>
 #include <linux/sort.h>
-#include <linux/i2c/twl4030-madc.h>
 #include <linux/power/twl4030_madc_battery.h>
 #include <linux/iio/consumer.h>
 
-- 
cgit v1.2.3


From cad87f097ca2d56093145c4becd487f5c186a612 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@kernel.org>
Date: Mon, 1 May 2017 13:55:25 +0200
Subject: mailmap: add Sebastian Reichel

Add two entries to map to my primary kernel address.

Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 .mailmap | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.mailmap b/.mailmap
index 67dc22ffc9a8..979c2c96636e 100644
--- a/.mailmap
+++ b/.mailmap
@@ -143,6 +143,8 @@ Santosh Shilimkar <ssantosh@kernel.org>
 Santosh Shilimkar <santosh.shilimkar@oracle.org>
 Sascha Hauer <s.hauer@pengutronix.de>
 S.Çağlar Onur <caglar@pardus.org.tr>
+Sebastian Reichel <sre@kernel.org> <sre@debian.org>
+Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk>
 Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
 Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
 Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
-- 
cgit v1.2.3


From cda3b01368391b48728b7edbbac0ca873b514367 Mon Sep 17 00:00:00 2001
From: Phil Reid <preid@electromag.com.au>
Date: Mon, 1 May 2017 16:49:58 +0800
Subject: power: supply: sbs-battery: Add alert callback

To simplify the sbs-manager code and notification of battery removal
use the i2c alert callback to notify the sbs-battery driver that an
event has occurred.

Signed-off-by: Phil Reid <preid@electromag.com.au>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/sbs-battery.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/sbs-battery.c b/drivers/power/supply/sbs-battery.c
index 171c701a2d49..e3a114e60f1a 100644
--- a/drivers/power/supply/sbs-battery.c
+++ b/drivers/power/supply/sbs-battery.c
@@ -701,21 +701,30 @@ done:
 	return 0;
 }
 
-static irqreturn_t sbs_irq(int irq, void *devid)
+static void sbs_supply_changed(struct sbs_info *chip)
 {
-	struct sbs_info *chip = devid;
 	struct power_supply *battery = chip->power_supply;
 	int ret;
 
 	ret = gpiod_get_value_cansleep(chip->gpio_detect);
 	if (ret < 0)
-		return ret;
+		return;
 	chip->is_present = ret;
 	power_supply_changed(battery);
+}
 
+static irqreturn_t sbs_irq(int irq, void *devid)
+{
+	sbs_supply_changed(devid);
 	return IRQ_HANDLED;
 }
 
+static void sbs_alert(struct i2c_client *client, enum i2c_alert_protocol prot,
+	unsigned int data)
+{
+	sbs_supply_changed(i2c_get_clientdata(client));
+}
+
 static void sbs_external_power_changed(struct power_supply *psy)
 {
 	struct sbs_info *chip = power_supply_get_drvdata(psy);
@@ -936,6 +945,7 @@ MODULE_DEVICE_TABLE(of, sbs_dt_ids);
 static struct i2c_driver sbs_battery_driver = {
 	.probe		= sbs_probe,
 	.remove		= sbs_remove,
+	.alert		= sbs_alert,
 	.id_table	= sbs_id,
 	.driver = {
 		.name	= "sbs-battery",
-- 
cgit v1.2.3


From 0d43d7ab277a048c4b1455ee00030933e1bd5fa3 Mon Sep 17 00:00:00 2001
From: Ludovic Barre <ludovic.barre@st.com>
Date: Thu, 13 Apr 2017 19:15:57 +0200
Subject: mtd: spi-nor: add driver for STM32 quad spi flash controller

The quadspi is a specialized communication interface targeting single,
dual or quad SPI Flash memories.

It can operate in any of the following modes:
-indirect mode: all the operations are performed using the quadspi
 registers
-read memory-mapped mode: the external Flash memory is mapped to the
 microcontroller address space and is seen by the system as if it was
 an internal memory

Signed-off-by: Ludovic Barre <ludovic.barre@st.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/mtd/spi-nor/Kconfig         |   7 +
 drivers/mtd/spi-nor/Makefile        |   1 +
 drivers/mtd/spi-nor/stm32-quadspi.c | 693 ++++++++++++++++++++++++++++++++++++
 3 files changed, 701 insertions(+)
 create mode 100644 drivers/mtd/spi-nor/stm32-quadspi.c

diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig
index 7252087ef407..bfdfb1e72b38 100644
--- a/drivers/mtd/spi-nor/Kconfig
+++ b/drivers/mtd/spi-nor/Kconfig
@@ -106,4 +106,11 @@ config SPI_INTEL_SPI_PLATFORM
 	  To compile this driver as a module, choose M here: the module
 	  will be called intel-spi-platform.
 
+config SPI_STM32_QUADSPI
+	tristate "STM32 Quad SPI controller"
+	depends on ARCH_STM32
+	help
+	  This enables support for the STM32 Quad SPI controller.
+	  We only connect the NOR to this controller.
+
 endif # MTD_SPI_NOR
diff --git a/drivers/mtd/spi-nor/Makefile b/drivers/mtd/spi-nor/Makefile
index 72238a793198..285aab86c7ca 100644
--- a/drivers/mtd/spi-nor/Makefile
+++ b/drivers/mtd/spi-nor/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_MTD_MT81xx_NOR)    += mtk-quadspi.o
 obj-$(CONFIG_SPI_NXP_SPIFI)	+= nxp-spifi.o
 obj-$(CONFIG_SPI_INTEL_SPI)	+= intel-spi.o
 obj-$(CONFIG_SPI_INTEL_SPI_PLATFORM)	+= intel-spi-platform.o
+obj-$(CONFIG_SPI_STM32_QUADSPI)	+= stm32-quadspi.o
\ No newline at end of file
diff --git a/drivers/mtd/spi-nor/stm32-quadspi.c b/drivers/mtd/spi-nor/stm32-quadspi.c
new file mode 100644
index 000000000000..ae45f81b8cd3
--- /dev/null
+++ b/drivers/mtd/spi-nor/stm32-quadspi.c
@@ -0,0 +1,693 @@
+/*
+ * stm32_quadspi.c
+ *
+ * Copyright (C) 2017, Ludovic Barre
+ *
+ * License terms: GNU General Public License (GPL), version 2
+ */
+#include <linux/clk.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/spi-nor.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#define QUADSPI_CR		0x00
+#define CR_EN			BIT(0)
+#define CR_ABORT		BIT(1)
+#define CR_DMAEN		BIT(2)
+#define CR_TCEN			BIT(3)
+#define CR_SSHIFT		BIT(4)
+#define CR_DFM			BIT(6)
+#define CR_FSEL			BIT(7)
+#define CR_FTHRES_SHIFT		8
+#define CR_FTHRES_MASK		GENMASK(12, 8)
+#define CR_FTHRES(n)		(((n) << CR_FTHRES_SHIFT) & CR_FTHRES_MASK)
+#define CR_TEIE			BIT(16)
+#define CR_TCIE			BIT(17)
+#define CR_FTIE			BIT(18)
+#define CR_SMIE			BIT(19)
+#define CR_TOIE			BIT(20)
+#define CR_PRESC_SHIFT		24
+#define CR_PRESC_MASK		GENMASK(31, 24)
+#define CR_PRESC(n)		(((n) << CR_PRESC_SHIFT) & CR_PRESC_MASK)
+
+#define QUADSPI_DCR		0x04
+#define DCR_CSHT_SHIFT		8
+#define DCR_CSHT_MASK		GENMASK(10, 8)
+#define DCR_CSHT(n)		(((n) << DCR_CSHT_SHIFT) & DCR_CSHT_MASK)
+#define DCR_FSIZE_SHIFT		16
+#define DCR_FSIZE_MASK		GENMASK(20, 16)
+#define DCR_FSIZE(n)		(((n) << DCR_FSIZE_SHIFT) & DCR_FSIZE_MASK)
+
+#define QUADSPI_SR		0x08
+#define SR_TEF			BIT(0)
+#define SR_TCF			BIT(1)
+#define SR_FTF			BIT(2)
+#define SR_SMF			BIT(3)
+#define SR_TOF			BIT(4)
+#define SR_BUSY			BIT(5)
+#define SR_FLEVEL_SHIFT		8
+#define SR_FLEVEL_MASK		GENMASK(13, 8)
+
+#define QUADSPI_FCR		0x0c
+#define FCR_CTCF		BIT(1)
+
+#define QUADSPI_DLR		0x10
+
+#define QUADSPI_CCR		0x14
+#define CCR_INST_SHIFT		0
+#define CCR_INST_MASK		GENMASK(7, 0)
+#define CCR_INST(n)		(((n) << CCR_INST_SHIFT) & CCR_INST_MASK)
+#define CCR_IMODE_NONE		(0U << 8)
+#define CCR_IMODE_1		(1U << 8)
+#define CCR_IMODE_2		(2U << 8)
+#define CCR_IMODE_4		(3U << 8)
+#define CCR_ADMODE_NONE		(0U << 10)
+#define CCR_ADMODE_1		(1U << 10)
+#define CCR_ADMODE_2		(2U << 10)
+#define CCR_ADMODE_4		(3U << 10)
+#define CCR_ADSIZE_SHIFT	12
+#define CCR_ADSIZE_MASK		GENMASK(13, 12)
+#define CCR_ADSIZE(n)		(((n) << CCR_ADSIZE_SHIFT) & CCR_ADSIZE_MASK)
+#define CCR_ABMODE_NONE		(0U << 14)
+#define CCR_ABMODE_1		(1U << 14)
+#define CCR_ABMODE_2		(2U << 14)
+#define CCR_ABMODE_4		(3U << 14)
+#define CCR_ABSIZE_8		(0U << 16)
+#define CCR_ABSIZE_16		(1U << 16)
+#define CCR_ABSIZE_24		(2U << 16)
+#define CCR_ABSIZE_32		(3U << 16)
+#define CCR_DCYC_SHIFT		18
+#define CCR_DCYC_MASK		GENMASK(22, 18)
+#define CCR_DCYC(n)		(((n) << CCR_DCYC_SHIFT) & CCR_DCYC_MASK)
+#define CCR_DMODE_NONE		(0U << 24)
+#define CCR_DMODE_1		(1U << 24)
+#define CCR_DMODE_2		(2U << 24)
+#define CCR_DMODE_4		(3U << 24)
+#define CCR_FMODE_INDW		(0U << 26)
+#define CCR_FMODE_INDR		(1U << 26)
+#define CCR_FMODE_APM		(2U << 26)
+#define CCR_FMODE_MM		(3U << 26)
+
+#define QUADSPI_AR		0x18
+#define QUADSPI_ABR		0x1c
+#define QUADSPI_DR		0x20
+#define QUADSPI_PSMKR		0x24
+#define QUADSPI_PSMAR		0x28
+#define QUADSPI_PIR		0x2c
+#define QUADSPI_LPTR		0x30
+#define LPTR_DFT_TIMEOUT	0x10
+
+#define FSIZE_VAL(size)		(__fls(size) - 1)
+
+#define STM32_MAX_MMAP_SZ	SZ_256M
+#define STM32_MAX_NORCHIP	2
+
+#define STM32_QSPI_FIFO_TIMEOUT_US 30000
+#define STM32_QSPI_BUSY_TIMEOUT_US 100000
+
+struct stm32_qspi_flash {
+	struct spi_nor nor;
+	struct stm32_qspi *qspi;
+	u32 cs;
+	u32 fsize;
+	u32 presc;
+	u32 read_mode;
+	bool registered;
+};
+
+struct stm32_qspi {
+	struct device *dev;
+	void __iomem *io_base;
+	void __iomem *mm_base;
+	resource_size_t mm_size;
+	u32 nor_num;
+	struct clk *clk;
+	u32 clk_rate;
+	struct stm32_qspi_flash flash[STM32_MAX_NORCHIP];
+	struct completion cmd_completion;
+
+	/*
+	 * to protect device configuration, could be different between
+	 * 2 flash access (bk1, bk2)
+	 */
+	struct mutex lock;
+};
+
+struct stm32_qspi_cmd {
+	u8 addr_width;
+	u8 dummy;
+	bool tx_data;
+	u8 opcode;
+	u32 framemode;
+	u32 qspimode;
+	u32 addr;
+	size_t len;
+	void *buf;
+};
+
+static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi)
+{
+	u32 cr;
+	int err = 0;
+
+	if (readl_relaxed(qspi->io_base + QUADSPI_SR) & SR_TCF)
+		return 0;
+
+	reinit_completion(&qspi->cmd_completion);
+	cr = readl_relaxed(qspi->io_base + QUADSPI_CR);
+	writel_relaxed(cr | CR_TCIE, qspi->io_base + QUADSPI_CR);
+
+	if (!wait_for_completion_interruptible_timeout(&qspi->cmd_completion,
+						       msecs_to_jiffies(1000)))
+		err = -ETIMEDOUT;
+
+	writel_relaxed(cr, qspi->io_base + QUADSPI_CR);
+	return err;
+}
+
+static int stm32_qspi_wait_nobusy(struct stm32_qspi *qspi)
+{
+	u32 sr;
+
+	return readl_relaxed_poll_timeout(qspi->io_base + QUADSPI_SR, sr,
+					  !(sr & SR_BUSY), 10,
+					  STM32_QSPI_BUSY_TIMEOUT_US);
+}
+
+static void stm32_qspi_set_framemode(struct spi_nor *nor,
+				     struct stm32_qspi_cmd *cmd, bool read)
+{
+	u32 dmode = CCR_DMODE_1;
+
+	cmd->framemode = CCR_IMODE_1;
+
+	if (read) {
+		switch (nor->flash_read) {
+		case SPI_NOR_NORMAL:
+		case SPI_NOR_FAST:
+			dmode = CCR_DMODE_1;
+			break;
+		case SPI_NOR_DUAL:
+			dmode = CCR_DMODE_2;
+			break;
+		case SPI_NOR_QUAD:
+			dmode = CCR_DMODE_4;
+			break;
+		}
+	}
+
+	cmd->framemode |= cmd->tx_data ? dmode : 0;
+	cmd->framemode |= cmd->addr_width ? CCR_ADMODE_1 : 0;
+}
+
+static void stm32_qspi_read_fifo(u8 *val, void __iomem *addr)
+{
+	*val = readb_relaxed(addr);
+}
+
+static void stm32_qspi_write_fifo(u8 *val, void __iomem *addr)
+{
+	writeb_relaxed(*val, addr);
+}
+
+static int stm32_qspi_tx_poll(struct stm32_qspi *qspi,
+			      const struct stm32_qspi_cmd *cmd)
+{
+	void (*tx_fifo)(u8 *, void __iomem *);
+	u32 len = cmd->len, sr;
+	u8 *buf = cmd->buf;
+	int ret;
+
+	if (cmd->qspimode == CCR_FMODE_INDW)
+		tx_fifo = stm32_qspi_write_fifo;
+	else
+		tx_fifo = stm32_qspi_read_fifo;
+
+	while (len--) {
+		ret = readl_relaxed_poll_timeout(qspi->io_base + QUADSPI_SR,
+						 sr, (sr & SR_FTF), 10,
+						 STM32_QSPI_FIFO_TIMEOUT_US);
+		if (ret) {
+			dev_err(qspi->dev, "fifo timeout (stat:%#x)\n", sr);
+			break;
+		}
+		tx_fifo(buf++, qspi->io_base + QUADSPI_DR);
+	}
+
+	return ret;
+}
+
+static int stm32_qspi_tx_mm(struct stm32_qspi *qspi,
+			    const struct stm32_qspi_cmd *cmd)
+{
+	memcpy_fromio(cmd->buf, qspi->mm_base + cmd->addr, cmd->len);
+	return 0;
+}
+
+static int stm32_qspi_tx(struct stm32_qspi *qspi,
+			 const struct stm32_qspi_cmd *cmd)
+{
+	if (!cmd->tx_data)
+		return 0;
+
+	if (cmd->qspimode == CCR_FMODE_MM)
+		return stm32_qspi_tx_mm(qspi, cmd);
+
+	return stm32_qspi_tx_poll(qspi, cmd);
+}
+
+static int stm32_qspi_send(struct stm32_qspi_flash *flash,
+			   const struct stm32_qspi_cmd *cmd)
+{
+	struct stm32_qspi *qspi = flash->qspi;
+	u32 ccr, dcr, cr;
+	int err;
+
+	err = stm32_qspi_wait_nobusy(qspi);
+	if (err)
+		goto abort;
+
+	dcr = readl_relaxed(qspi->io_base + QUADSPI_DCR) & ~DCR_FSIZE_MASK;
+	dcr |= DCR_FSIZE(flash->fsize);
+	writel_relaxed(dcr, qspi->io_base + QUADSPI_DCR);
+
+	cr = readl_relaxed(qspi->io_base + QUADSPI_CR);
+	cr &= ~CR_PRESC_MASK & ~CR_FSEL;
+	cr |= CR_PRESC(flash->presc);
+	cr |= flash->cs ? CR_FSEL : 0;
+	writel_relaxed(cr, qspi->io_base + QUADSPI_CR);
+
+	if (cmd->tx_data)
+		writel_relaxed(cmd->len - 1, qspi->io_base + QUADSPI_DLR);
+
+	ccr = cmd->framemode | cmd->qspimode;
+
+	if (cmd->dummy)
+		ccr |= CCR_DCYC(cmd->dummy);
+
+	if (cmd->addr_width)
+		ccr |= CCR_ADSIZE(cmd->addr_width - 1);
+
+	ccr |= CCR_INST(cmd->opcode);
+	writel_relaxed(ccr, qspi->io_base + QUADSPI_CCR);
+
+	if (cmd->addr_width && cmd->qspimode != CCR_FMODE_MM)
+		writel_relaxed(cmd->addr, qspi->io_base + QUADSPI_AR);
+
+	err = stm32_qspi_tx(qspi, cmd);
+	if (err)
+		goto abort;
+
+	if (cmd->qspimode != CCR_FMODE_MM) {
+		err = stm32_qspi_wait_cmd(qspi);
+		if (err)
+			goto abort;
+		writel_relaxed(FCR_CTCF, qspi->io_base + QUADSPI_FCR);
+	}
+
+	return err;
+
+abort:
+	cr = readl_relaxed(qspi->io_base + QUADSPI_CR) | CR_ABORT;
+	writel_relaxed(cr, qspi->io_base + QUADSPI_CR);
+
+	dev_err(qspi->dev, "%s abort err:%d\n", __func__, err);
+	return err;
+}
+
+static int stm32_qspi_read_reg(struct spi_nor *nor,
+			       u8 opcode, u8 *buf, int len)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct device *dev = flash->qspi->dev;
+	struct stm32_qspi_cmd cmd;
+
+	dev_dbg(dev, "read_reg: cmd:%#.2x buf:%p len:%#x\n", opcode, buf, len);
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = opcode;
+	cmd.tx_data = true;
+	cmd.len = len;
+	cmd.buf = buf;
+	cmd.qspimode = CCR_FMODE_INDR;
+
+	stm32_qspi_set_framemode(nor, &cmd, false);
+
+	return stm32_qspi_send(flash, &cmd);
+}
+
+static int stm32_qspi_write_reg(struct spi_nor *nor, u8 opcode,
+				u8 *buf, int len)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct device *dev = flash->qspi->dev;
+	struct stm32_qspi_cmd cmd;
+
+	dev_dbg(dev, "write_reg: cmd:%#.2x buf:%p len:%#x\n", opcode, buf, len);
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = opcode;
+	cmd.tx_data = !!(buf && len > 0);
+	cmd.len = len;
+	cmd.buf = buf;
+	cmd.qspimode = CCR_FMODE_INDW;
+
+	stm32_qspi_set_framemode(nor, &cmd, false);
+
+	return stm32_qspi_send(flash, &cmd);
+}
+
+static ssize_t stm32_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
+			       u_char *buf)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct stm32_qspi *qspi = flash->qspi;
+	struct stm32_qspi_cmd cmd;
+	int err;
+
+	dev_dbg(qspi->dev, "read(%#.2x): buf:%p from:%#.8x len:%#x\n",
+		nor->read_opcode, buf, (u32)from, len);
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = nor->read_opcode;
+	cmd.addr_width = nor->addr_width;
+	cmd.addr = (u32)from;
+	cmd.tx_data = true;
+	cmd.dummy = nor->read_dummy;
+	cmd.len = len;
+	cmd.buf = buf;
+	cmd.qspimode = flash->read_mode;
+
+	stm32_qspi_set_framemode(nor, &cmd, true);
+	err = stm32_qspi_send(flash, &cmd);
+
+	return err ? err : len;
+}
+
+static ssize_t stm32_qspi_write(struct spi_nor *nor, loff_t to, size_t len,
+				const u_char *buf)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct device *dev = flash->qspi->dev;
+	struct stm32_qspi_cmd cmd;
+	int err;
+
+	dev_dbg(dev, "write(%#.2x): buf:%p to:%#.8x len:%#x\n",
+		nor->program_opcode, buf, (u32)to, len);
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = nor->program_opcode;
+	cmd.addr_width = nor->addr_width;
+	cmd.addr = (u32)to;
+	cmd.tx_data = true;
+	cmd.len = len;
+	cmd.buf = (void *)buf;
+	cmd.qspimode = CCR_FMODE_INDW;
+
+	stm32_qspi_set_framemode(nor, &cmd, false);
+	err = stm32_qspi_send(flash, &cmd);
+
+	return err ? err : len;
+}
+
+static int stm32_qspi_erase(struct spi_nor *nor, loff_t offs)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct device *dev = flash->qspi->dev;
+	struct stm32_qspi_cmd cmd;
+
+	dev_dbg(dev, "erase(%#.2x):offs:%#x\n", nor->erase_opcode, (u32)offs);
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = nor->erase_opcode;
+	cmd.addr_width = nor->addr_width;
+	cmd.addr = (u32)offs;
+	cmd.qspimode = CCR_FMODE_INDW;
+
+	stm32_qspi_set_framemode(nor, &cmd, false);
+
+	return stm32_qspi_send(flash, &cmd);
+}
+
+static irqreturn_t stm32_qspi_irq(int irq, void *dev_id)
+{
+	struct stm32_qspi *qspi = (struct stm32_qspi *)dev_id;
+	u32 cr, sr, fcr = 0;
+
+	cr = readl_relaxed(qspi->io_base + QUADSPI_CR);
+	sr = readl_relaxed(qspi->io_base + QUADSPI_SR);
+
+	if ((cr & CR_TCIE) && (sr & SR_TCF)) {
+		/* tx complete */
+		fcr |= FCR_CTCF;
+		complete(&qspi->cmd_completion);
+	} else {
+		dev_info_ratelimited(qspi->dev, "spurious interrupt\n");
+	}
+
+	writel_relaxed(fcr, qspi->io_base + QUADSPI_FCR);
+
+	return IRQ_HANDLED;
+}
+
+static int stm32_qspi_prep(struct spi_nor *nor, enum spi_nor_ops ops)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct stm32_qspi *qspi = flash->qspi;
+
+	mutex_lock(&qspi->lock);
+	return 0;
+}
+
+static void stm32_qspi_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
+{
+	struct stm32_qspi_flash *flash = nor->priv;
+	struct stm32_qspi *qspi = flash->qspi;
+
+	mutex_unlock(&qspi->lock);
+}
+
+static int stm32_qspi_flash_setup(struct stm32_qspi *qspi,
+				  struct device_node *np)
+{
+	u32 width, flash_read, presc, cs_num, max_rate = 0;
+	struct stm32_qspi_flash *flash;
+	struct mtd_info *mtd;
+	int ret;
+
+	of_property_read_u32(np, "reg", &cs_num);
+	if (cs_num >= STM32_MAX_NORCHIP)
+		return -EINVAL;
+
+	of_property_read_u32(np, "spi-max-frequency", &max_rate);
+	if (!max_rate)
+		return -EINVAL;
+
+	presc = DIV_ROUND_UP(qspi->clk_rate, max_rate) - 1;
+
+	if (of_property_read_u32(np, "spi-rx-bus-width", &width))
+		width = 1;
+
+	if (width == 4)
+		flash_read = SPI_NOR_QUAD;
+	else if (width == 2)
+		flash_read = SPI_NOR_DUAL;
+	else if (width == 1)
+		flash_read = SPI_NOR_NORMAL;
+	else
+		return -EINVAL;
+
+	flash = &qspi->flash[cs_num];
+	flash->qspi = qspi;
+	flash->cs = cs_num;
+	flash->presc = presc;
+
+	flash->nor.dev = qspi->dev;
+	spi_nor_set_flash_node(&flash->nor, np);
+	flash->nor.priv = flash;
+	mtd = &flash->nor.mtd;
+
+	flash->nor.read = stm32_qspi_read;
+	flash->nor.write = stm32_qspi_write;
+	flash->nor.erase = stm32_qspi_erase;
+	flash->nor.read_reg = stm32_qspi_read_reg;
+	flash->nor.write_reg = stm32_qspi_write_reg;
+	flash->nor.prepare = stm32_qspi_prep;
+	flash->nor.unprepare = stm32_qspi_unprep;
+
+	writel_relaxed(LPTR_DFT_TIMEOUT, qspi->io_base + QUADSPI_LPTR);
+
+	writel_relaxed(CR_PRESC(presc) | CR_FTHRES(3) | CR_TCEN | CR_SSHIFT
+		       | CR_EN, qspi->io_base + QUADSPI_CR);
+
+	/*
+	 * in stm32 qspi controller, QUADSPI_DCR register has a fsize field
+	 * which define the size of nor flash.
+	 * if fsize is NULL, the controller can't sent spi-nor command.
+	 * set a temporary value just to discover the nor flash with
+	 * "spi_nor_scan". After, the right value (mtd->size) can be set.
+	 */
+	flash->fsize = FSIZE_VAL(SZ_1K);
+
+	ret = spi_nor_scan(&flash->nor, NULL, flash_read);
+	if (ret) {
+		dev_err(qspi->dev, "device scan failed\n");
+		return ret;
+	}
+
+	flash->fsize = FSIZE_VAL(mtd->size);
+
+	flash->read_mode = CCR_FMODE_MM;
+	if (mtd->size > qspi->mm_size)
+		flash->read_mode = CCR_FMODE_INDR;
+
+	writel_relaxed(DCR_CSHT(1), qspi->io_base + QUADSPI_DCR);
+
+	ret = mtd_device_register(mtd, NULL, 0);
+	if (ret) {
+		dev_err(qspi->dev, "mtd device parse failed\n");
+		return ret;
+	}
+
+	flash->registered = true;
+
+	dev_dbg(qspi->dev, "read mm:%s cs:%d bus:%d\n",
+		flash->read_mode == CCR_FMODE_MM ? "yes" : "no", cs_num, width);
+
+	return 0;
+}
+
+static void stm32_qspi_mtd_free(struct stm32_qspi *qspi)
+{
+	int i;
+
+	for (i = 0; i < STM32_MAX_NORCHIP; i++)
+		if (qspi->flash[i].registered)
+			mtd_device_unregister(&qspi->flash[i].nor.mtd);
+}
+
+static int stm32_qspi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *flash_np;
+	struct reset_control *rstc;
+	struct stm32_qspi *qspi;
+	struct resource *res;
+	int ret, irq;
+
+	qspi = devm_kzalloc(dev, sizeof(*qspi), GFP_KERNEL);
+	if (!qspi)
+		return -ENOMEM;
+
+	qspi->nor_num = of_get_child_count(dev->of_node);
+	if (!qspi->nor_num || qspi->nor_num > STM32_MAX_NORCHIP)
+		return -ENODEV;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi");
+	qspi->io_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(qspi->io_base))
+		return PTR_ERR(qspi->io_base);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi_mm");
+	qspi->mm_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(qspi->mm_base))
+		return PTR_ERR(qspi->mm_base);
+
+	qspi->mm_size = resource_size(res);
+
+	irq = platform_get_irq(pdev, 0);
+	ret = devm_request_irq(dev, irq, stm32_qspi_irq, 0,
+			       dev_name(dev), qspi);
+	if (ret) {
+		dev_err(dev, "failed to request irq\n");
+		return ret;
+	}
+
+	init_completion(&qspi->cmd_completion);
+
+	qspi->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(qspi->clk))
+		return PTR_ERR(qspi->clk);
+
+	qspi->clk_rate = clk_get_rate(qspi->clk);
+	if (!qspi->clk_rate)
+		return -EINVAL;
+
+	ret = clk_prepare_enable(qspi->clk);
+	if (ret) {
+		dev_err(dev, "can not enable the clock\n");
+		return ret;
+	}
+
+	rstc = devm_reset_control_get(dev, NULL);
+	if (!IS_ERR(rstc)) {
+		reset_control_assert(rstc);
+		udelay(2);
+		reset_control_deassert(rstc);
+	}
+
+	qspi->dev = dev;
+	platform_set_drvdata(pdev, qspi);
+	mutex_init(&qspi->lock);
+
+	for_each_available_child_of_node(dev->of_node, flash_np) {
+		ret = stm32_qspi_flash_setup(qspi, flash_np);
+		if (ret) {
+			dev_err(dev, "unable to setup flash chip\n");
+			goto err_flash;
+		}
+	}
+
+	return 0;
+
+err_flash:
+	mutex_destroy(&qspi->lock);
+	stm32_qspi_mtd_free(qspi);
+
+	clk_disable_unprepare(qspi->clk);
+	return ret;
+}
+
+static int stm32_qspi_remove(struct platform_device *pdev)
+{
+	struct stm32_qspi *qspi = platform_get_drvdata(pdev);
+
+	/* disable qspi */
+	writel_relaxed(0, qspi->io_base + QUADSPI_CR);
+
+	stm32_qspi_mtd_free(qspi);
+	mutex_destroy(&qspi->lock);
+
+	clk_disable_unprepare(qspi->clk);
+	return 0;
+}
+
+static const struct of_device_id stm32_qspi_match[] = {
+	{.compatible = "st,stm32f469-qspi"},
+	{}
+};
+MODULE_DEVICE_TABLE(of, stm32_qspi_match);
+
+static struct platform_driver stm32_qspi_driver = {
+	.probe	= stm32_qspi_probe,
+	.remove	= stm32_qspi_remove,
+	.driver	= {
+		.name = "stm32-quadspi",
+		.of_match_table = stm32_qspi_match,
+	},
+};
+module_platform_driver(stm32_qspi_driver);
+
+MODULE_AUTHOR("Ludovic Barre <ludovic.barre@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics STM32 quad spi driver");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 8abe904dc82772bf1635fcc5765433c672f0a875 Mon Sep 17 00:00:00 2001
From: Guochun Mao <guochun.mao@mediatek.com>
Date: Wed, 5 Apr 2017 16:37:42 +0800
Subject: mtd: mtk-nor: set controller's address width according to nor flash

When nor's size larger than 16MByte, nor's address width maybe
set to 3 or 4, and controller should change address width according
to nor's setting.

Signed-off-by: Guochun Mao <guochun.mao@mediatek.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/mtd/spi-nor/mtk-quadspi.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c b/drivers/mtd/spi-nor/mtk-quadspi.c
index e661877c23de..b6377707ce32 100644
--- a/drivers/mtd/spi-nor/mtk-quadspi.c
+++ b/drivers/mtd/spi-nor/mtk-quadspi.c
@@ -104,6 +104,8 @@
 #define MTK_NOR_MAX_RX_TX_SHIFT		6
 /* can shift up to 56 bits (7 bytes) transfer by MTK_NOR_PRG_CMD */
 #define MTK_NOR_MAX_SHIFT		7
+/* nor controller 4-byte address mode enable bit */
+#define MTK_NOR_4B_ADDR_EN		BIT(4)
 
 /* Helpers for accessing the program data / shift data registers */
 #define MTK_NOR_PRG_REG(n)		(MTK_NOR_PRGDATA0_REG + 4 * (n))
@@ -230,10 +232,35 @@ static int mt8173_nor_write_buffer_disable(struct mt8173_nor *mt8173_nor)
 				  10000);
 }
 
+static void mt8173_nor_set_addr_width(struct mt8173_nor *mt8173_nor)
+{
+	u8 val;
+	struct spi_nor *nor = &mt8173_nor->nor;
+
+	val = readb(mt8173_nor->base + MTK_NOR_DUAL_REG);
+
+	switch (nor->addr_width) {
+	case 3:
+		val &= ~MTK_NOR_4B_ADDR_EN;
+		break;
+	case 4:
+		val |= MTK_NOR_4B_ADDR_EN;
+		break;
+	default:
+		dev_warn(mt8173_nor->dev, "Unexpected address width %u.\n",
+			 nor->addr_width);
+		break;
+	}
+
+	writeb(val, mt8173_nor->base + MTK_NOR_DUAL_REG);
+}
+
 static void mt8173_nor_set_addr(struct mt8173_nor *mt8173_nor, u32 addr)
 {
 	int i;
 
+	mt8173_nor_set_addr_width(mt8173_nor);
+
 	for (i = 0; i < 3; i++) {
 		writeb(addr & 0xff, mt8173_nor->base + MTK_NOR_RADR0_REG + i * 4);
 		addr >>= 8;
-- 
cgit v1.2.3


From 98da65d5e32583d89a1b1c760293b601816a98d3 Mon Sep 17 00:00:00 2001
From: Michel Dänzer <michel.daenzer@amd.com>
Date: Fri, 28 Apr 2017 16:32:43 -0400
Subject: Revert "drm/amdgpu: Refactor flip into prepare submit and submit.
 (v3)"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit cb341a319f7e66f879d69af929c3dadfc1a8f31e.

The purpose of the refactor was for amdgpu_crtc_prepare/submit_flip to
be used by the DC code, but that's no longer the case.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 138 ++++++----------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h    |  15 ---
 2 files changed, 29 insertions(+), 124 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 21125a9452b8..cdf2ab20166a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -138,52 +138,11 @@ static void amdgpu_unpin_work_func(struct work_struct *__work)
 	kfree(work);
 }
 
-
-static void amdgpu_flip_work_cleanup(struct amdgpu_flip_work *work)
-{
-	int i;
-
-	amdgpu_bo_unref(&work->old_abo);
-	dma_fence_put(work->excl);
-	for (i = 0; i < work->shared_count; ++i)
-		dma_fence_put(work->shared[i]);
-	kfree(work->shared);
-	kfree(work);
-}
-
-static void amdgpu_flip_cleanup_unreserve(struct amdgpu_flip_work *work,
-					  struct amdgpu_bo *new_abo)
-{
-	amdgpu_bo_unreserve(new_abo);
-	amdgpu_flip_work_cleanup(work);
-}
-
-static void amdgpu_flip_cleanup_unpin(struct amdgpu_flip_work *work,
-				      struct amdgpu_bo *new_abo)
-{
-	if (unlikely(amdgpu_bo_unpin(new_abo) != 0))
-		DRM_ERROR("failed to unpin new abo in error path\n");
-	amdgpu_flip_cleanup_unreserve(work, new_abo);
-}
-
-void amdgpu_crtc_cleanup_flip_ctx(struct amdgpu_flip_work *work,
-				  struct amdgpu_bo *new_abo)
-{
-	if (unlikely(amdgpu_bo_reserve(new_abo, true) != 0)) {
-		DRM_ERROR("failed to reserve new abo in error path\n");
-		amdgpu_flip_work_cleanup(work);
-		return;
-	}
-	amdgpu_flip_cleanup_unpin(work, new_abo);
-}
-
-int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc,
-			     struct drm_framebuffer *fb,
-			     struct drm_pending_vblank_event *event,
-			     uint32_t page_flip_flags,
-			     uint32_t target,
-			     struct amdgpu_flip_work **work_p,
-			     struct amdgpu_bo **new_abo_p)
+int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
+				 struct drm_framebuffer *fb,
+				 struct drm_pending_vblank_event *event,
+				 uint32_t page_flip_flags, uint32_t target,
+				 struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
@@ -196,7 +155,7 @@ int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc,
 	unsigned long flags;
 	u64 tiling_flags;
 	u64 base;
-	int r;
+	int i, r;
 
 	work = kzalloc(sizeof *work, GFP_KERNEL);
 	if (work == NULL)
@@ -257,80 +216,41 @@ int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc,
 		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		r = -EBUSY;
 		goto pflip_cleanup;
-
 	}
-	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
-
-	*work_p = work;
-	*new_abo_p = new_abo;
-
-	return 0;
-
-pflip_cleanup:
-	amdgpu_crtc_cleanup_flip_ctx(work, new_abo);
-	return r;
-
-unpin:
-	amdgpu_flip_cleanup_unpin(work, new_abo);
-	return r;
-
-unreserve:
-	amdgpu_flip_cleanup_unreserve(work, new_abo);
-	return r;
 
-cleanup:
-	amdgpu_flip_work_cleanup(work);
-	return r;
-
-}
-
-void amdgpu_crtc_submit_flip(struct drm_crtc *crtc,
-			     struct drm_framebuffer *fb,
-			     struct amdgpu_flip_work *work,
-			     struct amdgpu_bo *new_abo)
-{
-	unsigned long flags;
-	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
-	spin_lock_irqsave(&crtc->dev->event_lock, flags);
 	amdgpu_crtc->pflip_status = AMDGPU_FLIP_PENDING;
 	amdgpu_crtc->pflip_works = work;
 
+
+	DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n",
+					 amdgpu_crtc->crtc_id, amdgpu_crtc, work);
 	/* update crtc fb */
 	crtc->primary->fb = fb;
 	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
-
-	DRM_DEBUG_DRIVER(
-			"crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n",
-			amdgpu_crtc->crtc_id, amdgpu_crtc, work);
-
 	amdgpu_flip_work_func(&work->flip_work.work);
-}
-
-int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_pending_vblank_event *event,
-				 uint32_t page_flip_flags,
-				 uint32_t target,
-				 struct drm_modeset_acquire_ctx *ctx)
-{
-	struct amdgpu_bo *new_abo;
-	struct amdgpu_flip_work *work;
-	int r;
+	return 0;
 
-	r = amdgpu_crtc_prepare_flip(crtc,
-				     fb,
-				     event,
-				     page_flip_flags,
-				     target,
-				     &work,
-				     &new_abo);
-	if (r)
-		return r;
+pflip_cleanup:
+	if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) {
+		DRM_ERROR("failed to reserve new abo in error path\n");
+		goto cleanup;
+	}
+unpin:
+	if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) {
+		DRM_ERROR("failed to unpin new abo in error path\n");
+	}
+unreserve:
+	amdgpu_bo_unreserve(new_abo);
 
-	amdgpu_crtc_submit_flip(crtc, fb, work, new_abo);
+cleanup:
+	amdgpu_bo_unref(&work->old_abo);
+	dma_fence_put(work->excl);
+	for (i = 0; i < work->shared_count; ++i)
+		dma_fence_put(work->shared[i]);
+	kfree(work->shared);
+	kfree(work);
 
-	return 0;
+	return r;
 }
 
 int amdgpu_crtc_set_config(struct drm_mode_set *set,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index db8f8dda209c..dbd10618ec20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -597,21 +597,6 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 				 struct drm_pending_vblank_event *event,
 				 uint32_t page_flip_flags, uint32_t target,
 				 struct drm_modeset_acquire_ctx *ctx);
-void amdgpu_crtc_cleanup_flip_ctx(struct amdgpu_flip_work *work,
-				  struct amdgpu_bo *new_abo);
-int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc,
-			     struct drm_framebuffer *fb,
-			     struct drm_pending_vblank_event *event,
-			     uint32_t page_flip_flags,
-			     uint32_t target,
-			     struct amdgpu_flip_work **work,
-			     struct amdgpu_bo **new_abo);
-
-void amdgpu_crtc_submit_flip(struct drm_crtc *crtc,
-			     struct drm_framebuffer *fb,
-			     struct amdgpu_flip_work *work,
-			     struct amdgpu_bo *new_abo);
-
 extern const struct drm_mode_config_funcs amdgpu_mode_funcs;
 
 #endif
-- 
cgit v1.2.3


From 0119dc6132d2110df8f3545bd0ffe29aa0752d6b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 22 Apr 2017 13:43:33 +0300
Subject: clk: x86: pmc-atom: Checking for IS_ERR() instead of NULL

clkdev_hw_create() returns NULLs on error, it doesn't return error
pointers.

Fixes: 41ee7caf59e1 ("clk: x86: add "mclk" alias for Baytrail/Cherrytrail")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/x86/clk-pmc-atom.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c
index fafc99120dc2..f99abc1106f0 100644
--- a/drivers/clk/x86/clk-pmc-atom.c
+++ b/drivers/clk/x86/clk-pmc-atom.c
@@ -339,8 +339,8 @@ static int plt_clk_probe(struct platform_device *pdev)
 		}
 	}
 	data->mclk_lookup = clkdev_hw_create(&data->clks[3]->hw, "mclk", NULL);
-	if (IS_ERR(data->mclk_lookup)) {
-		err = PTR_ERR(data->mclk_lookup);
+	if (!data->mclk_lookup) {
+		err = -ENOMEM;
 		goto err_unreg_clk_plt;
 	}
 
-- 
cgit v1.2.3


From 8ece1d83346bcc431090d59a2184276192189cdd Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 30 Apr 2017 22:54:16 +0200
Subject: ACPI / power: Delay turning off unused power resources after suspend

Commit 660b1113e0f3 (ACPI / PM: Fix consistency check for power resources
during resume) introduced a check for ACPI power resources which have
been turned on by the BIOS during suspend and turns these back off again.

This is causing problems on a Dell Venue Pro 11 7130 (i5-4300Y) it causes
the following messages to show up in dmesg:

[  131.014605] ACPI: Waking up from system sleep state S3
[  131.150271] acpi LNXPOWER:07: Turning OFF
[  131.150323] acpi LNXPOWER:06: Turning OFF
[  131.150911] acpi LNXPOWER:00: Turning OFF
[  131.169014] ACPI : EC: interrupt unblocked
[  131.181811] xhci_hcd 0000:00:14.0: System wakeup disabled by ACPI
[  133.535728] pci_raw_set_power_state: 76 callbacks suppressed
[  133.535735] iwlwifi 0000:01:00.0: Refused to change power state,
               currently in D3
[  133.597672] PM: noirq resume of devices complete after 2428.891 msecs

Followed by a bunch of iwlwifi errors later on and the pcie device
dropping from the bus (acpiphp thinks it has been unplugged).

Disabling the turning off of unused power resources fixes this. Instead
of adding a quirk for this system, this commit fixes this by moving the
disabling of unused power resources to later in the resume sequence
when the iwlwifi card has been moved out of D3 so the ref_count for
its power resource no longer is 0.

This new behavior seems to match the intend of the original commit which
commit-msg says: "(... which means that no devices are going to need them
any time soon) and we should turn them off".

This also avoids power resources which we need when bringing devices out
of D3 from getting bounced off and then back on again.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/power.c | 10 ++++++++++
 drivers/acpi/sleep.c |  1 +
 drivers/acpi/sleep.h |  1 +
 3 files changed, 12 insertions(+)

diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 1c2b846c5776..3a6c9b741b23 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -864,6 +864,16 @@ void acpi_resume_power_resources(void)
 
 		mutex_unlock(&resource->resource_lock);
 	}
+
+	mutex_unlock(&power_resource_list_lock);
+}
+
+void acpi_turn_off_unused_power_resources(void)
+{
+	struct acpi_power_resource *resource;
+
+	mutex_lock(&power_resource_list_lock);
+
 	list_for_each_entry_reverse(resource, &acpi_power_resource_list, list_node) {
 		int result, state;
 
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index a4327af676fe..097d630ab886 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -474,6 +474,7 @@ static void acpi_pm_start(u32 acpi_state)
  */
 static void acpi_pm_end(void)
 {
+	acpi_turn_off_unused_power_resources();
 	acpi_scan_lock_release();
 	/*
 	 * This is necessary in case acpi_pm_finish() is not called during a
diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h
index a9cc34e663f9..a82ff74faf7a 100644
--- a/drivers/acpi/sleep.h
+++ b/drivers/acpi/sleep.h
@@ -6,6 +6,7 @@ extern struct list_head acpi_wakeup_device_list;
 extern struct mutex acpi_device_lock;
 
 extern void acpi_resume_power_resources(void);
+extern void acpi_turn_off_unused_power_resources(void);
 
 static inline acpi_status acpi_set_waking_vector(u32 wakeup_address)
 {
-- 
cgit v1.2.3


From 41dc750ea67f317c0deedde713d1728425524ef2 Mon Sep 17 00:00:00 2001
From: "Li, Fei" <fei.li@intel.com>
Date: Thu, 27 Apr 2017 01:47:25 +0000
Subject: cpuidle: check dev before usage in cpuidle_use_deepest_state()

In case of there is no cpuidle devices registered, dev will be null, and
panic will be triggered like below;
In this patch, add checking of dev before usage, like that done in
cpuidle_idle_call.

Panic without fix:
[  184.961328] BUG: unable to handle kernel NULL pointer dereference at
  (null)
[  184.961328] IP: cpuidle_use_deepest_state+0x30/0x60
...
[  184.961328]  play_idle+0x8d/0x210
[  184.961328]  ? __schedule+0x359/0x8e0
[  184.961328]  ? _raw_spin_unlock_irqrestore+0x28/0x50
[  184.961328]  ? kthread_queue_delayed_work+0x41/0x80
[  184.961328]  clamp_idle_injection_func+0x64/0x1e0

Fixes: bb8313b603eb8 (cpuidle: Allow enforcing deepest idle state selection)
Signed-off-by: Li, Fei <fei.li@intel.com>
Tested-by: Shi, Feng <fengx.shi@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: 4.10+ <stable@vger.kernel.org> # 4.10+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 548b90be7685..2706be7ed334 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -111,7 +111,8 @@ void cpuidle_use_deepest_state(bool enable)
 
 	preempt_disable();
 	dev = cpuidle_get_device();
-	dev->use_deepest_state = enable;
+	if (dev)
+		dev->use_deepest_state = enable;
 	preempt_enable();
 }
 
-- 
cgit v1.2.3


From 1b2e87687d3f951a66900cab6f1583d94099d2f7 Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@intel.com>
Date: Sat, 22 Apr 2017 23:06:25 -0700
Subject: x86/intel_idle: add Gemini Lake support

Gemini Lake uses the same C-states as Broxton and also uses the
IRTL MSR's to determine maximum C-state latency.

Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Acked-by: Len Brown <len.brown@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/idle/intel_idle.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 5805b041dd0f..216d7ec88c0c 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1097,6 +1097,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	ICPU(INTEL_FAM6_XEON_PHI_KNL,		idle_cpu_knl),
 	ICPU(INTEL_FAM6_XEON_PHI_KNM,		idle_cpu_knl),
 	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		idle_cpu_bxt),
+	ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE,	idle_cpu_bxt),
 	ICPU(INTEL_FAM6_ATOM_DENVERTON,		idle_cpu_dnv),
 	{}
 };
@@ -1309,6 +1310,7 @@ static void intel_idle_state_table_update(void)
 		ivt_idle_state_table_update();
 		break;
 	case INTEL_FAM6_ATOM_GOLDMONT:
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
 		bxt_idle_state_table_update();
 		break;
 	case INTEL_FAM6_SKYLAKE_DESKTOP:
-- 
cgit v1.2.3


From 4ca41cb2ae09bfd9f84f053b8b9966e1bb8accc4 Mon Sep 17 00:00:00 2001
From: Ludovic Barre <ludovic.barre@st.com>
Date: Thu, 13 Apr 2017 19:15:56 +0200
Subject: dt-bindings: mtd: Document the STM32 QSPI bindings

This patch adds documentation of device tree bindings for the STM32
QSPI controller.

Signed-off-by: Ludovic Barre <ludovic.barre@st.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 .../devicetree/bindings/mtd/stm32-quadspi.txt      | 43 ++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mtd/stm32-quadspi.txt

diff --git a/Documentation/devicetree/bindings/mtd/stm32-quadspi.txt b/Documentation/devicetree/bindings/mtd/stm32-quadspi.txt
new file mode 100644
index 000000000000..ddd18c135148
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/stm32-quadspi.txt
@@ -0,0 +1,43 @@
+* STMicroelectronics Quad Serial Peripheral Interface(QuadSPI)
+
+Required properties:
+- compatible: should be "st,stm32f469-qspi"
+- reg: the first contains the register location and length.
+       the second contains the memory mapping address and length
+- reg-names: should contain the reg names "qspi" "qspi_mm"
+- interrupts: should contain the interrupt for the device
+- clocks: the phandle of the clock needed by the QSPI controller
+- A pinctrl must be defined to set pins in mode of operation for QSPI transfer
+
+Optional properties:
+- resets: must contain the phandle to the reset controller.
+
+A spi flash must be a child of the nor_flash node and could have some
+properties. Also see jedec,spi-nor.txt.
+
+Required properties:
+- reg: chip-Select number (QSPI controller may connect 2 nor flashes)
+- spi-max-frequency: max frequency of spi bus
+
+Optional property:
+- spi-rx-bus-width: see ../spi/spi-bus.txt for the description
+
+Example:
+
+qspi: spi@a0001000 {
+	compatible = "st,stm32f469-qspi";
+	reg = <0xa0001000 0x1000>, <0x90000000 0x10000000>;
+	reg-names = "qspi", "qspi_mm";
+	interrupts = <91>;
+	resets = <&rcc STM32F4_AHB3_RESET(QSPI)>;
+	clocks = <&rcc 0 STM32F4_AHB3_CLOCK(QSPI)>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_qspi0>;
+
+	flash@0 {
+		reg = <0>;
+		spi-rx-bus-width = <4>;
+		spi-max-frequency = <108000000>;
+		...
+	};
+};
-- 
cgit v1.2.3


From 5981c245a890db6a6e16fb6d3838cc9fc9fdf0ff Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Mon, 6 Mar 2017 16:21:11 +0200
Subject: target/iblock: convert iblock_req.pending from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_iblock.c | 12 ++++++------
 drivers/target/target_core_iblock.h |  3 ++-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index d316ed537d59..bb069ebe4aa6 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -279,7 +279,7 @@ static void iblock_complete_cmd(struct se_cmd *cmd)
 	struct iblock_req *ibr = cmd->priv;
 	u8 status;
 
-	if (!atomic_dec_and_test(&ibr->pending))
+	if (!refcount_dec_and_test(&ibr->pending))
 		return;
 
 	if (atomic_read(&ibr->ib_bio_err_cnt))
@@ -487,7 +487,7 @@ iblock_execute_write_same(struct se_cmd *cmd)
 	bio_list_init(&list);
 	bio_list_add(&list, bio);
 
-	atomic_set(&ibr->pending, 1);
+	refcount_set(&ibr->pending, 1);
 
 	while (sectors) {
 		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
@@ -498,7 +498,7 @@ iblock_execute_write_same(struct se_cmd *cmd)
 			if (!bio)
 				goto fail_put_bios;
 
-			atomic_inc(&ibr->pending);
+			refcount_inc(&ibr->pending);
 			bio_list_add(&list, bio);
 		}
 
@@ -706,7 +706,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	cmd->priv = ibr;
 
 	if (!sgl_nents) {
-		atomic_set(&ibr->pending, 1);
+		refcount_set(&ibr->pending, 1);
 		iblock_complete_cmd(cmd);
 		return 0;
 	}
@@ -719,7 +719,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	bio_list_init(&list);
 	bio_list_add(&list, bio);
 
-	atomic_set(&ibr->pending, 2);
+	refcount_set(&ibr->pending, 2);
 	bio_cnt = 1;
 
 	for_each_sg(sgl, sg, sgl_nents, i) {
@@ -740,7 +740,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 			if (!bio)
 				goto fail_put_bios;
 
-			atomic_inc(&ibr->pending);
+			refcount_inc(&ibr->pending);
 			bio_list_add(&list, bio);
 			bio_cnt++;
 		}
diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h
index 718d3fcd3e7c..f2a5797217d4 100644
--- a/drivers/target/target_core_iblock.h
+++ b/drivers/target/target_core_iblock.h
@@ -2,6 +2,7 @@
 #define TARGET_CORE_IBLOCK_H
 
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <target/target_core_base.h>
 
 #define IBLOCK_VERSION		"4.0"
@@ -10,7 +11,7 @@
 #define IBLOCK_LBA_SHIFT	9
 
 struct iblock_req {
-	atomic_t pending;
+	refcount_t pending;
 	atomic_t ib_bio_err_cnt;
 } ____cacheline_aligned;
 
-- 
cgit v1.2.3


From d65e655706e4827fb061daff399e07806b1223ad Mon Sep 17 00:00:00 2001
From: Zhu Lingshan <lszhu@suse.com>
Date: Wed, 8 Mar 2017 14:31:34 +0800
Subject: target/pr: update PR out action code table

This commit updated persistent revervation out service action
code table in SPC-5 for development.

Signed-off-by: Zhu Lingshan <lszhu@suse.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_pr.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_pr.h b/drivers/target/target_core_pr.h
index 847bd470339c..772f9148e75c 100644
--- a/drivers/target/target_core_pr.h
+++ b/drivers/target/target_core_pr.h
@@ -7,7 +7,7 @@
 /*
  * PERSISTENT_RESERVE_OUT service action codes
  *
- * spc4r17 section 6.14.2 Table 171
+ * spc5r04b section 6.15.2 Table 174
  */
 #define PRO_REGISTER				0x00
 #define PRO_RESERVE				0x01
@@ -17,10 +17,11 @@
 #define PRO_PREEMPT_AND_ABORT			0x05
 #define PRO_REGISTER_AND_IGNORE_EXISTING_KEY	0x06
 #define PRO_REGISTER_AND_MOVE			0x07
+#define PRO_REPLACE_LOST_RESERVATION	0x08
 /*
  * PERSISTENT_RESERVE_IN service action codes
  *
- * spc4r17 section 6.13.1 Table 159
+ * spc5r04b section 6.14.1 Table 162
  */
 #define PRI_READ_KEYS				0x00
 #define PRI_READ_RESERVATION			0x01
@@ -29,13 +30,13 @@
 /*
  * PERSISTENT_RESERVE_ SCOPE field
  *
- * spc4r17 section 6.13.3.3 Table 163
+ * spc5r04b section 6.14.3.2 Table 166
  */
 #define PR_SCOPE_LU_SCOPE			0x00
 /*
  * PERSISTENT_RESERVE_* TYPE field
  *
- * spc4r17 section 6.13.3.4 Table 164
+ * spc5r04b section 6.14.3.3 Table 167
  */
 #define PR_TYPE_WRITE_EXCLUSIVE			0x01
 #define PR_TYPE_EXCLUSIVE_ACCESS		0x03
-- 
cgit v1.2.3


From 0e2eb7d12eaa8e391bf5615d4271bb87a649caaa Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Thu, 30 Mar 2017 10:12:39 -0700
Subject: target: Fix VERIFY and WRITE VERIFY command parsing

Use the value of the BYTCHK field to determine the size of the
Data-Out buffer. For VERIFY, honor the VRPROTECT, DPO and FUA
fields. This patch avoids that LIO complains about a mismatch
between the expected transfer length and the SCSI CDB length
if the value of the BYTCHK field is 0.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Max Lohrmann <post@wickenrode.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_sbc.c | 71 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 62 insertions(+), 9 deletions(-)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index c194063f169b..ee35c90e3b8d 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -828,6 +828,59 @@ sbc_check_dpofua(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb)
 	return 0;
 }
 
+/**
+ * sbc_parse_verify - parse VERIFY, VERIFY_16 and WRITE VERIFY commands
+ * @cmd:     (in)  structure that describes the SCSI command to be parsed.
+ * @sectors: (out) Number of logical blocks on the storage medium that will be
+ *           affected by the SCSI command.
+ * @bufflen: (out) Expected length of the SCSI Data-Out buffer.
+ */
+static sense_reason_t sbc_parse_verify(struct se_cmd *cmd, int *sectors,
+				       u32 *bufflen)
+{
+	struct se_device *dev = cmd->se_dev;
+	u8 *cdb = cmd->t_task_cdb;
+	u8 bytchk = (cdb[1] >> 1) & 3;
+	sense_reason_t ret;
+
+	switch (cdb[0]) {
+	case VERIFY:
+	case WRITE_VERIFY:
+		*sectors = transport_get_sectors_10(cdb);
+		cmd->t_task_lba = transport_lba_32(cdb);
+		break;
+	case VERIFY_16:
+		*sectors = transport_get_sectors_16(cdb);
+		cmd->t_task_lba = transport_lba_64(cdb);
+		break;
+	default:
+		WARN_ON_ONCE(true);
+		return TCM_UNSUPPORTED_SCSI_OPCODE;
+	}
+
+	if (sbc_check_dpofua(dev, cmd, cdb))
+		return TCM_INVALID_CDB_FIELD;
+
+	ret = sbc_check_prot(dev, cmd, cdb, *sectors, true);
+	if (ret)
+		return ret;
+
+	switch (bytchk) {
+	case 0:
+		*bufflen = 0;
+		break;
+	case 1:
+		*bufflen = sbc_get_size(cmd, *sectors);
+		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
+		break;
+	default:
+		pr_err("Unsupported BYTCHK value %d for SCSI opcode %#x\n",
+		       bytchk, cdb[0]);
+		return TCM_INVALID_CDB_FIELD;
+	}
+	return TCM_NO_SENSE;
+}
+
 sense_reason_t
 sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 {
@@ -895,7 +948,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case WRITE_10:
-	case WRITE_VERIFY:
 		sectors = transport_get_sectors_10(cdb);
 		cmd->t_task_lba = transport_lba_32(cdb);
 
@@ -909,6 +961,12 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
 		cmd->execute_cmd = sbc_execute_rw;
 		break;
+	case WRITE_VERIFY:
+		ret = sbc_parse_verify(cmd, &sectors, &size);
+		if (ret)
+			return ret;
+		cmd->execute_cmd = sbc_execute_rw;
+		goto check_lba;
 	case WRITE_12:
 		sectors = transport_get_sectors_12(cdb);
 		cmd->t_task_lba = transport_lba_32(cdb);
@@ -1106,14 +1164,9 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		break;
 	case VERIFY:
 	case VERIFY_16:
-		size = 0;
-		if (cdb[0] == VERIFY) {
-			sectors = transport_get_sectors_10(cdb);
-			cmd->t_task_lba = transport_lba_32(cdb);
-		} else {
-			sectors = transport_get_sectors_16(cdb);
-			cmd->t_task_lba = transport_lba_64(cdb);
-		}
+		ret = sbc_parse_verify(cmd, &sectors, &size);
+		if (ret)
+			return ret;
 		cmd->execute_cmd = sbc_emulate_noop;
 		goto check_lba;
 	case REZERO_UNIT:
-- 
cgit v1.2.3


From f11b55d13563e9428c88c873f4f03a6bef11ec0a Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 31 Mar 2017 19:53:35 +0400
Subject: tcm_fileio: Prevent information leak for short reads

If we failed to read data from backing file (probably because some one
truncate file under us), we must zerofill cmd's data, otherwise it will
be returned as is. Most likely cmd's data are unitialized pages from
page cache. This result in information leak.

(Change BUG_ON into -EINVAL se_cmd failure - nab)

testcase: https://github.com/dmonakhov/xfstests/commit/e11a1b7b907ca67b1be51a1594025600767366d5
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_file.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 87aa376a1a1a..dd8f32055266 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -277,12 +277,11 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
 	else
 		ret = vfs_iter_read(fd, &iter, &pos);
 
-	kfree(bvec);
-
 	if (is_write) {
 		if (ret < 0 || ret != data_length) {
 			pr_err("%s() write returned %d\n", __func__, ret);
-			return (ret < 0 ? ret : -EINVAL);
+			if (ret >= 0)
+				ret = -EINVAL;
 		}
 	} else {
 		/*
@@ -295,17 +294,29 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
 				pr_err("%s() returned %d, expecting %u for "
 						"S_ISBLK\n", __func__, ret,
 						data_length);
-				return (ret < 0 ? ret : -EINVAL);
+				if (ret >= 0)
+					ret = -EINVAL;
 			}
 		} else {
 			if (ret < 0) {
 				pr_err("%s() returned %d for non S_ISBLK\n",
 						__func__, ret);
-				return ret;
+			} else if (ret != data_length) {
+				/*
+				 * Short read case:
+				 * Probably some one truncate file under us.
+				 * We must explicitly zero sg-pages to prevent
+				 * expose uninizialized pages to userspace.
+				 */
+				if (ret < data_length)
+					ret += iov_iter_zero(data_length - ret, &iter);
+				else
+					ret = -EINVAL;
 			}
 		}
 	}
-	return 1;
+	kfree(bvec);
+	return ret;
 }
 
 static sense_reason_t
-- 
cgit v1.2.3


From 056e8924a072d22007275dfb8b247bb814765b67 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 31 Mar 2017 19:53:36 +0400
Subject: tcm: make pi data verification configurable

Currently ramdisk and fileio always perform PI verification
before and after backend IO. This approach is not very flexible.
Because some one may want to postpone this work to other layers in
IO stack. For example if we want to test blk_integrity_profile

testcase:
https://github.com/dmonakhov/xfstests/commit/dee408c868861d6b6871dbb3381facee7effdbe4
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_configfs.c | 34 ++++++++++++++++++++++++++++++++++
 drivers/target/target_core_file.c     |  6 ++++--
 drivers/target/target_core_rd.c       | 17 +++++++++--------
 include/target/target_core_base.h     |  1 +
 4 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 70657fd56440..a34a86652b96 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -533,6 +533,7 @@ DEF_CONFIGFS_ATTRIB_SHOW(emulate_3pc);
 DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_type);
 DEF_CONFIGFS_ATTRIB_SHOW(hw_pi_prot_type);
 DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_format);
+DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_verify);
 DEF_CONFIGFS_ATTRIB_SHOW(enforce_pr_isids);
 DEF_CONFIGFS_ATTRIB_SHOW(is_nonrot);
 DEF_CONFIGFS_ATTRIB_SHOW(emulate_rest_reord);
@@ -823,6 +824,7 @@ static ssize_t pi_prot_type_store(struct config_item *item,
 		ret = dev->transport->init_prot(dev);
 		if (ret) {
 			da->pi_prot_type = old_prot;
+			da->pi_prot_verify = (bool) da->pi_prot_type;
 			return ret;
 		}
 
@@ -830,6 +832,7 @@ static ssize_t pi_prot_type_store(struct config_item *item,
 		dev->transport->free_prot(dev);
 	}
 
+	da->pi_prot_verify = (bool) da->pi_prot_type;
 	pr_debug("dev[%p]: SE Device Protection Type: %d\n", dev, flag);
 	return count;
 }
@@ -872,6 +875,35 @@ static ssize_t pi_prot_format_store(struct config_item *item,
 	return count;
 }
 
+static ssize_t pi_prot_verify_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct se_dev_attrib *da = to_attrib(item);
+	bool flag;
+	int ret;
+
+	ret = strtobool(page, &flag);
+	if (ret < 0)
+		return ret;
+
+	if (!flag) {
+		da->pi_prot_verify = flag;
+		return count;
+	}
+	if (da->hw_pi_prot_type) {
+		pr_warn("DIF protection enabled on underlying hardware,"
+			" ignoring\n");
+		return count;
+	}
+	if (!da->pi_prot_type) {
+		pr_warn("DIF protection not supported by backend, ignoring\n");
+		return count;
+	}
+	da->pi_prot_verify = flag;
+
+	return count;
+}
+
 static ssize_t force_pr_aptpl_store(struct config_item *item,
 		const char *page, size_t count)
 {
@@ -1067,6 +1099,7 @@ CONFIGFS_ATTR(, emulate_3pc);
 CONFIGFS_ATTR(, pi_prot_type);
 CONFIGFS_ATTR_RO(, hw_pi_prot_type);
 CONFIGFS_ATTR(, pi_prot_format);
+CONFIGFS_ATTR(, pi_prot_verify);
 CONFIGFS_ATTR(, enforce_pr_isids);
 CONFIGFS_ATTR(, is_nonrot);
 CONFIGFS_ATTR(, emulate_rest_reord);
@@ -1104,6 +1137,7 @@ struct configfs_attribute *sbc_attrib_attrs[] = {
 	&attr_pi_prot_type,
 	&attr_hw_pi_prot_type,
 	&attr_pi_prot_format,
+	&attr_pi_prot_verify,
 	&attr_enforce_pr_isids,
 	&attr_is_nonrot,
 	&attr_emulate_rest_reord,
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index dd8f32055266..1bf6c31e4c21 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -554,7 +554,8 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 		ret = fd_do_rw(cmd, file, dev->dev_attrib.block_size,
 			       sgl, sgl_nents, cmd->data_length, 0);
 
-		if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type) {
+		if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type &&
+		    dev->dev_attrib.pi_prot_verify) {
 			u32 sectors = cmd->data_length >>
 					ilog2(dev->dev_attrib.block_size);
 
@@ -564,7 +565,8 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 				return rc;
 		}
 	} else {
-		if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
+		if (cmd->prot_type && dev->dev_attrib.pi_prot_type &&
+		    dev->dev_attrib.pi_prot_verify) {
 			u32 sectors = cmd->data_length >>
 					ilog2(dev->dev_attrib.block_size);
 
diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index ddc216c9f1f6..5f23f341f8d3 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -410,7 +410,7 @@ static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, bool is_read)
 	u32 prot_offset, prot_page;
 	u32 prot_npages __maybe_unused;
 	u64 tmp;
-	sense_reason_t rc = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+	sense_reason_t rc = 0;
 
 	tmp = cmd->t_task_lba * se_dev->prot_length;
 	prot_offset = do_div(tmp, PAGE_SIZE);
@@ -423,13 +423,14 @@ static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, bool is_read)
 	prot_sg = &prot_table->sg_table[prot_page -
 					prot_table->page_start_offset];
 
-	if (is_read)
-		rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0,
-				    prot_sg, prot_offset);
-	else
-		rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0,
-				    cmd->t_prot_sg, 0);
-
+	if (se_dev->dev_attrib.pi_prot_verify) {
+		if (is_read)
+			rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0,
+					    prot_sg, prot_offset);
+		else
+			rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0,
+					    cmd->t_prot_sg, 0);
+	}
 	if (!rc)
 		sbc_dif_copy_prot(cmd, sectors, is_read, prot_sg, prot_offset);
 
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index ccfad0e9c2cd..0c1dce2ac6f0 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -664,6 +664,7 @@ struct se_dev_attrib {
 	int		pi_prot_format;
 	enum target_prot_type pi_prot_type;
 	enum target_prot_type hw_pi_prot_type;
+	int		pi_prot_verify;
 	int		enforce_pr_isids;
 	int		force_pr_aptpl;
 	int		is_nonrot;
-- 
cgit v1.2.3


From f1725110324d97d841609fd0b3536eb3ead77086 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Sun, 9 Apr 2017 15:06:00 +0200
Subject: iscsi-target: Use kcalloc() in iscsit_allocate_iovecs()

* A multiplication for the size determination of a memory allocation
  indicated that an array data structure should be processed.
  Thus use the corresponding function "kcalloc".

  This issue was detected by using the Coccinelle software.

* Replace the specification of a data structure by a pointer dereference
  to make the corresponding size determination a bit safer according to
  the Linux coding style convention.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index e3f9ed3690b7..3cf9fb54b7d4 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -984,8 +984,7 @@ static int iscsit_allocate_iovecs(struct iscsi_cmd *cmd)
 	u32 iov_count = max(1UL, DIV_ROUND_UP(cmd->se_cmd.data_length, PAGE_SIZE));
 
 	iov_count += ISCSI_IOV_DATA_BUFFER;
-
-	cmd->iov_data = kzalloc(iov_count * sizeof(struct kvec), GFP_KERNEL);
+	cmd->iov_data = kcalloc(iov_count, sizeof(*cmd->iov_data), GFP_KERNEL);
 	if (!cmd->iov_data) {
 		pr_err("Unable to allocate cmd->iov_data\n");
 		return -ENOMEM;
-- 
cgit v1.2.3


From c46e22f10612d91fb2cceef232e9d73c34be212a Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Sun, 9 Apr 2017 15:34:50 +0200
Subject: iscsi-target: Delete error messages for failed memory allocations

The script "checkpatch.pl" pointed information out like the following.

WARNING: Possible unnecessary 'out of memory' message

Thus remove such statements here.

Link: http://events.linuxfoundation.org/sites/events/files/slides/LCJ16-Refactor_Strings-WSang_0.pdf
Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c | 37 ++++++++++---------------------------
 1 file changed, 10 insertions(+), 27 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 3cf9fb54b7d4..16750da1584a 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -129,10 +129,8 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf)
 	}
 
 	tiqn = kzalloc(sizeof(struct iscsi_tiqn), GFP_KERNEL);
-	if (!tiqn) {
-		pr_err("Unable to allocate struct iscsi_tiqn\n");
+	if (!tiqn)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	sprintf(tiqn->tiqn, "%s", buf);
 	INIT_LIST_HEAD(&tiqn->tiqn_list);
@@ -364,7 +362,6 @@ struct iscsi_np *iscsit_add_np(
 
 	np = kzalloc(sizeof(struct iscsi_np), GFP_KERNEL);
 	if (!np) {
-		pr_err("Unable to allocate memory for struct iscsi_np\n");
 		mutex_unlock(&np_lock);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -698,10 +695,9 @@ static int __init iscsi_target_init_module(void)
 	pr_debug("iSCSI-Target "ISCSIT_VERSION"\n");
 
 	iscsit_global = kzalloc(sizeof(struct iscsit_global), GFP_KERNEL);
-	if (!iscsit_global) {
-		pr_err("Unable to allocate memory for iscsit_global\n");
+	if (!iscsit_global)
 		return -1;
-	}
+
 	spin_lock_init(&iscsit_global->ts_bitmap_lock);
 	mutex_init(&auth_id_lock);
 	spin_lock_init(&sess_idr_lock);
@@ -714,10 +710,8 @@ static int __init iscsi_target_init_module(void)
 
 	size = BITS_TO_LONGS(ISCSIT_BITMAP_BITS) * sizeof(long);
 	iscsit_global->ts_bitmap = vzalloc(size);
-	if (!iscsit_global->ts_bitmap) {
-		pr_err("Unable to allocate iscsit_global->ts_bitmap\n");
+	if (!iscsit_global->ts_bitmap)
 		goto configfs_out;
-	}
 
 	lio_qr_cache = kmem_cache_create("lio_qr_cache",
 			sizeof(struct iscsi_queue_req),
@@ -985,10 +979,8 @@ static int iscsit_allocate_iovecs(struct iscsi_cmd *cmd)
 
 	iov_count += ISCSI_IOV_DATA_BUFFER;
 	cmd->iov_data = kcalloc(iov_count, sizeof(*cmd->iov_data), GFP_KERNEL);
-	if (!cmd->iov_data) {
-		pr_err("Unable to allocate cmd->iov_data\n");
+	if (!cmd->iov_data)
 		return -ENOMEM;
-	}
 
 	cmd->orig_iov_data_count = iov_count;
 	return 0;
@@ -1849,8 +1841,6 @@ static int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 
 		ping_data = kzalloc(payload_length + 1, GFP_KERNEL);
 		if (!ping_data) {
-			pr_err("Unable to allocate memory for"
-				" NOPOUT ping data.\n");
 			ret = -1;
 			goto out;
 		}
@@ -1998,13 +1988,10 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	cmd->data_direction = DMA_NONE;
 
 	cmd->tmr_req = kzalloc(sizeof(struct iscsi_tmr_req), GFP_KERNEL);
-	if (!cmd->tmr_req) {
-		pr_err("Unable to allocate memory for"
-			" Task Management command!\n");
+	if (!cmd->tmr_req)
 		return iscsit_add_reject_cmd(cmd,
 					     ISCSI_REASON_BOOKMARK_NO_RESOURCES,
 					     buf);
-	}
 
 	/*
 	 * TASK_REASSIGN for ERL=2 / connection stays inside of
@@ -2264,11 +2251,9 @@ iscsit_handle_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 		struct kvec iov[3];
 
 		text_in = kzalloc(payload_length, GFP_KERNEL);
-		if (!text_in) {
-			pr_err("Unable to allocate memory for"
-				" incoming text parameters\n");
+		if (!text_in)
 			goto reject;
-		}
+
 		cmd->text_in_ptr = text_in;
 
 		memset(iov, 0, 3 * sizeof(struct kvec));
@@ -3352,11 +3337,9 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
 			 SENDTARGETS_BUF_LIMIT);
 
 	payload = kzalloc(buffer_len, GFP_KERNEL);
-	if (!payload) {
-		pr_err("Unable to allocate memory for sendtargets"
-				" response.\n");
+	if (!payload)
 		return -ENOMEM;
-	}
+
 	/*
 	 * Locate pointer to iqn./eui. string for ICF_SENDTARGETS_SINGLE
 	 * explicit case..
-- 
cgit v1.2.3


From 3829f38169aaf38c764afba852e320d8f3b006de Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Sun, 9 Apr 2017 16:00:39 +0200
Subject: iscsi-target: Improve size determinations in four functions

Replace the specification of four data structures by pointer dereferences
as the parameter for the operator "sizeof" to make the corresponding size
determinations a bit safer according to the Linux coding style convention.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 16750da1584a..0f7ade04b583 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -128,7 +128,7 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf)
 		return ERR_PTR(-EINVAL);
 	}
 
-	tiqn = kzalloc(sizeof(struct iscsi_tiqn), GFP_KERNEL);
+	tiqn = kzalloc(sizeof(*tiqn), GFP_KERNEL);
 	if (!tiqn)
 		return ERR_PTR(-ENOMEM);
 
@@ -360,7 +360,7 @@ struct iscsi_np *iscsit_add_np(
 		return np;
 	}
 
-	np = kzalloc(sizeof(struct iscsi_np), GFP_KERNEL);
+	np = kzalloc(sizeof(*np), GFP_KERNEL);
 	if (!np) {
 		mutex_unlock(&np_lock);
 		return ERR_PTR(-ENOMEM);
@@ -693,8 +693,7 @@ static int __init iscsi_target_init_module(void)
 	int ret = 0, size;
 
 	pr_debug("iSCSI-Target "ISCSIT_VERSION"\n");
-
-	iscsit_global = kzalloc(sizeof(struct iscsit_global), GFP_KERNEL);
+	iscsit_global = kzalloc(sizeof(*iscsit_global), GFP_KERNEL);
 	if (!iscsit_global)
 		return -1;
 
@@ -1986,8 +1985,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 		hdr->refcmdsn = cpu_to_be32(ISCSI_RESERVED_TAG);
 
 	cmd->data_direction = DMA_NONE;
-
-	cmd->tmr_req = kzalloc(sizeof(struct iscsi_tmr_req), GFP_KERNEL);
+	cmd->tmr_req = kzalloc(sizeof(*cmd->tmr_req), GFP_KERNEL);
 	if (!cmd->tmr_req)
 		return iscsit_add_reject_cmd(cmd,
 					     ISCSI_REASON_BOOKMARK_NO_RESOURCES,
-- 
cgit v1.2.3


From 55ec409202ff347d22673d90e01a159d3bd6d9cd Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Sun, 9 Apr 2017 19:02:38 +0200
Subject: target: Use kcalloc() in two functions

* Multiplications for the size determination of memory allocations
  indicated that array data structures should be processed.
  Thus use the corresponding function "kcalloc".

  This issue was detected by using the Coccinelle software.

* Replace the specification of data structures by pointer dereferences
  to make the corresponding size determination a bit safer according to
  the Linux coding style convention.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_rd.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index 5f23f341f8d3..026857861107 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -210,8 +210,7 @@ static int rd_build_device_space(struct rd_dev *rd_dev)
 	total_sg_needed = rd_dev->rd_page_count;
 
 	sg_tables = (total_sg_needed / max_sg_per_table) + 1;
-
-	sg_table = kzalloc(sg_tables * sizeof(struct rd_dev_sg_table), GFP_KERNEL);
+	sg_table = kcalloc(sg_tables, sizeof(*sg_table), GFP_KERNEL);
 	if (!sg_table) {
 		pr_err("Unable to allocate memory for Ramdisk"
 		       " scatterlist tables\n");
@@ -271,8 +270,7 @@ static int rd_build_prot_space(struct rd_dev *rd_dev, int prot_length, int block
 	total_sg_needed = (rd_dev->rd_page_count * prot_length / block_size) + 1;
 
 	sg_tables = (total_sg_needed / max_sg_per_table) + 1;
-
-	sg_table = kzalloc(sg_tables * sizeof(struct rd_dev_sg_table), GFP_KERNEL);
+	sg_table = kcalloc(sg_tables, sizeof(*sg_table), GFP_KERNEL);
 	if (!sg_table) {
 		pr_err("Unable to allocate memory for Ramdisk protection"
 		       " scatterlist tables\n");
-- 
cgit v1.2.3


From fbc4040bf67b10ee66097c9603550773a3bc5c06 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Sun, 9 Apr 2017 19:20:26 +0200
Subject: target: Delete error messages for failed memory allocations

The script "checkpatch.pl" pointed information out like the following.

WARNING: Possible unnecessary 'out of memory' message

Thus remove such statements here.

Link: http://events.linuxfoundation.org/sites/events/files/slides/LCJ16-Refactor_Strings-WSang_0.pdf
Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_rd.c | 23 +++++------------------
 1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index 026857861107..838dc128d494 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -48,10 +48,8 @@ static int rd_attach_hba(struct se_hba *hba, u32 host_id)
 	struct rd_host *rd_host;
 
 	rd_host = kzalloc(sizeof(struct rd_host), GFP_KERNEL);
-	if (!rd_host) {
-		pr_err("Unable to allocate memory for struct rd_host\n");
+	if (!rd_host)
 		return -ENOMEM;
-	}
 
 	rd_host->rd_host_id = host_id;
 
@@ -148,11 +146,8 @@ static int rd_allocate_sgl_table(struct rd_dev *rd_dev, struct rd_dev_sg_table *
 
 		sg = kcalloc(sg_per_table + chain_entry, sizeof(*sg),
 				GFP_KERNEL);
-		if (!sg) {
-			pr_err("Unable to allocate scatterlist array"
-				" for struct rd_dev\n");
+		if (!sg)
 			return -ENOMEM;
-		}
 
 		sg_init_table(sg, sg_per_table + chain_entry);
 
@@ -211,11 +206,8 @@ static int rd_build_device_space(struct rd_dev *rd_dev)
 
 	sg_tables = (total_sg_needed / max_sg_per_table) + 1;
 	sg_table = kcalloc(sg_tables, sizeof(*sg_table), GFP_KERNEL);
-	if (!sg_table) {
-		pr_err("Unable to allocate memory for Ramdisk"
-		       " scatterlist tables\n");
+	if (!sg_table)
 		return -ENOMEM;
-	}
 
 	rd_dev->sg_table_array = sg_table;
 	rd_dev->sg_table_count = sg_tables;
@@ -271,11 +263,8 @@ static int rd_build_prot_space(struct rd_dev *rd_dev, int prot_length, int block
 
 	sg_tables = (total_sg_needed / max_sg_per_table) + 1;
 	sg_table = kcalloc(sg_tables, sizeof(*sg_table), GFP_KERNEL);
-	if (!sg_table) {
-		pr_err("Unable to allocate memory for Ramdisk protection"
-		       " scatterlist tables\n");
+	if (!sg_table)
 		return -ENOMEM;
-	}
 
 	rd_dev->sg_prot_array = sg_table;
 	rd_dev->sg_prot_count = sg_tables;
@@ -297,10 +286,8 @@ static struct se_device *rd_alloc_device(struct se_hba *hba, const char *name)
 	struct rd_host *rd_host = hba->hba_ptr;
 
 	rd_dev = kzalloc(sizeof(struct rd_dev), GFP_KERNEL);
-	if (!rd_dev) {
-		pr_err("Unable to allocate memory for struct rd_dev\n");
+	if (!rd_dev)
 		return NULL;
-	}
 
 	rd_dev->rd_host = rd_host;
 
-- 
cgit v1.2.3


From 5d68fb72d3780555752bcf2526f759f953e08bc3 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Sun, 9 Apr 2017 20:15:12 +0200
Subject: target: Improve size determinations in two functions

Replace the specification of two data structures by pointer dereferences
as the parameter for the operator "sizeof" to make the corresponding size
determinations a bit safer according to the Linux coding style convention.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_rd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index 838dc128d494..20253d04103f 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -47,7 +47,7 @@ static int rd_attach_hba(struct se_hba *hba, u32 host_id)
 {
 	struct rd_host *rd_host;
 
-	rd_host = kzalloc(sizeof(struct rd_host), GFP_KERNEL);
+	rd_host = kzalloc(sizeof(*rd_host), GFP_KERNEL);
 	if (!rd_host)
 		return -ENOMEM;
 
@@ -285,7 +285,7 @@ static struct se_device *rd_alloc_device(struct se_hba *hba, const char *name)
 	struct rd_dev *rd_dev;
 	struct rd_host *rd_host = hba->hba_ptr;
 
-	rd_dev = kzalloc(sizeof(struct rd_dev), GFP_KERNEL);
+	rd_dev = kzalloc(sizeof(*rd_dev), GFP_KERNEL);
 	if (!rd_dev)
 		return NULL;
 
-- 
cgit v1.2.3


From f318aef55fed0968af42ceef3976ee7cd858d845 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Sun, 9 Apr 2017 20:25:11 +0200
Subject: target: Use kmalloc_array() in compare_and_write_callback()

* A multiplication for the size determination of a memory allocation
  indicated that an array data structure should be processed.
  Thus use the corresponding function "kmalloc_array".

  This issue was detected by using the Coccinelle software.

* Replace the specification of a data structure by a pointer dereference
  to make the corresponding size determination a bit safer according to
  the Linux coding style convention.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_sbc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index ee35c90e3b8d..a7fa4a7339db 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -519,8 +519,8 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes
 		goto out;
 	}
 
-	write_sg = kmalloc(sizeof(struct scatterlist) * cmd->t_data_nents,
-			   GFP_KERNEL);
+	write_sg = kmalloc_array(cmd->t_data_nents, sizeof(*write_sg),
+				 GFP_KERNEL);
 	if (!write_sg) {
 		pr_err("Unable to allocate compare_and_write sg\n");
 		ret = TCM_OUT_OF_RESOURCES;
-- 
cgit v1.2.3


From df6751f3401f86d87158279850aa9bedeef2d504 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Sun, 9 Apr 2017 21:07:14 +0200
Subject: target: Use kmalloc_array() in transport_kmap_data_sg()

A multiplication for the size determination of a memory allocation
indicated that an array data structure should be processed.
Thus use the corresponding function "kmalloc_array".

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index a0cd56ee5fe9..37f57357d4a0 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2311,7 +2311,7 @@ void *transport_kmap_data_sg(struct se_cmd *cmd)
 		return kmap(sg_page(sg)) + sg->offset;
 
 	/* >1 page. use vmap */
-	pages = kmalloc(sizeof(*pages) * cmd->t_data_nents, GFP_KERNEL);
+	pages = kmalloc_array(cmd->t_data_nents, sizeof(*pages), GFP_KERNEL);
 	if (!pages)
 		return NULL;
 
-- 
cgit v1.2.3


From 38d454f0b5fab60d76eed5dcdfc7823d43d087ab Mon Sep 17 00:00:00 2001
From: Christophe Vu-Brugier <cvubrugier@fastmail.fm>
Date: Wed, 12 Apr 2017 21:59:08 +0200
Subject: Documentation/target: add an example script to configure an iSCSI
 target

The script illustrates how to interact with configfs to create a very
simple LIO iSCSI target with a file or block device backstore.

The script can serve as a starting point for people that cannot use
targetcli because Python is not available on their machine.

Signed-off-by: Christophe Vu-Brugier <cvubrugier@fastmail.fm>
Signed-off-by: Vincent Donnefort <vdonnefort@gmail.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 Documentation/target/target-export-device | 80 +++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100755 Documentation/target/target-export-device

diff --git a/Documentation/target/target-export-device b/Documentation/target/target-export-device
new file mode 100755
index 000000000000..b803f4f886b5
--- /dev/null
+++ b/Documentation/target/target-export-device
@@ -0,0 +1,80 @@
+#!/bin/sh
+#
+# This script illustrates the sequence of operations in configfs to
+# create a very simple LIO iSCSI target with a file or block device
+# backstore.
+#
+# (C) Copyright 2014 Christophe Vu-Brugier <cvubrugier@fastmail.fm>
+#
+
+print_usage() {
+    cat <<EOF
+Usage: $(basename $0) [-p PORTAL] DEVICE|FILE
+Export a block device or a file as an iSCSI target with a single LUN
+EOF
+}
+
+die() {
+    echo $1
+    exit 1
+}
+
+while getopts "hp:" arg; do
+    case $arg in
+        h) print_usage; exit 0;;
+        p) PORTAL=${OPTARG};;
+    esac
+done
+shift $(($OPTIND - 1))
+
+DEVICE=$1
+[ -n "$DEVICE" ] || die "Missing device or file argument"
+[ -b $DEVICE -o -f $DEVICE ] || die "Invalid device or file: ${DEVICE}"
+IQN="iqn.2003-01.org.linux-iscsi.$(hostname):$(basename $DEVICE)"
+[ -n "$PORTAL" ] || PORTAL="0.0.0.0:3260"
+
+CONFIGFS=/sys/kernel/config
+CORE_DIR=$CONFIGFS/target/core
+ISCSI_DIR=$CONFIGFS/target/iscsi
+
+# Load the target modules and mount the config file system
+lsmod | grep -q configfs || modprobe configfs
+lsmod | grep -q target_core_mod || modprobe target_core_mod
+mount | grep -q ^configfs || mount -t configfs none $CONFIGFS
+mkdir -p $ISCSI_DIR
+
+# Create a backstore
+if [ -b $DEVICE ]; then
+    BACKSTORE_DIR=$CORE_DIR/iblock_0/data
+    mkdir -p $BACKSTORE_DIR
+    echo "udev_path=${DEVICE}" > $BACKSTORE_DIR/control
+else
+    BACKSTORE_DIR=$CORE_DIR/fileio_0/data
+    mkdir -p $BACKSTORE_DIR
+    DEVICE_SIZE=$(du -b $DEVICE | cut -f1)
+    echo "fd_dev_name=${DEVICE}" > $BACKSTORE_DIR/control
+    echo "fd_dev_size=${DEVICE_SIZE}" > $BACKSTORE_DIR/control
+    echo 1 > $BACKSTORE_DIR/attrib/emulate_write_cache
+fi
+echo 1 > $BACKSTORE_DIR/enable
+
+# Create an iSCSI target and a target portal group (TPG)
+mkdir $ISCSI_DIR/$IQN
+mkdir $ISCSI_DIR/$IQN/tpgt_1/
+
+# Create a LUN
+mkdir $ISCSI_DIR/$IQN/tpgt_1/lun/lun_0
+ln -s $BACKSTORE_DIR $ISCSI_DIR/$IQN/tpgt_1/lun/lun_0/data
+echo 1 > $ISCSI_DIR/$IQN/tpgt_1/enable
+
+# Create a network portal
+mkdir $ISCSI_DIR/$IQN/tpgt_1/np/$PORTAL
+
+# Disable authentication
+echo 0 > $ISCSI_DIR/$IQN/tpgt_1/attrib/authentication
+echo 1 > $ISCSI_DIR/$IQN/tpgt_1/attrib/generate_node_acls
+
+# Allow write access for non authenticated initiators
+echo 0 > $ISCSI_DIR/$IQN/tpgt_1/attrib/demo_mode_write_protect
+
+echo "Target ${IQN}, portal ${PORTAL} has been created"
-- 
cgit v1.2.3


From c2d26f18dcbc84799457852292c66967ff6626f1 Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Date: Tue, 18 Apr 2017 14:10:05 -0500
Subject: target: Add WRITE_VERIFY_16

This patch addresses clients who needs write_verify_16 for
large volume groups such as AIX.

Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_sbc.c | 2 ++
 include/scsi/scsi_proto.h        | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index a7fa4a7339db..f9250b3c3fd4 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -850,6 +850,7 @@ static sense_reason_t sbc_parse_verify(struct se_cmd *cmd, int *sectors,
 		cmd->t_task_lba = transport_lba_32(cdb);
 		break;
 	case VERIFY_16:
+	case WRITE_VERIFY_16:
 		*sectors = transport_get_sectors_16(cdb);
 		cmd->t_task_lba = transport_lba_64(cdb);
 		break;
@@ -962,6 +963,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case WRITE_VERIFY:
+	case WRITE_VERIFY_16:
 		ret = sbc_parse_verify(cmd, &sectors, &size);
 		if (ret)
 			return ret;
diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index 6ba66e01f6df..ce78ec8e367d 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -112,6 +112,7 @@
 #define WRITE_16              0x8a
 #define READ_ATTRIBUTE        0x8c
 #define WRITE_ATTRIBUTE	      0x8d
+#define WRITE_VERIFY_16	      0x8e
 #define VERIFY_16	      0x8f
 #define SYNCHRONIZE_CACHE_16  0x91
 #define WRITE_SAME_16	      0x93
-- 
cgit v1.2.3


From 4ec5bf0ea83930b96addf6b78225bf0355459d7f Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Date: Fri, 21 Apr 2017 20:40:50 -0500
Subject: target/user: PGR Support

This adds initial PGR support for just TCMU, since tcmu doesn't
have the necessary IT_NEXUS info to process PGR in userspace,
so have those commands be processed in kernel.

HA support is not available yet, we will work on it if this patch
is acceptable.

Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_configfs.c | 10 ++++-----
 drivers/target/target_core_device.c   | 38 +++++++++++++++++++++++++++++++++++
 drivers/target/target_core_pr.c       |  2 +-
 drivers/target/target_core_pscsi.c    |  3 ++-
 include/target/target_core_backend.h  |  1 +
 5 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index a34a86652b96..e7b62bc239a7 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -1400,7 +1400,7 @@ static ssize_t target_pr_res_holder_show(struct config_item *item, char *page)
 	struct se_device *dev = pr_to_dev(item);
 	int ret;
 
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
 		return sprintf(page, "Passthrough\n");
 
 	spin_lock(&dev->dev_reservation_lock);
@@ -1540,7 +1540,7 @@ static ssize_t target_pr_res_type_show(struct config_item *item, char *page)
 {
 	struct se_device *dev = pr_to_dev(item);
 
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
 		return sprintf(page, "SPC_PASSTHROUGH\n");
 	else if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
 		return sprintf(page, "SPC2_RESERVATIONS\n");
@@ -1553,7 +1553,7 @@ static ssize_t target_pr_res_aptpl_active_show(struct config_item *item,
 {
 	struct se_device *dev = pr_to_dev(item);
 
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
 		return 0;
 
 	return sprintf(page, "APTPL Bit Status: %s\n",
@@ -1565,7 +1565,7 @@ static ssize_t target_pr_res_aptpl_metadata_show(struct config_item *item,
 {
 	struct se_device *dev = pr_to_dev(item);
 
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
 		return 0;
 
 	return sprintf(page, "Ready to process PR APTPL metadata..\n");
@@ -1611,7 +1611,7 @@ static ssize_t target_pr_res_aptpl_metadata_store(struct config_item *item,
 	u16 tpgt = 0;
 	u8 type = 0;
 
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
 		return count;
 	if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
 		return count;
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index c754ae33bf7b..c4845678eb91 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1045,6 +1045,8 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 	sense_reason_t (*exec_cmd)(struct se_cmd *cmd))
 {
 	unsigned char *cdb = cmd->t_task_cdb;
+	struct se_device *dev = cmd->se_dev;
+	unsigned int size;
 
 	/*
 	 * Clear a lun set in the cdb if the initiator talking to use spoke
@@ -1076,6 +1078,42 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 		return TCM_NO_SENSE;
 	}
 
+	/*
+	 * For PERSISTENT RESERVE IN/OUT, RELEASE, and RESERVE we need to
+	 * emulate the response, since tcmu does not have the information
+	 * required to process these commands.
+	 */
+	if (!(dev->transport->transport_flags &
+	      TRANSPORT_FLAG_PASSTHROUGH_PGR)) {
+		if (cdb[0] == PERSISTENT_RESERVE_IN) {
+			cmd->execute_cmd = target_scsi3_emulate_pr_in;
+			size = (cdb[7] << 8) + cdb[8];
+			return target_cmd_size_check(cmd, size);
+		}
+		if (cdb[0] == PERSISTENT_RESERVE_OUT) {
+			cmd->execute_cmd = target_scsi3_emulate_pr_out;
+			size = (cdb[7] << 8) + cdb[8];
+			return target_cmd_size_check(cmd, size);
+		}
+
+		if (cdb[0] == RELEASE || cdb[0] == RELEASE_10) {
+			cmd->execute_cmd = target_scsi2_reservation_release;
+			if (cdb[0] == RELEASE_10)
+				size = (cdb[7] << 8) | cdb[8];
+			else
+				size = cmd->data_length;
+			return target_cmd_size_check(cmd, size);
+		}
+		if (cdb[0] == RESERVE || cdb[0] == RESERVE_10) {
+			cmd->execute_cmd = target_scsi2_reservation_reserve;
+			if (cdb[0] == RESERVE_10)
+				size = (cdb[7] << 8) | cdb[8];
+			else
+				size = cmd->data_length;
+			return target_cmd_size_check(cmd, size);
+		}
+	}
+
 	/* Set DATA_CDB flag for ops that should have it */
 	switch (cdb[0]) {
 	case READ_6:
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index e18051185846..129ca572673c 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -4147,7 +4147,7 @@ target_check_reservation(struct se_cmd *cmd)
 		return 0;
 	if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)
 		return 0;
-	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+	if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
 		return 0;
 
 	spin_lock(&dev->dev_reservation_lock);
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 94cda7991e80..8943a62eb203 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -1081,7 +1081,8 @@ static const struct target_backend_ops pscsi_ops = {
 	.name			= "pscsi",
 	.owner			= THIS_MODULE,
 	.transport_flags	= TRANSPORT_FLAG_PASSTHROUGH |
-				  TRANSPORT_FLAG_PASSTHROUGH_ALUA,
+				  TRANSPORT_FLAG_PASSTHROUGH_ALUA |
+				  TRANSPORT_FLAG_PASSTHROUGH_PGR,
 	.attach_hba		= pscsi_attach_hba,
 	.detach_hba		= pscsi_detach_hba,
 	.pmode_enable_hba	= pscsi_pmode_enable_hba,
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 1b0f447ce850..e475531565fd 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -10,6 +10,7 @@
  * backend module.
  */
 #define TRANSPORT_FLAG_PASSTHROUGH_ALUA		0x2
+#define TRANSPORT_FLAG_PASSTHROUGH_PGR          0x4
 
 struct request_queue;
 struct scatterlist;
-- 
cgit v1.2.3


From c0dcafd8c52e0c36588d4d14c2ef4288830bc461 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Fri, 28 Apr 2017 10:03:38 +0200
Subject: target: fixup error message in
 target_tg_pt_gp_alua_access_type_store()

When setting up a target the error message:

Unable to do set ##_name ALUA state on non valid tg_pt_gp ID: 0

is displayed.
Apparently concatenation doesn't work in a string; one should be using
implicit string concatenation here.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bart van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_configfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index e7b62bc239a7..9e6ba367ae70 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -2545,7 +2545,7 @@ static ssize_t target_tg_pt_gp_alua_support_##_name##_store(		\
 	int ret;							\
 									\
 	if (!t->tg_pt_gp_valid_id) {					\
-		pr_err("Unable to do set ##_name ALUA state on non"	\
+		pr_err("Unable to do set " #_name " ALUA state on non"	\
 		       " valid tg_pt_gp ID: %hu\n",			\
 		       t->tg_pt_gp_valid_id);				\
 		return -EINVAL;						\
-- 
cgit v1.2.3


From 3d035237a57777b7672eb116133da01493b607c1 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Fri, 28 Apr 2017 10:04:04 +0200
Subject: target: fixup error message in target_tg_pt_gp_tg_pt_gp_id_store()

When setting up an ALUA target port group with an invalid ID the
error message

kstrtoul() returned -22 for tg_pt_gp_id

is displayed, which is not really helpful.
Convert it to something sane.
And while we're at it, join the messages onto a single line.

Signed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bart van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_configfs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 9e6ba367ae70..0326607e5ab8 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -2677,13 +2677,13 @@ static ssize_t target_tg_pt_gp_tg_pt_gp_id_store(struct config_item *item,
 
 	ret = kstrtoul(page, 0, &tg_pt_gp_id);
 	if (ret < 0) {
-		pr_err("kstrtoul() returned %d for"
-			" tg_pt_gp_id\n", ret);
+		pr_err("ALUA tg_pt_gp_id: invalid value '%s' for tg_pt_gp_id\n",
+		       page);
 		return ret;
 	}
 	if (tg_pt_gp_id > 0x0000ffff) {
-		pr_err("ALUA tg_pt_gp_id: %lu exceeds maximum:"
-			" 0x0000ffff\n", tg_pt_gp_id);
+		pr_err("ALUA tg_pt_gp_id: %lu exceeds maximum: 0x0000ffff\n",
+		       tg_pt_gp_id);
 		return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From 141685a39151aea95eb56562d2953e919c6c73da Mon Sep 17 00:00:00 2001
From: Xiubo Li <lixiubo@cmss.chinamobile.com>
Date: Tue, 2 May 2017 11:38:05 +0800
Subject: tcmu: Add dynamic growing data area feature support

Currently for the TCMU, the ring buffer size is fixed to 64K cmd
area + 1M data area, and this will be bottlenecks for high iops.

The struct tcmu_cmd_entry {} size is fixed about 112 bytes with
iovec[N] & N <= 4, and the size of struct iovec is about 16 bytes.

If N == 0, the ratio will be sizeof(cmd entry) : sizeof(datas) ==
112Bytes : (N * 4096)Bytes = 28 : 0, no data area is need.

If 0 < N <=4, the ratio will be sizeof(cmd entry) : sizeof(datas)
== 112Bytes : (N * 4096)Bytes = 28 : (N * 1024), so the max will
be 28 : 1024.

If N > 4, the sizeof(cmd entry) will be [(N - 4) *16 + 112] bytes,
and its corresponding data size will be [N * 4096], so the ratio
of sizeof(cmd entry) : sizeof(datas) == [(N - 4) * 16 + 112)Bytes
: (N * 4096)Bytes == 4/1024 - 12/(N * 1024), so the max is about
4 : 1024.

When N is bigger, the ratio will be smaller.

As the initial patch, we will set the cmd area size to 2M, and
the cmd area size to 32M. The TCMU will dynamically grows the data
area from 0 to max 32M size as needed.

The cmd area memory will be allocated through vmalloc(), and the
data area's blocks will be allocated individually later when needed.

The allocated data area block memory will be managed via radix tree.
For now the bitmap still be the most efficient way to search and
manage the block index, this could be update later.

Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com>
Signed-off-by: Jianfei Hu <hujianfei@cmss.chinamobile.com>
Acked-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 338 ++++++++++++++++++++++++++------------
 1 file changed, 237 insertions(+), 101 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index f615c3bbb73e..02cf543a888d 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -2,6 +2,7 @@
  * Copyright (C) 2013 Shaohua Li <shli@kernel.org>
  * Copyright (C) 2014 Red Hat, Inc.
  * Copyright (C) 2015 Arrikto, Inc.
+ * Copyright (C) 2017 Chinamobile, Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -25,6 +26,7 @@
 #include <linux/parser.h>
 #include <linux/vmalloc.h>
 #include <linux/uio_driver.h>
+#include <linux/radix-tree.h>
 #include <linux/stringify.h>
 #include <linux/bitops.h>
 #include <linux/highmem.h>
@@ -63,15 +65,17 @@
  * this may have a 'UAM' comment.
  */
 
-
 #define TCMU_TIME_OUT (30 * MSEC_PER_SEC)
 
-#define DATA_BLOCK_BITS 256
-#define DATA_BLOCK_SIZE 4096
+/* For cmd area, the size is fixed 2M */
+#define CMDR_SIZE (2 * 1024 * 1024)
 
-#define CMDR_SIZE (16 * 4096)
+/* For data area, the size is fixed 32M */
+#define DATA_BLOCK_BITS (8 * 1024)
+#define DATA_BLOCK_SIZE 4096
 #define DATA_SIZE (DATA_BLOCK_BITS * DATA_BLOCK_SIZE)
 
+/* The ring buffer size is 34M */
 #define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE)
 
 static struct device *tcmu_root_device;
@@ -103,12 +107,14 @@ struct tcmu_dev {
 	size_t data_off;
 	size_t data_size;
 
-	DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS);
-
 	wait_queue_head_t wait_cmdr;
 	/* TODO should this be a mutex? */
 	spinlock_t cmdr_lock;
 
+	uint32_t dbi_max;
+	DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS);
+	struct radix_tree_root data_blocks;
+
 	struct idr commands;
 	spinlock_t commands_lock;
 
@@ -130,7 +136,9 @@ struct tcmu_cmd {
 
 	/* Can't use se_cmd when cleaning up expired cmds, because if
 	   cmd has been completed then accessing se_cmd is off limits */
-	DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS);
+	uint32_t dbi_cnt;
+	uint32_t dbi_cur;
+	uint32_t *dbi;
 
 	unsigned long deadline;
 
@@ -161,6 +169,84 @@ static struct genl_family tcmu_genl_family __ro_after_init = {
 	.netnsok = true,
 };
 
+#define tcmu_cmd_set_dbi_cur(cmd, index) ((cmd)->dbi_cur = (index))
+#define tcmu_cmd_reset_dbi_cur(cmd) tcmu_cmd_set_dbi_cur(cmd, 0)
+#define tcmu_cmd_set_dbi(cmd, index) ((cmd)->dbi[(cmd)->dbi_cur++] = (index))
+#define tcmu_cmd_get_dbi(cmd) ((cmd)->dbi[(cmd)->dbi_cur++])
+
+static void tcmu_cmd_free_data(struct tcmu_cmd *tcmu_cmd)
+{
+	struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
+	uint32_t i;
+
+	for (i = 0; i < tcmu_cmd->dbi_cnt; i++)
+		clear_bit(tcmu_cmd->dbi[i], udev->data_bitmap);
+}
+
+static int tcmu_get_empty_block(struct tcmu_dev *udev, void **addr)
+{
+	void *p;
+	uint32_t dbi;
+	int ret;
+
+	dbi = find_first_zero_bit(udev->data_bitmap, DATA_BLOCK_BITS);
+	if (dbi > udev->dbi_max)
+		udev->dbi_max = dbi;
+
+	set_bit(dbi, udev->data_bitmap);
+
+	p = radix_tree_lookup(&udev->data_blocks, dbi);
+	if (!p) {
+		p = kzalloc(DATA_BLOCK_SIZE, GFP_ATOMIC);
+		if (!p) {
+			clear_bit(dbi, udev->data_bitmap);
+			return -ENOMEM;
+		}
+
+		ret = radix_tree_insert(&udev->data_blocks, dbi, p);
+		if (ret) {
+			kfree(p);
+			clear_bit(dbi, udev->data_bitmap);
+			return ret;
+		}
+	}
+
+	*addr = p;
+	return dbi;
+}
+
+static void *tcmu_get_block_addr(struct tcmu_dev *udev, uint32_t dbi)
+{
+	return radix_tree_lookup(&udev->data_blocks, dbi);
+}
+
+static inline void tcmu_free_cmd(struct tcmu_cmd *tcmu_cmd)
+{
+	kfree(tcmu_cmd->dbi);
+	kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
+}
+
+static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd)
+{
+	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
+	size_t data_length = round_up(se_cmd->data_length, DATA_BLOCK_SIZE);
+
+	if (se_cmd->se_cmd_flags & SCF_BIDI) {
+		BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
+		data_length += round_up(se_cmd->t_bidi_data_sg->length,
+				DATA_BLOCK_SIZE);
+	}
+
+	return data_length;
+}
+
+static inline uint32_t tcmu_cmd_get_block_cnt(struct tcmu_cmd *tcmu_cmd)
+{
+	size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd);
+
+	return data_length / DATA_BLOCK_SIZE;
+}
+
 static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
 {
 	struct se_device *se_dev = se_cmd->se_dev;
@@ -178,6 +264,15 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
 		tcmu_cmd->deadline = jiffies +
 					msecs_to_jiffies(udev->cmd_time_out);
 
+	tcmu_cmd_reset_dbi_cur(tcmu_cmd);
+	tcmu_cmd->dbi_cnt = tcmu_cmd_get_block_cnt(tcmu_cmd);
+	tcmu_cmd->dbi = kcalloc(tcmu_cmd->dbi_cnt, sizeof(uint32_t),
+				GFP_KERNEL);
+	if (!tcmu_cmd->dbi) {
+		kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
+		return NULL;
+	}
+
 	idr_preload(GFP_KERNEL);
 	spin_lock_irq(&udev->commands_lock);
 	cmd_id = idr_alloc(&udev->commands, tcmu_cmd, 0,
@@ -186,7 +281,7 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
 	idr_preload_end();
 
 	if (cmd_id < 0) {
-		kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
+		tcmu_free_cmd(tcmu_cmd);
 		return NULL;
 	}
 	tcmu_cmd->cmd_id = cmd_id;
@@ -248,10 +343,10 @@ static inline void new_iov(struct iovec **iov, int *iov_cnt,
 #define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size)
 
 /* offset is relative to mb_addr */
-static inline size_t get_block_offset(struct tcmu_dev *dev,
-		int block, int remaining)
+static inline size_t get_block_offset_user(struct tcmu_dev *dev,
+		int dbi, int remaining)
 {
-	return dev->data_off + block * DATA_BLOCK_SIZE +
+	return dev->data_off + dbi * DATA_BLOCK_SIZE +
 		DATA_BLOCK_SIZE - remaining;
 }
 
@@ -260,14 +355,15 @@ static inline size_t iov_tail(struct tcmu_dev *udev, struct iovec *iov)
 	return (size_t)iov->iov_base + iov->iov_len;
 }
 
-static void alloc_and_scatter_data_area(struct tcmu_dev *udev,
-	struct scatterlist *data_sg, unsigned int data_nents,
-	struct iovec **iov, int *iov_cnt, bool copy_data)
+static int alloc_and_scatter_data_area(struct tcmu_dev *udev,
+	struct tcmu_cmd *tcmu_cmd, struct scatterlist *data_sg,
+	unsigned int data_nents, struct iovec **iov,
+	int *iov_cnt, bool copy_data)
 {
-	int i, block;
+	int i, dbi;
 	int block_remaining = 0;
-	void *from, *to;
-	size_t copy_bytes, to_offset;
+	void *from, *to = NULL;
+	size_t copy_bytes, to_offset, offset;
 	struct scatterlist *sg;
 
 	for_each_sg(data_sg, sg, data_nents, i) {
@@ -275,22 +371,28 @@ static void alloc_and_scatter_data_area(struct tcmu_dev *udev,
 		from = kmap_atomic(sg_page(sg)) + sg->offset;
 		while (sg_remaining > 0) {
 			if (block_remaining == 0) {
-				block = find_first_zero_bit(udev->data_bitmap,
-						DATA_BLOCK_BITS);
 				block_remaining = DATA_BLOCK_SIZE;
-				set_bit(block, udev->data_bitmap);
+				dbi = tcmu_get_empty_block(udev, &to);
+				if (dbi < 0) {
+					kunmap_atomic(from - sg->offset);
+					return dbi;
+				}
+				tcmu_cmd_set_dbi(tcmu_cmd, dbi);
 			}
+
 			copy_bytes = min_t(size_t, sg_remaining,
 					block_remaining);
-			to_offset = get_block_offset(udev, block,
+			to_offset = get_block_offset_user(udev, dbi,
 					block_remaining);
-			to = (void *)udev->mb_addr + to_offset;
+			offset = DATA_BLOCK_SIZE - block_remaining;
+			to = (void *)(unsigned long)to + offset;
+
 			if (*iov_cnt != 0 &&
 			    to_offset == iov_tail(udev, *iov)) {
 				(*iov)->iov_len += copy_bytes;
 			} else {
 				new_iov(iov, iov_cnt, udev);
-				(*iov)->iov_base = (void __user *) to_offset;
+				(*iov)->iov_base = (void __user *)to_offset;
 				(*iov)->iov_len = copy_bytes;
 			}
 			if (copy_data) {
@@ -303,33 +405,26 @@ static void alloc_and_scatter_data_area(struct tcmu_dev *udev,
 		}
 		kunmap_atomic(from - sg->offset);
 	}
-}
 
-static void free_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd)
-{
-	bitmap_xor(udev->data_bitmap, udev->data_bitmap, cmd->data_bitmap,
-		   DATA_BLOCK_BITS);
+	return 0;
 }
 
 static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 			     bool bidi)
 {
 	struct se_cmd *se_cmd = cmd->se_cmd;
-	int i, block;
+	int i, dbi;
 	int block_remaining = 0;
 	void *from, *to;
-	size_t copy_bytes, from_offset;
+	size_t copy_bytes, offset;
 	struct scatterlist *sg, *data_sg;
 	unsigned int data_nents;
-	DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
-
-	bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
+	uint32_t count = 0;
 
 	if (!bidi) {
 		data_sg = se_cmd->t_data_sg;
 		data_nents = se_cmd->t_data_nents;
 	} else {
-		uint32_t count;
 
 		/*
 		 * For bidi case, the first count blocks are for Data-Out
@@ -337,30 +432,26 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 		 * the Data-Out buffer blocks should be discarded.
 		 */
 		count = DIV_ROUND_UP(se_cmd->data_length, DATA_BLOCK_SIZE);
-		while (count--) {
-			block = find_first_bit(bitmap, DATA_BLOCK_BITS);
-			clear_bit(block, bitmap);
-		}
 
 		data_sg = se_cmd->t_bidi_data_sg;
 		data_nents = se_cmd->t_bidi_data_nents;
 	}
 
+	tcmu_cmd_set_dbi_cur(cmd, count);
+
 	for_each_sg(data_sg, sg, data_nents, i) {
 		int sg_remaining = sg->length;
 		to = kmap_atomic(sg_page(sg)) + sg->offset;
 		while (sg_remaining > 0) {
 			if (block_remaining == 0) {
-				block = find_first_bit(bitmap,
-						DATA_BLOCK_BITS);
 				block_remaining = DATA_BLOCK_SIZE;
-				clear_bit(block, bitmap);
+				dbi = tcmu_cmd_get_dbi(cmd);
+				from = tcmu_get_block_addr(udev, dbi);
 			}
 			copy_bytes = min_t(size_t, sg_remaining,
 					block_remaining);
-			from_offset = get_block_offset(udev, block,
-					block_remaining);
-			from = (void *) udev->mb_addr + from_offset;
+			offset = DATA_BLOCK_SIZE - block_remaining;
+			from = (void *)(unsigned long)from + offset;
 			tcmu_flush_dcache_range(from, copy_bytes);
 			memcpy(to + sg->length - sg_remaining, from,
 					copy_bytes);
@@ -420,27 +511,6 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t d
 	return true;
 }
 
-static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd)
-{
-	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
-	size_t data_length = round_up(se_cmd->data_length, DATA_BLOCK_SIZE);
-
-	if (se_cmd->se_cmd_flags & SCF_BIDI) {
-		BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
-		data_length += round_up(se_cmd->t_bidi_data_sg->length,
-				DATA_BLOCK_SIZE);
-	}
-
-	return data_length;
-}
-
-static inline uint32_t tcmu_cmd_get_block_cnt(struct tcmu_cmd *tcmu_cmd)
-{
-	size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd);
-
-	return data_length / DATA_BLOCK_SIZE;
-}
-
 static sense_reason_t
 tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 {
@@ -450,12 +520,11 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	struct tcmu_mailbox *mb;
 	struct tcmu_cmd_entry *entry;
 	struct iovec *iov;
-	int iov_cnt;
+	int iov_cnt, ret;
 	uint32_t cmd_head;
 	uint64_t cdb_off;
 	bool copy_to_data_area;
 	size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd);
-	DECLARE_BITMAP(old_bitmap, DATA_BLOCK_BITS);
 
 	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags))
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -539,15 +608,19 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	entry->hdr.kflags = 0;
 	entry->hdr.uflags = 0;
 
-	bitmap_copy(old_bitmap, udev->data_bitmap, DATA_BLOCK_BITS);
-
 	/* Handle allocating space from the data area */
 	iov = &entry->req.iov[0];
 	iov_cnt = 0;
 	copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE
 		|| se_cmd->se_cmd_flags & SCF_BIDI);
-	alloc_and_scatter_data_area(udev, se_cmd->t_data_sg,
-		se_cmd->t_data_nents, &iov, &iov_cnt, copy_to_data_area);
+	ret = alloc_and_scatter_data_area(udev, tcmu_cmd,
+			se_cmd->t_data_sg, se_cmd->t_data_nents,
+			&iov, &iov_cnt, copy_to_data_area);
+	if (ret) {
+		spin_unlock_irq(&udev->cmdr_lock);
+		pr_err("tcmu: alloc and scatter data failed\n");
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+	}
 	entry->req.iov_cnt = iov_cnt;
 	entry->req.iov_dif_cnt = 0;
 
@@ -555,14 +628,17 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	if (se_cmd->se_cmd_flags & SCF_BIDI) {
 		iov_cnt = 0;
 		iov++;
-		alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg,
-				se_cmd->t_bidi_data_nents, &iov, &iov_cnt,
-				false);
+		ret = alloc_and_scatter_data_area(udev, tcmu_cmd,
+					se_cmd->t_bidi_data_sg,
+					se_cmd->t_bidi_data_nents,
+					&iov, &iov_cnt, false);
+		if (ret) {
+			spin_unlock_irq(&udev->cmdr_lock);
+			pr_err("tcmu: alloc and scatter bidi data failed\n");
+			return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+		}
 		entry->req.iov_bidi_cnt = iov_cnt;
 	}
-	/* cmd's data_bitmap is what changed in process */
-	bitmap_xor(tcmu_cmd->data_bitmap, old_bitmap, udev->data_bitmap,
-			DATA_BLOCK_BITS);
 
 	/* All offsets relative to mb_addr, not start of entry! */
 	cdb_off = CMDR_OFF + cmd_head + base_command_size;
@@ -604,7 +680,7 @@ tcmu_queue_cmd(struct se_cmd *se_cmd)
 		idr_remove(&udev->commands, tcmu_cmd->cmd_id);
 		spin_unlock_irq(&udev->commands_lock);
 
-		kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
+		tcmu_free_cmd(tcmu_cmd);
 	}
 
 	return ret;
@@ -615,44 +691,40 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
 	struct se_cmd *se_cmd = cmd->se_cmd;
 	struct tcmu_dev *udev = cmd->tcmu_dev;
 
-	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
-		/*
-		 * cmd has been completed already from timeout, just reclaim
-		 * data area space and free cmd
-		 */
-		free_data_area(udev, cmd);
+	/*
+	 * cmd has been completed already from timeout, just reclaim
+	 * data area space and free cmd
+	 */
+	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
+		goto out;
 
-		kmem_cache_free(tcmu_cmd_cache, cmd);
-		return;
-	}
+	tcmu_cmd_reset_dbi_cur(cmd);
 
 	if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) {
-		free_data_area(udev, cmd);
 		pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n",
 			cmd->se_cmd);
 		entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION;
 	} else if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
 		memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
 			       se_cmd->scsi_sense_length);
-		free_data_area(udev, cmd);
 	} else if (se_cmd->se_cmd_flags & SCF_BIDI) {
 		/* Get Data-In buffer before clean up */
 		gather_data_area(udev, cmd, true);
-		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
 		gather_data_area(udev, cmd, false);
-		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction == DMA_TO_DEVICE) {
-		free_data_area(udev, cmd);
+		/* TODO: */
 	} else if (se_cmd->data_direction != DMA_NONE) {
 		pr_warn("TCMU: data direction was %d!\n",
 			se_cmd->data_direction);
 	}
 
 	target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status);
-	cmd->se_cmd = NULL;
 
-	kmem_cache_free(tcmu_cmd_cache, cmd);
+out:
+	cmd->se_cmd = NULL;
+	tcmu_cmd_free_data(cmd);
+	tcmu_free_cmd(cmd);
 }
 
 static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
@@ -810,6 +882,54 @@ static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on)
 	return 0;
 }
 
+static void tcmu_blocks_release(struct tcmu_dev *udev, bool release_pending)
+{
+	uint32_t dbi, end;
+	void *addr;
+
+	spin_lock_irq(&udev->cmdr_lock);
+
+	end = udev->dbi_max + 1;
+
+	/* try to release all unused blocks */
+	dbi = find_first_zero_bit(udev->data_bitmap, end);
+	if (dbi >= end) {
+		spin_unlock_irq(&udev->cmdr_lock);
+		return;
+	}
+	do {
+		addr = radix_tree_delete(&udev->data_blocks, dbi);
+		kfree(addr);
+
+		dbi = find_next_zero_bit(udev->data_bitmap, end, dbi + 1);
+	} while (dbi < end);
+
+	if (!release_pending)
+		return;
+
+	/* try to release all pending blocks */
+	dbi = find_first_bit(udev->data_bitmap, end);
+	if (dbi >= end) {
+		spin_unlock_irq(&udev->cmdr_lock);
+		return;
+	}
+	do {
+		addr = radix_tree_delete(&udev->data_blocks, dbi);
+		kfree(addr);
+
+		dbi = find_next_bit(udev->data_bitmap, end, dbi + 1);
+	} while (dbi < end);
+
+	spin_unlock_irq(&udev->cmdr_lock);
+}
+
+static void tcmu_vma_close(struct vm_area_struct *vma)
+{
+	struct tcmu_dev *udev = vma->vm_private_data;
+
+	tcmu_blocks_release(udev, false);
+}
+
 /*
  * mmap code from uio.c. Copied here because we want to hook mmap()
  * and this stuff must come along.
@@ -845,17 +965,28 @@ static int tcmu_vma_fault(struct vm_fault *vmf)
 	 */
 	offset = (vmf->pgoff - mi) << PAGE_SHIFT;
 
-	addr = (void *)(unsigned long)info->mem[mi].addr + offset;
-	if (info->mem[mi].memtype == UIO_MEM_LOGICAL)
-		page = virt_to_page(addr);
-	else
+	if (offset < udev->data_off) {
+		/* For the vmalloc()ed cmd area pages */
+		addr = (void *)(unsigned long)info->mem[mi].addr + offset;
 		page = vmalloc_to_page(addr);
+	} else {
+		/* For the dynamically growing data area pages */
+		uint32_t dbi;
+
+		dbi = (offset - udev->data_off) / DATA_BLOCK_SIZE;
+		addr = tcmu_get_block_addr(udev, dbi);
+		if (!addr)
+			return VM_FAULT_NOPAGE;
+		page = virt_to_page(addr);
+	}
+
 	get_page(page);
 	vmf->page = page;
 	return 0;
 }
 
 static const struct vm_operations_struct tcmu_vm_ops = {
+	.close = tcmu_vma_close,
 	.fault = tcmu_vma_fault,
 };
 
@@ -963,7 +1094,7 @@ static int tcmu_configure_device(struct se_device *dev)
 
 	info->name = str;
 
-	udev->mb_addr = vzalloc(TCMU_RING_SIZE);
+	udev->mb_addr = vzalloc(CMDR_SIZE);
 	if (!udev->mb_addr) {
 		ret = -ENOMEM;
 		goto err_vzalloc;
@@ -972,8 +1103,9 @@ static int tcmu_configure_device(struct se_device *dev)
 	/* mailbox fits in first part of CMDR space */
 	udev->cmdr_size = CMDR_SIZE - CMDR_OFF;
 	udev->data_off = CMDR_SIZE;
-	udev->data_size = TCMU_RING_SIZE - CMDR_SIZE;
+	udev->data_size = DATA_SIZE;
 
+	/* Initialise the mailbox of the ring buffer */
 	mb = udev->mb_addr;
 	mb->version = TCMU_MAILBOX_VERSION;
 	mb->flags = TCMU_MAILBOX_FLAG_CAP_OOOC;
@@ -984,12 +1116,14 @@ static int tcmu_configure_device(struct se_device *dev)
 	WARN_ON(udev->data_size % PAGE_SIZE);
 	WARN_ON(udev->data_size % DATA_BLOCK_SIZE);
 
+	INIT_RADIX_TREE(&udev->data_blocks, GFP_ATOMIC);
+
 	info->version = __stringify(TCMU_MAILBOX_VERSION);
 
 	info->mem[0].name = "tcm-user command & data buffer";
 	info->mem[0].addr = (phys_addr_t)(uintptr_t)udev->mb_addr;
 	info->mem[0].size = TCMU_RING_SIZE;
-	info->mem[0].memtype = UIO_MEM_VIRTUAL;
+	info->mem[0].memtype = UIO_MEM_NONE;
 
 	info->irqcontrol = tcmu_irqcontrol;
 	info->irq = UIO_IRQ_CUSTOM;
@@ -1070,6 +1204,8 @@ static void tcmu_free_device(struct se_device *dev)
 	spin_unlock_irq(&udev->commands_lock);
 	WARN_ON(!all_expired);
 
+	tcmu_blocks_release(udev, true);
+
 	if (tcmu_dev_configured(udev)) {
 		tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
 				   udev->uio_info.uio_dev->minor);
-- 
cgit v1.2.3


From b6df4b79a5514a9c6c53533436704129ef45bf76 Mon Sep 17 00:00:00 2001
From: Xiubo Li <lixiubo@cmss.chinamobile.com>
Date: Tue, 2 May 2017 11:38:06 +0800
Subject: tcmu: Add global data block pool support

For each target there will be one ring, when the target number
grows larger and larger, it could eventually runs out of the
system memories.

In this patch for each target ring, currently for the cmd area
the size will be fixed to 8MB and for the data area the size
will grow from 0 to max 256K * PAGE_SIZE(1G for 4K page size).

For all the targets' data areas, they will get empty blocks
from the "global data block pool", which has limited to 512K *
PAGE_SIZE(2G for 4K page size) for now.

When the "global data block pool" has been used up, then any
target could wake up the unmap thread routine to shrink other
targets' data area memories. And the unmap thread routine will
always try to truncate the ring vma from the last using block
offset.

When user space has touched the data blocks out of tcmu_cmd
iov[], the tcmu_page_fault() will try to return one zeroed blocks.

Here we move the timeout's tcmu_handle_completions() into unmap
thread routine, that's to say when the timeout fired, it will
only do the tcmu_check_expired_cmd() and then wake up the unmap
thread to do the completions() and then try to shrink its idle
memories. Then the cmdr_lock could be a mutex and could simplify
this patch because the unmap_mapping_range() or zap_* may go to
sleep.

Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com>
Signed-off-by: Jianfei Hu <hujianfei@cmss.chinamobile.com>
Acked-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 464 +++++++++++++++++++++++++++-----------
 1 file changed, 336 insertions(+), 128 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 02cf543a888d..0b29e4f00bce 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -31,6 +31,8 @@
 #include <linux/bitops.h>
 #include <linux/highmem.h>
 #include <linux/configfs.h>
+#include <linux/mutex.h>
+#include <linux/kthread.h>
 #include <net/genetlink.h>
 #include <scsi/scsi_common.h>
 #include <scsi/scsi_proto.h>
@@ -67,17 +69,24 @@
 
 #define TCMU_TIME_OUT (30 * MSEC_PER_SEC)
 
-/* For cmd area, the size is fixed 2M */
-#define CMDR_SIZE (2 * 1024 * 1024)
+/* For cmd area, the size is fixed 8MB */
+#define CMDR_SIZE (8 * 1024 * 1024)
 
-/* For data area, the size is fixed 32M */
-#define DATA_BLOCK_BITS (8 * 1024)
-#define DATA_BLOCK_SIZE 4096
+/*
+ * For data area, the block size is PAGE_SIZE and
+ * the total size is 256K * PAGE_SIZE.
+ */
+#define DATA_BLOCK_SIZE PAGE_SIZE
+#define DATA_BLOCK_BITS (256 * 1024)
 #define DATA_SIZE (DATA_BLOCK_BITS * DATA_BLOCK_SIZE)
+#define DATA_BLOCK_INIT_BITS 128
 
-/* The ring buffer size is 34M */
+/* The total size of the ring is 8M + 256K * PAGE_SIZE */
 #define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE)
 
+/* Default maximum of the global data blocks(512K * PAGE_SIZE) */
+#define TCMU_GLOBAL_MAX_BLOCKS (512 * 1024)
+
 static struct device *tcmu_root_device;
 
 struct tcmu_hba {
@@ -87,6 +96,8 @@ struct tcmu_hba {
 #define TCMU_CONFIG_LEN 256
 
 struct tcmu_dev {
+	struct list_head node;
+
 	struct se_device se_dev;
 
 	char *name;
@@ -98,6 +109,8 @@ struct tcmu_dev {
 
 	struct uio_info uio_info;
 
+	struct inode *inode;
+
 	struct tcmu_mailbox *mb_addr;
 	size_t dev_size;
 	u32 cmdr_size;
@@ -108,10 +121,11 @@ struct tcmu_dev {
 	size_t data_size;
 
 	wait_queue_head_t wait_cmdr;
-	/* TODO should this be a mutex? */
-	spinlock_t cmdr_lock;
+	struct mutex cmdr_lock;
 
+	bool waiting_global;
 	uint32_t dbi_max;
+	uint32_t dbi_thresh;
 	DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS);
 	struct radix_tree_root data_blocks;
 
@@ -146,6 +160,13 @@ struct tcmu_cmd {
 	unsigned long flags;
 };
 
+static struct task_struct *unmap_thread;
+static wait_queue_head_t unmap_wait;
+static DEFINE_MUTEX(root_udev_mutex);
+static LIST_HEAD(root_udev);
+
+static atomic_t global_db_count = ATOMIC_INIT(0);
+
 static struct kmem_cache *tcmu_cmd_cache;
 
 /* multicast group */
@@ -174,48 +195,78 @@ static struct genl_family tcmu_genl_family __ro_after_init = {
 #define tcmu_cmd_set_dbi(cmd, index) ((cmd)->dbi[(cmd)->dbi_cur++] = (index))
 #define tcmu_cmd_get_dbi(cmd) ((cmd)->dbi[(cmd)->dbi_cur++])
 
-static void tcmu_cmd_free_data(struct tcmu_cmd *tcmu_cmd)
+static void tcmu_cmd_free_data(struct tcmu_cmd *tcmu_cmd, uint32_t len)
 {
 	struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
 	uint32_t i;
 
-	for (i = 0; i < tcmu_cmd->dbi_cnt; i++)
+	for (i = 0; i < len; i++)
 		clear_bit(tcmu_cmd->dbi[i], udev->data_bitmap);
 }
 
-static int tcmu_get_empty_block(struct tcmu_dev *udev, void **addr)
+static inline bool tcmu_get_empty_block(struct tcmu_dev *udev,
+					struct tcmu_cmd *tcmu_cmd)
 {
-	void *p;
-	uint32_t dbi;
-	int ret;
+	struct page *page;
+	int ret, dbi;
 
-	dbi = find_first_zero_bit(udev->data_bitmap, DATA_BLOCK_BITS);
-	if (dbi > udev->dbi_max)
-		udev->dbi_max = dbi;
+	dbi = find_first_zero_bit(udev->data_bitmap, udev->dbi_thresh);
+	if (dbi == udev->dbi_thresh)
+		return false;
 
-	set_bit(dbi, udev->data_bitmap);
+	page = radix_tree_lookup(&udev->data_blocks, dbi);
+	if (!page) {
 
-	p = radix_tree_lookup(&udev->data_blocks, dbi);
-	if (!p) {
-		p = kzalloc(DATA_BLOCK_SIZE, GFP_ATOMIC);
-		if (!p) {
-			clear_bit(dbi, udev->data_bitmap);
-			return -ENOMEM;
+		if (atomic_add_return(1, &global_db_count) >
+					TCMU_GLOBAL_MAX_BLOCKS) {
+			atomic_dec(&global_db_count);
+			return false;
 		}
 
-		ret = radix_tree_insert(&udev->data_blocks, dbi, p);
+		/* try to get new page from the mm */
+		page = alloc_page(GFP_KERNEL);
+		if (!page)
+			return false;
+
+		ret = radix_tree_insert(&udev->data_blocks, dbi, page);
 		if (ret) {
-			kfree(p);
-			clear_bit(dbi, udev->data_bitmap);
-			return ret;
+			__free_page(page);
+			return false;
 		}
+
 	}
 
-	*addr = p;
-	return dbi;
+	if (dbi > udev->dbi_max)
+		udev->dbi_max = dbi;
+
+	set_bit(dbi, udev->data_bitmap);
+	tcmu_cmd_set_dbi(tcmu_cmd, dbi);
+
+	return true;
 }
 
-static void *tcmu_get_block_addr(struct tcmu_dev *udev, uint32_t dbi)
+static bool tcmu_get_empty_blocks(struct tcmu_dev *udev,
+				  struct tcmu_cmd *tcmu_cmd)
+{
+	int i;
+
+	udev->waiting_global = false;
+
+	for (i = tcmu_cmd->dbi_cur; i < tcmu_cmd->dbi_cnt; i++) {
+		if (!tcmu_get_empty_block(udev, tcmu_cmd))
+			goto err;
+	}
+	return true;
+
+err:
+	udev->waiting_global = true;
+	/* Try to wake up the unmap thread */
+	wake_up(&unmap_wait);
+	return false;
+}
+
+static inline struct page *
+tcmu_get_block_page(struct tcmu_dev *udev, uint32_t dbi)
 {
 	return radix_tree_lookup(&udev->data_blocks, dbi);
 }
@@ -355,7 +406,7 @@ static inline size_t iov_tail(struct tcmu_dev *udev, struct iovec *iov)
 	return (size_t)iov->iov_base + iov->iov_len;
 }
 
-static int alloc_and_scatter_data_area(struct tcmu_dev *udev,
+static int scatter_data_area(struct tcmu_dev *udev,
 	struct tcmu_cmd *tcmu_cmd, struct scatterlist *data_sg,
 	unsigned int data_nents, struct iovec **iov,
 	int *iov_cnt, bool copy_data)
@@ -365,19 +416,20 @@ static int alloc_and_scatter_data_area(struct tcmu_dev *udev,
 	void *from, *to = NULL;
 	size_t copy_bytes, to_offset, offset;
 	struct scatterlist *sg;
+	struct page *page;
 
 	for_each_sg(data_sg, sg, data_nents, i) {
 		int sg_remaining = sg->length;
 		from = kmap_atomic(sg_page(sg)) + sg->offset;
 		while (sg_remaining > 0) {
 			if (block_remaining == 0) {
+				if (to)
+					kunmap_atomic(to);
+
 				block_remaining = DATA_BLOCK_SIZE;
-				dbi = tcmu_get_empty_block(udev, &to);
-				if (dbi < 0) {
-					kunmap_atomic(from - sg->offset);
-					return dbi;
-				}
-				tcmu_cmd_set_dbi(tcmu_cmd, dbi);
+				dbi = tcmu_cmd_get_dbi(tcmu_cmd);
+				page = tcmu_get_block_page(udev, dbi);
+				to = kmap_atomic(page);
 			}
 
 			copy_bytes = min_t(size_t, sg_remaining,
@@ -405,6 +457,8 @@ static int alloc_and_scatter_data_area(struct tcmu_dev *udev,
 		}
 		kunmap_atomic(from - sg->offset);
 	}
+	if (to)
+		kunmap_atomic(to);
 
 	return 0;
 }
@@ -415,9 +469,10 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 	struct se_cmd *se_cmd = cmd->se_cmd;
 	int i, dbi;
 	int block_remaining = 0;
-	void *from, *to;
+	void *from = NULL, *to;
 	size_t copy_bytes, offset;
 	struct scatterlist *sg, *data_sg;
+	struct page *page;
 	unsigned int data_nents;
 	uint32_t count = 0;
 
@@ -444,9 +499,13 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 		to = kmap_atomic(sg_page(sg)) + sg->offset;
 		while (sg_remaining > 0) {
 			if (block_remaining == 0) {
+				if (from)
+					kunmap_atomic(from);
+
 				block_remaining = DATA_BLOCK_SIZE;
 				dbi = tcmu_cmd_get_dbi(cmd);
-				from = tcmu_get_block_addr(udev, dbi);
+				page = tcmu_get_block_page(udev, dbi);
+				from = kmap_atomic(page);
 			}
 			copy_bytes = min_t(size_t, sg_remaining,
 					block_remaining);
@@ -461,12 +520,13 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 		}
 		kunmap_atomic(to - sg->offset);
 	}
+	if (from)
+		kunmap_atomic(from);
 }
 
-static inline size_t spc_bitmap_free(unsigned long *bitmap)
+static inline size_t spc_bitmap_free(unsigned long *bitmap, uint32_t thresh)
 {
-	return DATA_BLOCK_SIZE * (DATA_BLOCK_BITS -
-			bitmap_weight(bitmap, DATA_BLOCK_BITS));
+	return DATA_BLOCK_SIZE * (thresh - bitmap_weight(bitmap, thresh));
 }
 
 /*
@@ -475,9 +535,12 @@ static inline size_t spc_bitmap_free(unsigned long *bitmap)
  *
  * Called with ring lock held.
  */
-static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t data_needed)
+static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
+		size_t cmd_size, size_t data_needed)
 {
 	struct tcmu_mailbox *mb = udev->mb_addr;
+	uint32_t blocks_needed = (data_needed + DATA_BLOCK_SIZE - 1)
+				/ DATA_BLOCK_SIZE;
 	size_t space, cmd_needed;
 	u32 cmd_head;
 
@@ -501,13 +564,41 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t d
 		return false;
 	}
 
-	space = spc_bitmap_free(udev->data_bitmap);
+	/* try to check and get the data blocks as needed */
+	space = spc_bitmap_free(udev->data_bitmap, udev->dbi_thresh);
 	if (space < data_needed) {
-		pr_debug("no data space: only %zu available, but ask for %zu\n",
-				space, data_needed);
-		return false;
+		unsigned long blocks_left = DATA_BLOCK_BITS - udev->dbi_thresh;
+		unsigned long grow;
+
+		if (blocks_left < blocks_needed) {
+			pr_debug("no data space: only %lu available, but ask for %zu\n",
+					blocks_left * DATA_BLOCK_SIZE,
+					data_needed);
+			return false;
+		}
+
+		/* Try to expand the thresh */
+		if (!udev->dbi_thresh) {
+			/* From idle state */
+			uint32_t init_thresh = DATA_BLOCK_INIT_BITS;
+
+			udev->dbi_thresh = max(blocks_needed, init_thresh);
+		} else {
+			/*
+			 * Grow the data area by max(blocks needed,
+			 * dbi_thresh / 2), but limited to the max
+			 * DATA_BLOCK_BITS size.
+			 */
+			grow = max(blocks_needed, udev->dbi_thresh / 2);
+			udev->dbi_thresh += grow;
+			if (udev->dbi_thresh > DATA_BLOCK_BITS)
+				udev->dbi_thresh = DATA_BLOCK_BITS;
+		}
 	}
 
+	if (!tcmu_get_empty_blocks(udev, cmd))
+		return false;
+
 	return true;
 }
 
@@ -544,7 +635,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
 	WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1));
 
-	spin_lock_irq(&udev->cmdr_lock);
+	mutex_lock(&udev->cmdr_lock);
 
 	mb = udev->mb_addr;
 	cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
@@ -553,18 +644,18 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 		pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu "
 			"cmd ring/data area\n", command_size, data_length,
 			udev->cmdr_size, udev->data_size);
-		spin_unlock_irq(&udev->cmdr_lock);
+		mutex_unlock(&udev->cmdr_lock);
 		return TCM_INVALID_CDB_FIELD;
 	}
 
-	while (!is_ring_space_avail(udev, command_size, data_length)) {
+	while (!is_ring_space_avail(udev, tcmu_cmd, command_size, data_length)) {
 		int ret;
 		DEFINE_WAIT(__wait);
 
 		prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE);
 
 		pr_debug("sleeping for ring space\n");
-		spin_unlock_irq(&udev->cmdr_lock);
+		mutex_unlock(&udev->cmdr_lock);
 		if (udev->cmd_time_out)
 			ret = schedule_timeout(
 					msecs_to_jiffies(udev->cmd_time_out));
@@ -576,7 +667,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 			return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 		}
 
-		spin_lock_irq(&udev->cmdr_lock);
+		mutex_lock(&udev->cmdr_lock);
 
 		/* We dropped cmdr_lock, cmd_head is stale */
 		cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
@@ -609,15 +700,18 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	entry->hdr.uflags = 0;
 
 	/* Handle allocating space from the data area */
+	tcmu_cmd_reset_dbi_cur(tcmu_cmd);
 	iov = &entry->req.iov[0];
 	iov_cnt = 0;
 	copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE
 		|| se_cmd->se_cmd_flags & SCF_BIDI);
-	ret = alloc_and_scatter_data_area(udev, tcmu_cmd,
-			se_cmd->t_data_sg, se_cmd->t_data_nents,
-			&iov, &iov_cnt, copy_to_data_area);
+	ret = scatter_data_area(udev, tcmu_cmd, se_cmd->t_data_sg,
+				se_cmd->t_data_nents, &iov, &iov_cnt,
+				copy_to_data_area);
 	if (ret) {
-		spin_unlock_irq(&udev->cmdr_lock);
+		tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt);
+		mutex_unlock(&udev->cmdr_lock);
+
 		pr_err("tcmu: alloc and scatter data failed\n");
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 	}
@@ -628,12 +722,14 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	if (se_cmd->se_cmd_flags & SCF_BIDI) {
 		iov_cnt = 0;
 		iov++;
-		ret = alloc_and_scatter_data_area(udev, tcmu_cmd,
+		ret = scatter_data_area(udev, tcmu_cmd,
 					se_cmd->t_bidi_data_sg,
 					se_cmd->t_bidi_data_nents,
 					&iov, &iov_cnt, false);
 		if (ret) {
-			spin_unlock_irq(&udev->cmdr_lock);
+			tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt);
+			mutex_unlock(&udev->cmdr_lock);
+
 			pr_err("tcmu: alloc and scatter bidi data failed\n");
 			return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 		}
@@ -648,8 +744,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
 	UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size);
 	tcmu_flush_dcache_range(mb, sizeof(*mb));
-
-	spin_unlock_irq(&udev->cmdr_lock);
+	mutex_unlock(&udev->cmdr_lock);
 
 	/* TODO: only if FLUSH and FUA? */
 	uio_event_notify(&udev->uio_info);
@@ -723,14 +818,13 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
 
 out:
 	cmd->se_cmd = NULL;
-	tcmu_cmd_free_data(cmd);
+	tcmu_cmd_free_data(cmd, cmd->dbi_cnt);
 	tcmu_free_cmd(cmd);
 }
 
 static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
 {
 	struct tcmu_mailbox *mb;
-	unsigned long flags;
 	int handled = 0;
 
 	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) {
@@ -738,8 +832,6 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
 		return 0;
 	}
 
-	spin_lock_irqsave(&udev->cmdr_lock, flags);
-
 	mb = udev->mb_addr;
 	tcmu_flush_dcache_range(mb, sizeof(*mb));
 
@@ -780,8 +872,6 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
 	if (mb->cmd_tail == mb->cmd_head)
 		del_timer(&udev->timeout); /* no more pending cmds */
 
-	spin_unlock_irqrestore(&udev->cmdr_lock, flags);
-
 	wake_up(&udev->wait_cmdr);
 
 	return handled;
@@ -808,16 +898,14 @@ static void tcmu_device_timedout(unsigned long data)
 {
 	struct tcmu_dev *udev = (struct tcmu_dev *)data;
 	unsigned long flags;
-	int handled;
-
-	handled = tcmu_handle_completions(udev);
-
-	pr_warn("%d completions handled from timeout\n", handled);
 
 	spin_lock_irqsave(&udev->commands_lock, flags);
 	idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL);
 	spin_unlock_irqrestore(&udev->commands_lock, flags);
 
+	/* Try to wake up the ummap thread */
+	wake_up(&unmap_wait);
+
 	/*
 	 * We don't need to wakeup threads on wait_cmdr since they have their
 	 * own timeout.
@@ -862,7 +950,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 	udev->cmd_time_out = TCMU_TIME_OUT;
 
 	init_waitqueue_head(&udev->wait_cmdr);
-	spin_lock_init(&udev->cmdr_lock);
+	mutex_init(&udev->cmdr_lock);
 
 	idr_init(&udev->commands);
 	spin_lock_init(&udev->commands_lock);
@@ -877,59 +965,13 @@ static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on)
 {
 	struct tcmu_dev *tcmu_dev = container_of(info, struct tcmu_dev, uio_info);
 
+	mutex_lock(&tcmu_dev->cmdr_lock);
 	tcmu_handle_completions(tcmu_dev);
+	mutex_unlock(&tcmu_dev->cmdr_lock);
 
 	return 0;
 }
 
-static void tcmu_blocks_release(struct tcmu_dev *udev, bool release_pending)
-{
-	uint32_t dbi, end;
-	void *addr;
-
-	spin_lock_irq(&udev->cmdr_lock);
-
-	end = udev->dbi_max + 1;
-
-	/* try to release all unused blocks */
-	dbi = find_first_zero_bit(udev->data_bitmap, end);
-	if (dbi >= end) {
-		spin_unlock_irq(&udev->cmdr_lock);
-		return;
-	}
-	do {
-		addr = radix_tree_delete(&udev->data_blocks, dbi);
-		kfree(addr);
-
-		dbi = find_next_zero_bit(udev->data_bitmap, end, dbi + 1);
-	} while (dbi < end);
-
-	if (!release_pending)
-		return;
-
-	/* try to release all pending blocks */
-	dbi = find_first_bit(udev->data_bitmap, end);
-	if (dbi >= end) {
-		spin_unlock_irq(&udev->cmdr_lock);
-		return;
-	}
-	do {
-		addr = radix_tree_delete(&udev->data_blocks, dbi);
-		kfree(addr);
-
-		dbi = find_next_bit(udev->data_bitmap, end, dbi + 1);
-	} while (dbi < end);
-
-	spin_unlock_irq(&udev->cmdr_lock);
-}
-
-static void tcmu_vma_close(struct vm_area_struct *vma)
-{
-	struct tcmu_dev *udev = vma->vm_private_data;
-
-	tcmu_blocks_release(udev, false);
-}
-
 /*
  * mmap code from uio.c. Copied here because we want to hook mmap()
  * and this stuff must come along.
@@ -947,6 +989,60 @@ static int tcmu_find_mem_index(struct vm_area_struct *vma)
 	return -1;
 }
 
+static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi)
+{
+	struct page *page;
+	int ret;
+
+	mutex_lock(&udev->cmdr_lock);
+	page = tcmu_get_block_page(udev, dbi);
+	if (likely(page)) {
+		mutex_unlock(&udev->cmdr_lock);
+		return page;
+	}
+
+	/*
+	 * Normally it shouldn't be here:
+	 * Only when the userspace has touched the blocks which
+	 * are out of the tcmu_cmd's data iov[], and will return
+	 * one zeroed page.
+	 */
+	pr_warn("Block(%u) out of cmd's iov[] has been touched!\n", dbi);
+	pr_warn("Mostly it will be a bug of userspace, please have a check!\n");
+
+	if (dbi >= udev->dbi_thresh) {
+		/* Extern the udev->dbi_thresh to dbi + 1 */
+		udev->dbi_thresh = dbi + 1;
+		udev->dbi_max = dbi;
+	}
+
+	page = radix_tree_lookup(&udev->data_blocks, dbi);
+	if (!page) {
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!page) {
+			mutex_unlock(&udev->cmdr_lock);
+			return NULL;
+		}
+
+		ret = radix_tree_insert(&udev->data_blocks, dbi, page);
+		if (ret) {
+			mutex_unlock(&udev->cmdr_lock);
+			__free_page(page);
+			return NULL;
+		}
+
+		/*
+		 * Since this case is rare in page fault routine, here we
+		 * will allow the global_db_count >= TCMU_GLOBAL_MAX_BLOCKS
+		 * to reduce possible page fault call trace.
+		 */
+		atomic_inc(&global_db_count);
+	}
+	mutex_unlock(&udev->cmdr_lock);
+
+	return page;
+}
+
 static int tcmu_vma_fault(struct vm_fault *vmf)
 {
 	struct tcmu_dev *udev = vmf->vma->vm_private_data;
@@ -970,14 +1066,13 @@ static int tcmu_vma_fault(struct vm_fault *vmf)
 		addr = (void *)(unsigned long)info->mem[mi].addr + offset;
 		page = vmalloc_to_page(addr);
 	} else {
-		/* For the dynamically growing data area pages */
 		uint32_t dbi;
 
+		/* For the dynamically growing data area pages */
 		dbi = (offset - udev->data_off) / DATA_BLOCK_SIZE;
-		addr = tcmu_get_block_addr(udev, dbi);
-		if (!addr)
+		page = tcmu_try_get_block_page(udev, dbi);
+		if (!page)
 			return VM_FAULT_NOPAGE;
-		page = virt_to_page(addr);
 	}
 
 	get_page(page);
@@ -986,7 +1081,6 @@ static int tcmu_vma_fault(struct vm_fault *vmf)
 }
 
 static const struct vm_operations_struct tcmu_vm_ops = {
-	.close = tcmu_vma_close,
 	.fault = tcmu_vma_fault,
 };
 
@@ -1014,6 +1108,8 @@ static int tcmu_open(struct uio_info *info, struct inode *inode)
 	if (test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags))
 		return -EBUSY;
 
+	udev->inode = inode;
+
 	pr_debug("open\n");
 
 	return 0;
@@ -1104,6 +1200,8 @@ static int tcmu_configure_device(struct se_device *dev)
 	udev->cmdr_size = CMDR_SIZE - CMDR_OFF;
 	udev->data_off = CMDR_SIZE;
 	udev->data_size = DATA_SIZE;
+	udev->dbi_thresh = 0; /* Default in Idle state */
+	udev->waiting_global = false;
 
 	/* Initialise the mailbox of the ring buffer */
 	mb = udev->mb_addr;
@@ -1116,7 +1214,7 @@ static int tcmu_configure_device(struct se_device *dev)
 	WARN_ON(udev->data_size % PAGE_SIZE);
 	WARN_ON(udev->data_size % DATA_BLOCK_SIZE);
 
-	INIT_RADIX_TREE(&udev->data_blocks, GFP_ATOMIC);
+	INIT_RADIX_TREE(&udev->data_blocks, GFP_KERNEL);
 
 	info->version = __stringify(TCMU_MAILBOX_VERSION);
 
@@ -1149,6 +1247,10 @@ static int tcmu_configure_device(struct se_device *dev)
 	if (ret)
 		goto err_netlink;
 
+	mutex_lock(&root_udev_mutex);
+	list_add(&udev->node, &root_udev);
+	mutex_unlock(&root_udev_mutex);
+
 	return 0;
 
 err_netlink:
@@ -1183,6 +1285,23 @@ static bool tcmu_dev_configured(struct tcmu_dev *udev)
 	return udev->uio_info.uio_dev ? true : false;
 }
 
+static void tcmu_blocks_release(struct tcmu_dev *udev)
+{
+	int i;
+	struct page *page;
+
+	/* Try to release all block pages */
+	mutex_lock(&udev->cmdr_lock);
+	for (i = 0; i <= udev->dbi_max; i++) {
+		page = radix_tree_delete(&udev->data_blocks, i);
+		if (page) {
+			__free_page(page);
+			atomic_dec(&global_db_count);
+		}
+	}
+	mutex_unlock(&udev->cmdr_lock);
+}
+
 static void tcmu_free_device(struct se_device *dev)
 {
 	struct tcmu_dev *udev = TCMU_DEV(dev);
@@ -1192,6 +1311,10 @@ static void tcmu_free_device(struct se_device *dev)
 
 	del_timer_sync(&udev->timeout);
 
+	mutex_lock(&root_udev_mutex);
+	list_del(&udev->node);
+	mutex_unlock(&root_udev_mutex);
+
 	vfree(udev->mb_addr);
 
 	/* Upper layer should drain all requests before calling this */
@@ -1204,7 +1327,7 @@ static void tcmu_free_device(struct se_device *dev)
 	spin_unlock_irq(&udev->commands_lock);
 	WARN_ON(!all_expired);
 
-	tcmu_blocks_release(udev, true);
+	tcmu_blocks_release(udev);
 
 	if (tcmu_dev_configured(udev)) {
 		tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
@@ -1392,6 +1515,81 @@ static struct target_backend_ops tcmu_ops = {
 	.tb_dev_attrib_attrs	= NULL,
 };
 
+static int unmap_thread_fn(void *data)
+{
+	struct tcmu_dev *udev;
+	loff_t off;
+	uint32_t start, end, block;
+	struct page *page;
+	int i;
+
+	while (1) {
+		DEFINE_WAIT(__wait);
+
+		prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE);
+		schedule();
+		finish_wait(&unmap_wait, &__wait);
+
+		mutex_lock(&root_udev_mutex);
+		list_for_each_entry(udev, &root_udev, node) {
+			mutex_lock(&udev->cmdr_lock);
+
+			/* Try to complete the finished commands first */
+			tcmu_handle_completions(udev);
+
+			/* Skip the udevs waiting the global pool or in idle */
+			if (udev->waiting_global || !udev->dbi_thresh) {
+				mutex_unlock(&udev->cmdr_lock);
+				continue;
+			}
+
+			end = udev->dbi_max + 1;
+			block = find_last_bit(udev->data_bitmap, end);
+			if (block == udev->dbi_max) {
+				/*
+				 * The last bit is dbi_max, so there is
+				 * no need to shrink any blocks.
+				 */
+				mutex_unlock(&udev->cmdr_lock);
+				continue;
+			} else if (block == end) {
+				/* The current udev will goto idle state */
+				udev->dbi_thresh = start = 0;
+				udev->dbi_max = 0;
+			} else {
+				udev->dbi_thresh = start = block + 1;
+				udev->dbi_max = block;
+			}
+
+			/* Here will truncate the data area from off */
+			off = udev->data_off + start * DATA_BLOCK_SIZE;
+			unmap_mapping_range(udev->inode->i_mapping, off, 0, 1);
+
+			/* Release the block pages */
+			for (i = start; i < end; i++) {
+				page = radix_tree_delete(&udev->data_blocks, i);
+				if (page) {
+					__free_page(page);
+					atomic_dec(&global_db_count);
+				}
+			}
+			mutex_unlock(&udev->cmdr_lock);
+		}
+
+		/*
+		 * Try to wake up the udevs who are waiting
+		 * for the global data pool.
+		 */
+		list_for_each_entry(udev, &root_udev, node) {
+			if (udev->waiting_global)
+				wake_up(&udev->wait_cmdr);
+		}
+		mutex_unlock(&root_udev_mutex);
+	}
+
+	return 0;
+}
+
 static int __init tcmu_module_init(void)
 {
 	int ret, i, len = 0;
@@ -1437,8 +1635,17 @@ static int __init tcmu_module_init(void)
 	if (ret)
 		goto out_attrs;
 
+	init_waitqueue_head(&unmap_wait);
+	unmap_thread = kthread_run(unmap_thread_fn, NULL, "tcmu_unmap");
+	if (IS_ERR(unmap_thread)) {
+		ret = PTR_ERR(unmap_thread);
+		goto out_unreg_transport;
+	}
+
 	return 0;
 
+out_unreg_transport:
+	target_backend_unregister(&tcmu_ops);
 out_attrs:
 	kfree(tcmu_attrs);
 out_unreg_genl:
@@ -1453,6 +1660,7 @@ out_free_cache:
 
 static void __exit tcmu_module_exit(void)
 {
+	kthread_stop(unmap_thread);
 	target_backend_unregister(&tcmu_ops);
 	kfree(tcmu_attrs);
 	genl_unregister_family(&tcmu_genl_family);
-- 
cgit v1.2.3


From c667186f1c01ca8970c785888868b7ffd74e51ee Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 27 Apr 2017 19:06:48 +0100
Subject: arm64: KVM: Fix decoding of Rt/Rt2 when trapping AArch32 CP accesses

Our 32bit CP14/15 handling inherited some of the ARMv7 code for handling
the trapped system registers, completely missing the fact that the
fields for Rt and Rt2 are now 5 bit wide, and not 4...

Let's fix it, and provide an accessor for the most common Rt case.

Cc: stable@vger.kernel.org
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 arch/arm64/include/asm/kvm_emulate.h | 6 ++++++
 arch/arm64/kvm/sys_regs.c            | 8 ++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f5ea0ba70f07..fe39e6841326 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -240,6 +240,12 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
 }
 
+static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
+{
+	u32 esr = kvm_vcpu_get_hsr(vcpu);
+	return (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+}
+
 static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
 	return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 8ddcee6e4702..ea9fbb5c17d0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1529,8 +1529,8 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
 {
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
-	int Rt = (hsr >> 5) & 0xf;
-	int Rt2 = (hsr >> 10) & 0xf;
+	int Rt = kvm_vcpu_sys_get_rt(vcpu);
+	int Rt2 = (hsr >> 10) & 0x1f;
 
 	params.is_aarch32 = true;
 	params.is_32bit = false;
@@ -1586,7 +1586,7 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
 {
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
-	int Rt  = (hsr >> 5) & 0xf;
+	int Rt  = kvm_vcpu_sys_get_rt(vcpu);
 
 	params.is_aarch32 = true;
 	params.is_32bit = true;
@@ -1688,7 +1688,7 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	struct sys_reg_params params;
 	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
-	int Rt = (esr >> 5) & 0x1f;
+	int Rt = kvm_vcpu_sys_get_rt(vcpu);
 	int ret;
 
 	trace_kvm_handle_sys_reg(esr);
-- 
cgit v1.2.3


From e345da82bd6bdfa8492f80b3ce4370acfd868d95 Mon Sep 17 00:00:00 2001
From: Mario Kleiner <mario.kleiner.de@gmail.com>
Date: Fri, 21 Apr 2017 17:05:08 +0200
Subject: drm/edid: Add 10 bpc quirk for LGD 764 panel in HP zBook 17 G2

The builtin eDP panel in the HP zBook 17 G2 supports 10 bpc,
as advertised by the Laptops product specs and verified via
injecting a fixed edid + photometer measurements, but edid
reports unknown depth, so drivers fall back to 6 bpc.

Add a quirk to get the full 10 bpc.

Cc: stable@vger.kernel.org
Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1492787108-23959-1-git-send-email-mario.kleiner.de@gmail.com
---
 drivers/gpu/drm/drm_edid.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index fad3d44e4642..2e55599816aa 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -80,6 +80,8 @@
 #define EDID_QUIRK_FORCE_12BPC			(1 << 9)
 /* Force 6bpc */
 #define EDID_QUIRK_FORCE_6BPC			(1 << 10)
+/* Force 10bpc */
+#define EDID_QUIRK_FORCE_10BPC			(1 << 11)
 
 struct detailed_mode_closure {
 	struct drm_connector *connector;
@@ -122,6 +124,9 @@ static const struct edid_quirk {
 	{ "FCM", 13600, EDID_QUIRK_PREFER_LARGE_75 |
 	  EDID_QUIRK_DETAILED_IN_CM },
 
+	/* LGD panel of HP zBook 17 G2, eDP 10 bpc, but reports unknown bpc */
+	{ "LGD", 764, EDID_QUIRK_FORCE_10BPC },
+
 	/* LG Philips LCD LP154W01-A5 */
 	{ "LPL", 0, EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE },
 	{ "LPL", 0x2a00, EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE },
@@ -4244,6 +4249,9 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
 	if (quirks & EDID_QUIRK_FORCE_8BPC)
 		connector->display_info.bpc = 8;
 
+	if (quirks & EDID_QUIRK_FORCE_10BPC)
+		connector->display_info.bpc = 10;
+
 	if (quirks & EDID_QUIRK_FORCE_12BPC)
 		connector->display_info.bpc = 12;
 
-- 
cgit v1.2.3


From 45f580c42e5c125d55dbd8099750a1998de3d917 Mon Sep 17 00:00:00 2001
From: Maksim Salau <maksim.salau@gmail.com>
Date: Tue, 2 May 2017 13:47:53 +0200
Subject: video: fbdev: udlfb: Fix buffer on stack

Allocate buffers on HEAP instead of STACK for local array
that is to be sent using usb_control_msg().

Signed-off-by: Maksim Salau <maksim.salau@gmail.com>
Cc: Bernie Thompson <bernie@plugable.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/udlfb.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index e9c2f7ba3c8e..6a3c353de7c3 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -1487,15 +1487,25 @@ static struct device_attribute fb_device_attrs[] = {
 static int dlfb_select_std_channel(struct dlfb_data *dev)
 {
 	int ret;
-	u8 set_def_chn[] = {	   0x57, 0xCD, 0xDC, 0xA7,
+	void *buf;
+	static const u8 set_def_chn[] = {
+				0x57, 0xCD, 0xDC, 0xA7,
 				0x1C, 0x88, 0x5E, 0x15,
 				0x60, 0xFE, 0xC6, 0x97,
 				0x16, 0x3D, 0x47, 0xF2  };
 
+	buf = kmemdup(set_def_chn, sizeof(set_def_chn), GFP_KERNEL);
+
+	if (!buf)
+		return -ENOMEM;
+
 	ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
 			NR_USB_REQUEST_CHANNEL,
 			(USB_DIR_OUT | USB_TYPE_VENDOR), 0, 0,
-			set_def_chn, sizeof(set_def_chn), USB_CTRL_SET_TIMEOUT);
+			buf, sizeof(set_def_chn), USB_CTRL_SET_TIMEOUT);
+
+	kfree(buf);
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From dc85e9a87420613b3129d5cc5ecd79c58351c546 Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Tue, 2 May 2017 13:47:53 +0200
Subject: sm501fb: don't return zero on failure path in sm501fb_start()

If fbmem iomemory mapping failed, sm501fb_start() breaks off
initialization, deallocates resources, but returns zero.
As a result, double deallocation can happen in sm501fb_stop().

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/sm501fb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/video/fbdev/sm501fb.c b/drivers/video/fbdev/sm501fb.c
index d80bc8a3200f..67e314fdd947 100644
--- a/drivers/video/fbdev/sm501fb.c
+++ b/drivers/video/fbdev/sm501fb.c
@@ -1600,6 +1600,7 @@ static int sm501fb_start(struct sm501fb_info *info,
 	info->fbmem = ioremap(res->start, resource_size(res));
 	if (info->fbmem == NULL) {
 		dev_err(dev, "cannot remap framebuffer\n");
+		ret = -ENXIO;
 		goto err_mem_res;
 	}
 
-- 
cgit v1.2.3


From 5c9cfda13dc503f4b25b486a57974e0414adb6f1 Mon Sep 17 00:00:00 2001
From: Karim Eshapa <karim.eshapa@gmail.com>
Date: Tue, 2 May 2017 13:47:54 +0200
Subject: drivers/video/fbdev/omap/lcd_mipid.c: Use time comparison kernel
 macros

Use time_before_eq time comparison defind kernel macro
that has safety check.

Signed-off-by: Karim Eshapa <karim.eshapa@gmail.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/omap/lcd_mipid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/omap/lcd_mipid.c b/drivers/video/fbdev/omap/lcd_mipid.c
index c81f150589e1..df9e6ebcfad5 100644
--- a/drivers/video/fbdev/omap/lcd_mipid.c
+++ b/drivers/video/fbdev/omap/lcd_mipid.c
@@ -174,7 +174,7 @@ static void hw_guard_wait(struct mipid_device *md)
 {
 	unsigned long wait = md->hw_guard_end - jiffies;
 
-	if ((long)wait > 0 && wait <= md->hw_guard_wait) {
+	if ((long)wait > 0 && time_before_eq(wait,  md->hw_guard_wait)) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule_timeout(wait);
 	}
-- 
cgit v1.2.3


From 45753c5f315749711b935a2506ee5c10eef5c23d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 2 May 2017 10:31:18 +0200
Subject: srcu: Debloat the <linux/rcu_segcblist.h> header

Linus noticed that the <linux/rcu_segcblist.h> has huge inline functions
which should not be inline at all.

As a first step in cleaning this up, move them all to kernel/rcu/ and
only keep an absolute minimum of data type defines in the header:

  before:   -rw-r--r-- 1 mingo mingo 22284 May  2 10:25 include/linux/rcu_segcblist.h
   after:   -rw-r--r-- 1 mingo mingo  3180 May  2 10:22 include/linux/rcu_segcblist.h

More can be done, such as uninlining the large functions, which inlining
is unjustified even if it's an RCU internal matter.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcu_segcblist.h | 628 +---------------------------------------
 kernel/rcu/rcu_segcblist.h    | 645 ++++++++++++++++++++++++++++++++++++++++++
 kernel/rcu/srcutiny.c         |   1 +
 kernel/rcu/srcutree.c         |   1 +
 kernel/rcu/tree.h             |   3 +-
 5 files changed, 652 insertions(+), 626 deletions(-)
 create mode 100644 kernel/rcu/rcu_segcblist.h

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index ced8f313fd05..ba4d2621d9ca 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -20,8 +20,8 @@
  * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
-#ifndef __KERNEL_RCU_SEGCBLIST_H
-#define __KERNEL_RCU_SEGCBLIST_H
+#ifndef __INCLUDE_LINUX_RCU_SEGCBLIST_H
+#define __INCLUDE_LINUX_RCU_SEGCBLIST_H
 
 /* Simple unsegmented callback lists. */
 struct rcu_cblist {
@@ -33,102 +33,6 @@ struct rcu_cblist {
 
 #define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head }
 
-/* Initialize simple callback list. */
-static inline void rcu_cblist_init(struct rcu_cblist *rclp)
-{
-	rclp->head = NULL;
-	rclp->tail = &rclp->head;
-	rclp->len = 0;
-	rclp->len_lazy = 0;
-}
-
-/* Is simple callback list empty? */
-static inline bool rcu_cblist_empty(struct rcu_cblist *rclp)
-{
-	return !rclp->head;
-}
-
-/* Return number of callbacks in simple callback list. */
-static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
-{
-	return rclp->len;
-}
-
-/* Return number of lazy callbacks in simple callback list. */
-static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
-{
-	return rclp->len_lazy;
-}
-
-/*
- * Debug function to actually count the number of callbacks.
- * If the number exceeds the limit specified, return -1.
- */
-static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
-{
-	int cnt = 0;
-	struct rcu_head **rhpp = &rclp->head;
-
-	for (;;) {
-		if (!*rhpp)
-			return cnt;
-		if (++cnt > lim)
-			return -1;
-		rhpp = &(*rhpp)->next;
-	}
-}
-
-/*
- * Dequeue the oldest rcu_head structure from the specified callback
- * list.  This function assumes that the callback is non-lazy, but
- * the caller can later invoke rcu_cblist_dequeued_lazy() if it
- * finds otherwise (and if it cares about laziness).  This allows
- * different users to have different ways of determining laziness.
- */
-static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
-{
-	struct rcu_head *rhp;
-
-	rhp = rclp->head;
-	if (!rhp)
-		return NULL;
-	rclp->len--;
-	rclp->head = rhp->next;
-	if (!rclp->head)
-		rclp->tail = &rclp->head;
-	return rhp;
-}
-
-/*
- * Account for the fact that a previously dequeued callback turned out
- * to be marked as lazy.
- */
-static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
-{
-	rclp->len_lazy--;
-}
-
-/*
- * Interim function to return rcu_cblist head pointer.  Longer term, the
- * rcu_cblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
-{
-	return rclp->head;
-}
-
-/*
- * Interim function to return rcu_cblist head pointer.  Longer term, the
- * rcu_cblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
-{
-	WARN_ON_ONCE(rcu_cblist_empty(rclp));
-	return rclp->tail;
-}
-
 /* Complicated segmented callback lists.  ;-) */
 
 /*
@@ -183,530 +87,4 @@ struct rcu_segcblist {
 	.tails[RCU_NEXT_TAIL] = &n.head, \
 }
 
-/*
- * Initialize an rcu_segcblist structure.
- */
-static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp)
-{
-	int i;
-
-	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
-	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
-	rsclp->head = NULL;
-	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
-		rsclp->tails[i] = &rsclp->head;
-	rsclp->len = 0;
-	rsclp->len_lazy = 0;
-}
-
-/*
- * Is the specified rcu_segcblist structure empty?
- *
- * But careful!  The fact that the ->head field is NULL does not
- * necessarily imply that there are no callbacks associated with
- * this structure.  When callbacks are being invoked, they are
- * removed as a group.  If callback invocation must be preempted,
- * the remaining callbacks will be added back to the list.  Either
- * way, the counts are updated later.
- *
- * So it is often the case that rcu_segcblist_n_cbs() should be used
- * instead.
- */
-static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
-{
-	return !rsclp->head;
-}
-
-/* Return number of callbacks in segmented callback list. */
-static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
-{
-	return READ_ONCE(rsclp->len);
-}
-
-/* Return number of lazy callbacks in segmented callback list. */
-static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp)
-{
-	return rsclp->len_lazy;
-}
-
-/* Return number of lazy callbacks in segmented callback list. */
-static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
-{
-	return rsclp->len - rsclp->len_lazy;
-}
-
-/*
- * Is the specified rcu_segcblist enabled, for example, not corresponding
- * to an offline or callback-offloaded CPU?
- */
-static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
-{
-	return !!rsclp->tails[RCU_NEXT_TAIL];
-}
-
-/*
- * Disable the specified rcu_segcblist structure, so that callbacks can
- * no longer be posted to it.  This structure must be empty.
- */
-static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
-{
-	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
-	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
-	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
-	rsclp->tails[RCU_NEXT_TAIL] = NULL;
-}
-
-/*
- * Is the specified segment of the specified rcu_segcblist structure
- * empty of callbacks?
- */
-static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
-{
-	if (seg == RCU_DONE_TAIL)
-		return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
-	return rsclp->tails[seg - 1] == rsclp->tails[seg];
-}
-
-/*
- * Are all segments following the specified segment of the specified
- * rcu_segcblist structure empty of callbacks?  (The specified
- * segment might well contain callbacks.)
- */
-static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
-{
-	return !*rsclp->tails[seg];
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * are ready to be invoked?
- */
-static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
-{
-	return rcu_segcblist_is_enabled(rsclp) &&
-	       &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * are still pending, that is, not yet ready to be invoked?
- */
-static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
-{
-	return rcu_segcblist_is_enabled(rsclp) &&
-	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
-}
-
-/*
- * Dequeue and return the first ready-to-invoke callback.  If there
- * are no ready-to-invoke callbacks, return NULL.  Disables interrupts
- * to avoid interference.  Does not protect from interference from other
- * CPUs or tasks.
- */
-static inline struct rcu_head *
-rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
-{
-	unsigned long flags;
-	int i;
-	struct rcu_head *rhp;
-
-	local_irq_save(flags);
-	if (!rcu_segcblist_ready_cbs(rsclp)) {
-		local_irq_restore(flags);
-		return NULL;
-	}
-	rhp = rsclp->head;
-	BUG_ON(!rhp);
-	rsclp->head = rhp->next;
-	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
-		if (rsclp->tails[i] != &rhp->next)
-			break;
-		rsclp->tails[i] = &rsclp->head;
-	}
-	smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
-	WRITE_ONCE(rsclp->len, rsclp->len - 1);
-	local_irq_restore(flags);
-	return rhp;
-}
-
-/*
- * Account for the fact that a previously dequeued callback turned out
- * to be marked as lazy.
- */
-static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	rsclp->len_lazy--;
-	local_irq_restore(flags);
-}
-
-/*
- * Return a pointer to the first callback in the specified rcu_segcblist
- * structure.  This is useful for diagnostics.
- */
-static inline struct rcu_head *
-rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
-{
-	if (rcu_segcblist_is_enabled(rsclp))
-		return rsclp->head;
-	return NULL;
-}
-
-/*
- * Return a pointer to the first pending callback in the specified
- * rcu_segcblist structure.  This is useful just after posting a given
- * callback -- if that callback is the first pending callback, then
- * you cannot rely on someone else having already started up the required
- * grace period.
- */
-static inline struct rcu_head *
-rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
-{
-	if (rcu_segcblist_is_enabled(rsclp))
-		return *rsclp->tails[RCU_DONE_TAIL];
-	return NULL;
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * have not yet been processed beyond having been posted, that is,
- * does it contain callbacks in its last segment?
- */
-static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
-{
-	return rcu_segcblist_is_enabled(rsclp) &&
-	       !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
-}
-
-/*
- * Enqueue the specified callback onto the specified rcu_segcblist
- * structure, updating accounting as needed.  Note that the ->len
- * field may be accessed locklessly, hence the WRITE_ONCE().
- * The ->len field is used by rcu_barrier() and friends to determine
- * if it must post a callback on this structure, and it is OK
- * for rcu_barrier() to sometimes post callbacks needlessly, but
- * absolutely not OK for it to ever miss posting a callback.
- */
-static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
-					 struct rcu_head *rhp, bool lazy)
-{
-	WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
-	if (lazy)
-		rsclp->len_lazy++;
-	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
-	rhp->next = NULL;
-	*rsclp->tails[RCU_NEXT_TAIL] = rhp;
-	rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
-}
-
-/*
- * Entrain the specified callback onto the specified rcu_segcblist at
- * the end of the last non-empty segment.  If the entire rcu_segcblist
- * is empty, make no change, but return false.
- *
- * This is intended for use by rcu_barrier()-like primitives, -not-
- * for normal grace-period use.  IMPORTANT:  The callback you enqueue
- * will wait for all prior callbacks, NOT necessarily for a grace
- * period.  You have been warned.
- */
-static inline bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
-					 struct rcu_head *rhp, bool lazy)
-{
-	int i;
-
-	if (rcu_segcblist_n_cbs(rsclp) == 0)
-		return false;
-	WRITE_ONCE(rsclp->len, rsclp->len + 1);
-	if (lazy)
-		rsclp->len_lazy++;
-	smp_mb(); /* Ensure counts are updated before callback is entrained. */
-	rhp->next = NULL;
-	for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
-		if (rsclp->tails[i] != rsclp->tails[i - 1])
-			break;
-	*rsclp->tails[i] = rhp;
-	for (; i <= RCU_NEXT_TAIL; i++)
-		rsclp->tails[i] = &rhp->next;
-	return true;
-}
-
-/*
- * Extract only the counts from the specified rcu_segcblist structure,
- * and place them in the specified rcu_cblist structure.  This function
- * supports both callback orphaning and invocation, hence the separation
- * of counts and callbacks.  (Callbacks ready for invocation must be
- * orphaned and adopted separately from pending callbacks, but counts
- * apply to all callbacks.  Locking must be used to make sure that
- * both orphaned-callbacks lists are consistent.)
- */
-static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
-					       struct rcu_cblist *rclp)
-{
-	rclp->len_lazy += rsclp->len_lazy;
-	rclp->len += rsclp->len;
-	rsclp->len_lazy = 0;
-	WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
-}
-
-/*
- * Extract only those callbacks ready to be invoked from the specified
- * rcu_segcblist structure and place them in the specified rcu_cblist
- * structure.
- */
-static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
-						  struct rcu_cblist *rclp)
-{
-	int i;
-
-	if (!rcu_segcblist_ready_cbs(rsclp))
-		return; /* Nothing to do. */
-	*rclp->tail = rsclp->head;
-	rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
-	*rsclp->tails[RCU_DONE_TAIL] = NULL;
-	rclp->tail = rsclp->tails[RCU_DONE_TAIL];
-	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
-		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
-			rsclp->tails[i] = &rsclp->head;
-}
-
-/*
- * Extract only those callbacks still pending (not yet ready to be
- * invoked) from the specified rcu_segcblist structure and place them in
- * the specified rcu_cblist structure.  Note that this loses information
- * about any callbacks that might have been partway done waiting for
- * their grace period.  Too bad!  They will have to start over.
- */
-static inline void
-rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
-			       struct rcu_cblist *rclp)
-{
-	int i;
-
-	if (!rcu_segcblist_pend_cbs(rsclp))
-		return; /* Nothing to do. */
-	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
-	rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
-	*rsclp->tails[RCU_DONE_TAIL] = NULL;
-	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
-		rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
-}
-
-/*
- * Move the entire contents of the specified rcu_segcblist structure,
- * counts, callbacks, and all, to the specified rcu_cblist structure.
- * @@@ Why do we need this???  Moving early-boot CBs to NOCB lists?
- * @@@ Memory barrier needed?  (Not if only used at boot time...)
- */
-static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp,
-					     struct rcu_cblist *rclp)
-{
-	rcu_segcblist_extract_done_cbs(rsclp, rclp);
-	rcu_segcblist_extract_pend_cbs(rsclp, rclp);
-	rcu_segcblist_extract_count(rsclp, rclp);
-}
-
-/*
- * Insert counts from the specified rcu_cblist structure in the
- * specified rcu_segcblist structure.
- */
-static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
-					      struct rcu_cblist *rclp)
-{
-	rsclp->len_lazy += rclp->len_lazy;
-	/* ->len sampled locklessly. */
-	WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
-	rclp->len_lazy = 0;
-	rclp->len = 0;
-}
-
-/*
- * Move callbacks from the specified rcu_cblist to the beginning of the
- * done-callbacks segment of the specified rcu_segcblist.
- */
-static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
-						 struct rcu_cblist *rclp)
-{
-	int i;
-
-	if (!rclp->head)
-		return; /* No callbacks to move. */
-	*rclp->tail = rsclp->head;
-	rsclp->head = rclp->head;
-	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
-		if (&rsclp->head == rsclp->tails[i])
-			rsclp->tails[i] = rclp->tail;
-		else
-			break;
-	rclp->head = NULL;
-	rclp->tail = &rclp->head;
-}
-
-/*
- * Move callbacks from the specified rcu_cblist to the end of the
- * new-callbacks segment of the specified rcu_segcblist.
- */
-static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
-						 struct rcu_cblist *rclp)
-{
-	if (!rclp->head)
-		return; /* Nothing to do. */
-	*rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
-	rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
-	rclp->head = NULL;
-	rclp->tail = &rclp->head;
-}
-
-/*
- * Advance the callbacks in the specified rcu_segcblist structure based
- * on the current value passed in for the grace-period counter.
- */
-static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
-					 unsigned long seq)
-{
-	int i, j;
-
-	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
-	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
-		return;
-
-	/*
-	 * Find all callbacks whose ->gp_seq numbers indicate that they
-	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
-	 */
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
-		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
-			break;
-		rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
-	}
-
-	/* If no callbacks moved, nothing more need be done. */
-	if (i == RCU_WAIT_TAIL)
-		return;
-
-	/* Clean up tail pointers that might have been misordered above. */
-	for (j = RCU_WAIT_TAIL; j < i; j++)
-		rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
-
-	/*
-	 * Callbacks moved, so clean up the misordered ->tails[] pointers
-	 * that now point into the middle of the list of ready-to-invoke
-	 * callbacks.  The overall effect is to copy down the later pointers
-	 * into the gap that was created by the now-ready segments.
-	 */
-	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
-		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
-			break;  /* No more callbacks. */
-		rsclp->tails[j] = rsclp->tails[i];
-		rsclp->gp_seq[j] = rsclp->gp_seq[i];
-	}
-}
-
-/*
- * "Accelerate" callbacks based on more-accurate grace-period information.
- * The reason for this is that RCU does not synchronize the beginnings and
- * ends of grace periods, and that callbacks are posted locally.  This in
- * turn means that the callbacks must be labelled conservatively early
- * on, as getting exact information would degrade both performance and
- * scalability.  When more accurate grace-period information becomes
- * available, previously posted callbacks can be "accelerated", marking
- * them to complete at the end of the earlier grace period.
- *
- * This function operates on an rcu_segcblist structure, and also the
- * grace-period sequence number seq at which new callbacks would become
- * ready to invoke.  Returns true if there are callbacks that won't be
- * ready to invoke until seq, false otherwise.
- */
-static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
-					    unsigned long seq)
-{
-	int i;
-
-	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
-	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
-		return false;
-
-	/*
-	 * Find the segment preceding the oldest segment of callbacks
-	 * whose ->gp_seq[] completion is at or after that passed in via
-	 * "seq", skipping any empty segments.  This oldest segment, along
-	 * with any later segments, can be merged in with any newly arrived
-	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
-	 * as their ->gp_seq[] grace-period completion sequence number.
-	 */
-	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
-		if (rsclp->tails[i] != rsclp->tails[i - 1] &&
-		    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
-			break;
-
-	/*
-	 * If all the segments contain callbacks that correspond to
-	 * earlier grace-period sequence numbers than "seq", leave.
-	 * Assuming that the rcu_segcblist structure has enough
-	 * segments in its arrays, this can only happen if some of
-	 * the non-done segments contain callbacks that really are
-	 * ready to invoke.  This situation will get straightened
-	 * out by the next call to rcu_segcblist_advance().
-	 *
-	 * Also advance to the oldest segment of callbacks whose
-	 * ->gp_seq[] completion is at or after that passed in via "seq",
-	 * skipping any empty segments.
-	 */
-	if (++i >= RCU_NEXT_TAIL)
-		return false;
-
-	/*
-	 * Merge all later callbacks, including newly arrived callbacks,
-	 * into the segment located by the for-loop above.  Assign "seq"
-	 * as the ->gp_seq[] value in order to correctly handle the case
-	 * where there were no pending callbacks in the rcu_segcblist
-	 * structure other than in the RCU_NEXT_TAIL segment.
-	 */
-	for (; i < RCU_NEXT_TAIL; i++) {
-		rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
-		rsclp->gp_seq[i] = seq;
-	}
-	return true;
-}
-
-/*
- * Scan the specified rcu_segcblist structure for callbacks that need
- * a grace period later than the one specified by "seq".  We don't look
- * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
- * have a grace-period sequence number.
- */
-static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
-						  unsigned long seq)
-{
-	int i;
-
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
-		if (rsclp->tails[i - 1] != rsclp->tails[i] &&
-		    ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
-			return true;
-	return false;
-}
-
-/*
- * Interim function to return rcu_segcblist head pointer.  Longer term, the
- * rcu_segcblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp)
-{
-	return rsclp->head;
-}
-
-/*
- * Interim function to return rcu_segcblist head pointer.  Longer term, the
- * rcu_segcblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
-{
-	WARN_ON_ONCE(rcu_segcblist_empty(rsclp));
-	return rsclp->tails[RCU_NEXT_TAIL];
-}
-
-#endif /* __KERNEL_RCU_SEGCBLIST_H */
+#endif /* __INCLUDE_LINUX_RCU_SEGCBLIST_H */
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
new file mode 100644
index 000000000000..d98d2f9b8d59
--- /dev/null
+++ b/kernel/rcu/rcu_segcblist.h
@@ -0,0 +1,645 @@
+/*
+ * RCU segmented callback lists
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#include <linux/rcu_segcblist.h>
+
+/* Initialize simple callback list. */
+static inline void rcu_cblist_init(struct rcu_cblist *rclp)
+{
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+	rclp->len = 0;
+	rclp->len_lazy = 0;
+}
+
+/* Is simple callback list empty? */
+static inline bool rcu_cblist_empty(struct rcu_cblist *rclp)
+{
+	return !rclp->head;
+}
+
+/* Return number of callbacks in simple callback list. */
+static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
+{
+	return rclp->len;
+}
+
+/* Return number of lazy callbacks in simple callback list. */
+static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
+{
+	return rclp->len_lazy;
+}
+
+/*
+ * Debug function to actually count the number of callbacks.
+ * If the number exceeds the limit specified, return -1.
+ */
+static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
+{
+	int cnt = 0;
+	struct rcu_head **rhpp = &rclp->head;
+
+	for (;;) {
+		if (!*rhpp)
+			return cnt;
+		if (++cnt > lim)
+			return -1;
+		rhpp = &(*rhpp)->next;
+	}
+}
+
+/*
+ * Dequeue the oldest rcu_head structure from the specified callback
+ * list.  This function assumes that the callback is non-lazy, but
+ * the caller can later invoke rcu_cblist_dequeued_lazy() if it
+ * finds otherwise (and if it cares about laziness).  This allows
+ * different users to have different ways of determining laziness.
+ */
+static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
+{
+	struct rcu_head *rhp;
+
+	rhp = rclp->head;
+	if (!rhp)
+		return NULL;
+	rclp->len--;
+	rclp->head = rhp->next;
+	if (!rclp->head)
+		rclp->tail = &rclp->head;
+	return rhp;
+}
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
+{
+	rclp->len_lazy--;
+}
+
+/*
+ * Interim function to return rcu_cblist head pointer.  Longer term, the
+ * rcu_cblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
+{
+	return rclp->head;
+}
+
+/*
+ * Interim function to return rcu_cblist head pointer.  Longer term, the
+ * rcu_cblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
+{
+	WARN_ON_ONCE(rcu_cblist_empty(rclp));
+	return rclp->tail;
+}
+
+/*
+ * Initialize an rcu_segcblist structure.
+ */
+static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp)
+{
+	int i;
+
+	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
+	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
+	rsclp->head = NULL;
+	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
+		rsclp->tails[i] = &rsclp->head;
+	rsclp->len = 0;
+	rsclp->len_lazy = 0;
+}
+
+/*
+ * Is the specified rcu_segcblist structure empty?
+ *
+ * But careful!  The fact that the ->head field is NULL does not
+ * necessarily imply that there are no callbacks associated with
+ * this structure.  When callbacks are being invoked, they are
+ * removed as a group.  If callback invocation must be preempted,
+ * the remaining callbacks will be added back to the list.  Either
+ * way, the counts are updated later.
+ *
+ * So it is often the case that rcu_segcblist_n_cbs() should be used
+ * instead.
+ */
+static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
+{
+	return !rsclp->head;
+}
+
+/* Return number of callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
+{
+	return READ_ONCE(rsclp->len);
+}
+
+/* Return number of lazy callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp)
+{
+	return rsclp->len_lazy;
+}
+
+/* Return number of lazy callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
+{
+	return rsclp->len - rsclp->len_lazy;
+}
+
+/*
+ * Is the specified rcu_segcblist enabled, for example, not corresponding
+ * to an offline or callback-offloaded CPU?
+ */
+static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
+{
+	return !!rsclp->tails[RCU_NEXT_TAIL];
+}
+
+/*
+ * Disable the specified rcu_segcblist structure, so that callbacks can
+ * no longer be posted to it.  This structure must be empty.
+ */
+static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
+{
+	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
+	rsclp->tails[RCU_NEXT_TAIL] = NULL;
+}
+
+/*
+ * Is the specified segment of the specified rcu_segcblist structure
+ * empty of callbacks?
+ */
+static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
+{
+	if (seg == RCU_DONE_TAIL)
+		return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
+	return rsclp->tails[seg - 1] == rsclp->tails[seg];
+}
+
+/*
+ * Are all segments following the specified segment of the specified
+ * rcu_segcblist structure empty of callbacks?  (The specified
+ * segment might well contain callbacks.)
+ */
+static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
+{
+	return !*rsclp->tails[seg];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are ready to be invoked?
+ */
+static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are still pending, that is, not yet ready to be invoked?
+ */
+static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
+}
+
+/*
+ * Dequeue and return the first ready-to-invoke callback.  If there
+ * are no ready-to-invoke callbacks, return NULL.  Disables interrupts
+ * to avoid interference.  Does not protect from interference from other
+ * CPUs or tasks.
+ */
+static inline struct rcu_head *
+rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
+{
+	unsigned long flags;
+	int i;
+	struct rcu_head *rhp;
+
+	local_irq_save(flags);
+	if (!rcu_segcblist_ready_cbs(rsclp)) {
+		local_irq_restore(flags);
+		return NULL;
+	}
+	rhp = rsclp->head;
+	BUG_ON(!rhp);
+	rsclp->head = rhp->next;
+	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
+		if (rsclp->tails[i] != &rhp->next)
+			break;
+		rsclp->tails[i] = &rsclp->head;
+	}
+	smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
+	WRITE_ONCE(rsclp->len, rsclp->len - 1);
+	local_irq_restore(flags);
+	return rhp;
+}
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	rsclp->len_lazy--;
+	local_irq_restore(flags);
+}
+
+/*
+ * Return a pointer to the first callback in the specified rcu_segcblist
+ * structure.  This is useful for diagnostics.
+ */
+static inline struct rcu_head *
+rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
+{
+	if (rcu_segcblist_is_enabled(rsclp))
+		return rsclp->head;
+	return NULL;
+}
+
+/*
+ * Return a pointer to the first pending callback in the specified
+ * rcu_segcblist structure.  This is useful just after posting a given
+ * callback -- if that callback is the first pending callback, then
+ * you cannot rely on someone else having already started up the required
+ * grace period.
+ */
+static inline struct rcu_head *
+rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
+{
+	if (rcu_segcblist_is_enabled(rsclp))
+		return *rsclp->tails[RCU_DONE_TAIL];
+	return NULL;
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * have not yet been processed beyond having been posted, that is,
+ * does it contain callbacks in its last segment?
+ */
+static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
+}
+
+/*
+ * Enqueue the specified callback onto the specified rcu_segcblist
+ * structure, updating accounting as needed.  Note that the ->len
+ * field may be accessed locklessly, hence the WRITE_ONCE().
+ * The ->len field is used by rcu_barrier() and friends to determine
+ * if it must post a callback on this structure, and it is OK
+ * for rcu_barrier() to sometimes post callbacks needlessly, but
+ * absolutely not OK for it to ever miss posting a callback.
+ */
+static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
+					 struct rcu_head *rhp, bool lazy)
+{
+	WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
+	if (lazy)
+		rsclp->len_lazy++;
+	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
+	rhp->next = NULL;
+	*rsclp->tails[RCU_NEXT_TAIL] = rhp;
+	rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
+}
+
+/*
+ * Entrain the specified callback onto the specified rcu_segcblist at
+ * the end of the last non-empty segment.  If the entire rcu_segcblist
+ * is empty, make no change, but return false.
+ *
+ * This is intended for use by rcu_barrier()-like primitives, -not-
+ * for normal grace-period use.  IMPORTANT:  The callback you enqueue
+ * will wait for all prior callbacks, NOT necessarily for a grace
+ * period.  You have been warned.
+ */
+static inline bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
+					 struct rcu_head *rhp, bool lazy)
+{
+	int i;
+
+	if (rcu_segcblist_n_cbs(rsclp) == 0)
+		return false;
+	WRITE_ONCE(rsclp->len, rsclp->len + 1);
+	if (lazy)
+		rsclp->len_lazy++;
+	smp_mb(); /* Ensure counts are updated before callback is entrained. */
+	rhp->next = NULL;
+	for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] != rsclp->tails[i - 1])
+			break;
+	*rsclp->tails[i] = rhp;
+	for (; i <= RCU_NEXT_TAIL; i++)
+		rsclp->tails[i] = &rhp->next;
+	return true;
+}
+
+/*
+ * Extract only the counts from the specified rcu_segcblist structure,
+ * and place them in the specified rcu_cblist structure.  This function
+ * supports both callback orphaning and invocation, hence the separation
+ * of counts and callbacks.  (Callbacks ready for invocation must be
+ * orphaned and adopted separately from pending callbacks, but counts
+ * apply to all callbacks.  Locking must be used to make sure that
+ * both orphaned-callbacks lists are consistent.)
+ */
+static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
+					       struct rcu_cblist *rclp)
+{
+	rclp->len_lazy += rsclp->len_lazy;
+	rclp->len += rsclp->len;
+	rsclp->len_lazy = 0;
+	WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
+}
+
+/*
+ * Extract only those callbacks ready to be invoked from the specified
+ * rcu_segcblist structure and place them in the specified rcu_cblist
+ * structure.
+ */
+static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
+						  struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rcu_segcblist_ready_cbs(rsclp))
+		return; /* Nothing to do. */
+	*rclp->tail = rsclp->head;
+	rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
+	*rsclp->tails[RCU_DONE_TAIL] = NULL;
+	rclp->tail = rsclp->tails[RCU_DONE_TAIL];
+	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
+			rsclp->tails[i] = &rsclp->head;
+}
+
+/*
+ * Extract only those callbacks still pending (not yet ready to be
+ * invoked) from the specified rcu_segcblist structure and place them in
+ * the specified rcu_cblist structure.  Note that this loses information
+ * about any callbacks that might have been partway done waiting for
+ * their grace period.  Too bad!  They will have to start over.
+ */
+static inline void
+rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
+			       struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rcu_segcblist_pend_cbs(rsclp))
+		return; /* Nothing to do. */
+	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
+	rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
+	*rsclp->tails[RCU_DONE_TAIL] = NULL;
+	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
+		rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Move the entire contents of the specified rcu_segcblist structure,
+ * counts, callbacks, and all, to the specified rcu_cblist structure.
+ * @@@ Why do we need this???  Moving early-boot CBs to NOCB lists?
+ * @@@ Memory barrier needed?  (Not if only used at boot time...)
+ */
+static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp,
+					     struct rcu_cblist *rclp)
+{
+	rcu_segcblist_extract_done_cbs(rsclp, rclp);
+	rcu_segcblist_extract_pend_cbs(rsclp, rclp);
+	rcu_segcblist_extract_count(rsclp, rclp);
+}
+
+/*
+ * Insert counts from the specified rcu_cblist structure in the
+ * specified rcu_segcblist structure.
+ */
+static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
+					      struct rcu_cblist *rclp)
+{
+	rsclp->len_lazy += rclp->len_lazy;
+	/* ->len sampled locklessly. */
+	WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
+	rclp->len_lazy = 0;
+	rclp->len = 0;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the beginning of the
+ * done-callbacks segment of the specified rcu_segcblist.
+ */
+static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
+						 struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rclp->head)
+		return; /* No callbacks to move. */
+	*rclp->tail = rsclp->head;
+	rsclp->head = rclp->head;
+	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
+		if (&rsclp->head == rsclp->tails[i])
+			rsclp->tails[i] = rclp->tail;
+		else
+			break;
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the end of the
+ * new-callbacks segment of the specified rcu_segcblist.
+ */
+static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
+						 struct rcu_cblist *rclp)
+{
+	if (!rclp->head)
+		return; /* Nothing to do. */
+	*rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
+	rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+}
+
+/*
+ * Advance the callbacks in the specified rcu_segcblist structure based
+ * on the current value passed in for the grace-period counter.
+ */
+static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
+					 unsigned long seq)
+{
+	int i, j;
+
+	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
+		return;
+
+	/*
+	 * Find all callbacks whose ->gp_seq numbers indicate that they
+	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
+	 */
+	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
+		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+			break;
+		rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
+	}
+
+	/* If no callbacks moved, nothing more need be done. */
+	if (i == RCU_WAIT_TAIL)
+		return;
+
+	/* Clean up tail pointers that might have been misordered above. */
+	for (j = RCU_WAIT_TAIL; j < i; j++)
+		rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
+
+	/*
+	 * Callbacks moved, so clean up the misordered ->tails[] pointers
+	 * that now point into the middle of the list of ready-to-invoke
+	 * callbacks.  The overall effect is to copy down the later pointers
+	 * into the gap that was created by the now-ready segments.
+	 */
+	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
+		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
+			break;  /* No more callbacks. */
+		rsclp->tails[j] = rsclp->tails[i];
+		rsclp->gp_seq[j] = rsclp->gp_seq[i];
+	}
+}
+
+/*
+ * "Accelerate" callbacks based on more-accurate grace-period information.
+ * The reason for this is that RCU does not synchronize the beginnings and
+ * ends of grace periods, and that callbacks are posted locally.  This in
+ * turn means that the callbacks must be labelled conservatively early
+ * on, as getting exact information would degrade both performance and
+ * scalability.  When more accurate grace-period information becomes
+ * available, previously posted callbacks can be "accelerated", marking
+ * them to complete at the end of the earlier grace period.
+ *
+ * This function operates on an rcu_segcblist structure, and also the
+ * grace-period sequence number seq at which new callbacks would become
+ * ready to invoke.  Returns true if there are callbacks that won't be
+ * ready to invoke until seq, false otherwise.
+ */
+static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
+					    unsigned long seq)
+{
+	int i;
+
+	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
+		return false;
+
+	/*
+	 * Find the segment preceding the oldest segment of callbacks
+	 * whose ->gp_seq[] completion is at or after that passed in via
+	 * "seq", skipping any empty segments.  This oldest segment, along
+	 * with any later segments, can be merged in with any newly arrived
+	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
+	 * as their ->gp_seq[] grace-period completion sequence number.
+	 */
+	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] != rsclp->tails[i - 1] &&
+		    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
+			break;
+
+	/*
+	 * If all the segments contain callbacks that correspond to
+	 * earlier grace-period sequence numbers than "seq", leave.
+	 * Assuming that the rcu_segcblist structure has enough
+	 * segments in its arrays, this can only happen if some of
+	 * the non-done segments contain callbacks that really are
+	 * ready to invoke.  This situation will get straightened
+	 * out by the next call to rcu_segcblist_advance().
+	 *
+	 * Also advance to the oldest segment of callbacks whose
+	 * ->gp_seq[] completion is at or after that passed in via "seq",
+	 * skipping any empty segments.
+	 */
+	if (++i >= RCU_NEXT_TAIL)
+		return false;
+
+	/*
+	 * Merge all later callbacks, including newly arrived callbacks,
+	 * into the segment located by the for-loop above.  Assign "seq"
+	 * as the ->gp_seq[] value in order to correctly handle the case
+	 * where there were no pending callbacks in the rcu_segcblist
+	 * structure other than in the RCU_NEXT_TAIL segment.
+	 */
+	for (; i < RCU_NEXT_TAIL; i++) {
+		rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
+		rsclp->gp_seq[i] = seq;
+	}
+	return true;
+}
+
+/*
+ * Scan the specified rcu_segcblist structure for callbacks that need
+ * a grace period later than the one specified by "seq".  We don't look
+ * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
+ * have a grace-period sequence number.
+ */
+static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
+						  unsigned long seq)
+{
+	int i;
+
+	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
+		if (rsclp->tails[i - 1] != rsclp->tails[i] &&
+		    ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+			return true;
+	return false;
+}
+
+/*
+ * Interim function to return rcu_segcblist head pointer.  Longer term, the
+ * rcu_segcblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp)
+{
+	return rsclp->head;
+}
+
+/*
+ * Interim function to return rcu_segcblist head pointer.  Longer term, the
+ * rcu_segcblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
+{
+	WARN_ON_ONCE(rcu_segcblist_empty(rsclp));
+	return rsclp->tails[RCU_NEXT_TAIL];
+}
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index b8293527ee18..36e1f82faed1 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -30,6 +30,7 @@
 #include <linux/srcu.h>
 
 #include <linux/rcu_node_tree.h>
+#include "rcu_segcblist.h"
 #include "rcu.h"
 
 static int init_srcu_struct_fields(struct srcu_struct *sp)
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 87b070de6371..3ae8474557df 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -38,6 +38,7 @@
 #include <linux/srcu.h>
 
 #include "rcu.h"
+#include "rcu_segcblist.h"
 
 ulong exp_holdoff = 25 * 1000; /* Holdoff (ns) for auto-expediting. */
 module_param(exp_holdoff, ulong, 0444);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 0e598ab08fea..ba38262c3554 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -30,9 +30,10 @@
 #include <linux/seqlock.h>
 #include <linux/swait.h>
 #include <linux/stop_machine.h>
-#include <linux/rcu_segcblist.h>
 #include <linux/rcu_node_tree.h>
 
+#include "rcu_segcblist.h"
+
 /*
  * Dynticks per-CPU state.
  */
-- 
cgit v1.2.3


From 98059b98619d093366462ff0a4e1258e946accb9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 2 May 2017 06:30:12 -0700
Subject: rcu: Separately compile large rcu_segcblist functions

This commit creates a new kernel/rcu/rcu_segcblist.c file that
contains non-trivial segcblist functions.  Trivial functions
remain as static inline functions in kernel/rcu/rcu_segcblist.h

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 init/Kconfig               |   3 +
 kernel/rcu/Makefile        |   1 +
 kernel/rcu/rcu_segcblist.c | 505 ++++++++++++++++++++++++++++++++++++++++++
 kernel/rcu/rcu_segcblist.h | 533 +++------------------------------------------
 4 files changed, 544 insertions(+), 498 deletions(-)
 create mode 100644 kernel/rcu/rcu_segcblist.c

diff --git a/init/Kconfig b/init/Kconfig
index 42a346b0df43..1d3475fc9496 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -573,6 +573,9 @@ config RCU_STALL_COMMON
 	  the tiny variants to disable RCU CPU stall warnings, while
 	  making these warnings mandatory for the tree variants.
 
+config RCU_NEED_SEGCBLIST
+	def_bool ( TREE_RCU || PREEMPT_RCU || TINY_SRCU || TREE_SRCU )
+
 config CONTEXT_TRACKING
        bool
 
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 158e6593d58c..23803c7d5180 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_TREE_RCU) += tree.o
 obj-$(CONFIG_PREEMPT_RCU) += tree.o
 obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o
 obj-$(CONFIG_TINY_RCU) += tiny.o
+obj-$(CONFIG_RCU_NEED_SEGCBLIST) += rcu_segcblist.o
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
new file mode 100644
index 000000000000..2b62a38b080f
--- /dev/null
+++ b/kernel/rcu/rcu_segcblist.c
@@ -0,0 +1,505 @@
+/*
+ * RCU segmented callback lists, function definitions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
+#include "rcu_segcblist.h"
+
+/* Initialize simple callback list. */
+void rcu_cblist_init(struct rcu_cblist *rclp)
+{
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+	rclp->len = 0;
+	rclp->len_lazy = 0;
+}
+
+/*
+ * Debug function to actually count the number of callbacks.
+ * If the number exceeds the limit specified, return -1.
+ */
+long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
+{
+	int cnt = 0;
+	struct rcu_head **rhpp = &rclp->head;
+
+	for (;;) {
+		if (!*rhpp)
+			return cnt;
+		if (++cnt > lim)
+			return -1;
+		rhpp = &(*rhpp)->next;
+	}
+}
+
+/*
+ * Dequeue the oldest rcu_head structure from the specified callback
+ * list.  This function assumes that the callback is non-lazy, but
+ * the caller can later invoke rcu_cblist_dequeued_lazy() if it
+ * finds otherwise (and if it cares about laziness).  This allows
+ * different users to have different ways of determining laziness.
+ */
+struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
+{
+	struct rcu_head *rhp;
+
+	rhp = rclp->head;
+	if (!rhp)
+		return NULL;
+	rclp->len--;
+	rclp->head = rhp->next;
+	if (!rclp->head)
+		rclp->tail = &rclp->head;
+	return rhp;
+}
+
+/*
+ * Initialize an rcu_segcblist structure.
+ */
+void rcu_segcblist_init(struct rcu_segcblist *rsclp)
+{
+	int i;
+
+	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
+	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
+	rsclp->head = NULL;
+	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
+		rsclp->tails[i] = &rsclp->head;
+	rsclp->len = 0;
+	rsclp->len_lazy = 0;
+}
+
+/*
+ * Disable the specified rcu_segcblist structure, so that callbacks can
+ * no longer be posted to it.  This structure must be empty.
+ */
+void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
+{
+	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
+	rsclp->tails[RCU_NEXT_TAIL] = NULL;
+}
+
+/*
+ * Is the specified segment of the specified rcu_segcblist structure
+ * empty of callbacks?
+ */
+bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
+{
+	if (seg == RCU_DONE_TAIL)
+		return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
+	return rsclp->tails[seg - 1] == rsclp->tails[seg];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are ready to be invoked?
+ */
+bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are still pending, that is, not yet ready to be invoked?
+ */
+bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
+}
+
+/*
+ * Dequeue and return the first ready-to-invoke callback.  If there
+ * are no ready-to-invoke callbacks, return NULL.  Disables interrupts
+ * to avoid interference.  Does not protect from interference from other
+ * CPUs or tasks.
+ */
+struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
+{
+	unsigned long flags;
+	int i;
+	struct rcu_head *rhp;
+
+	local_irq_save(flags);
+	if (!rcu_segcblist_ready_cbs(rsclp)) {
+		local_irq_restore(flags);
+		return NULL;
+	}
+	rhp = rsclp->head;
+	BUG_ON(!rhp);
+	rsclp->head = rhp->next;
+	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
+		if (rsclp->tails[i] != &rhp->next)
+			break;
+		rsclp->tails[i] = &rsclp->head;
+	}
+	smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
+	WRITE_ONCE(rsclp->len, rsclp->len - 1);
+	local_irq_restore(flags);
+	return rhp;
+}
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	rsclp->len_lazy--;
+	local_irq_restore(flags);
+}
+
+/*
+ * Return a pointer to the first callback in the specified rcu_segcblist
+ * structure.  This is useful for diagnostics.
+ */
+struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
+{
+	if (rcu_segcblist_is_enabled(rsclp))
+		return rsclp->head;
+	return NULL;
+}
+
+/*
+ * Return a pointer to the first pending callback in the specified
+ * rcu_segcblist structure.  This is useful just after posting a given
+ * callback -- if that callback is the first pending callback, then
+ * you cannot rely on someone else having already started up the required
+ * grace period.
+ */
+struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
+{
+	if (rcu_segcblist_is_enabled(rsclp))
+		return *rsclp->tails[RCU_DONE_TAIL];
+	return NULL;
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * have not yet been processed beyond having been posted, that is,
+ * does it contain callbacks in its last segment?
+ */
+bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
+{
+	return rcu_segcblist_is_enabled(rsclp) &&
+	       !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
+}
+
+/*
+ * Enqueue the specified callback onto the specified rcu_segcblist
+ * structure, updating accounting as needed.  Note that the ->len
+ * field may be accessed locklessly, hence the WRITE_ONCE().
+ * The ->len field is used by rcu_barrier() and friends to determine
+ * if it must post a callback on this structure, and it is OK
+ * for rcu_barrier() to sometimes post callbacks needlessly, but
+ * absolutely not OK for it to ever miss posting a callback.
+ */
+void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
+			   struct rcu_head *rhp, bool lazy)
+{
+	WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
+	if (lazy)
+		rsclp->len_lazy++;
+	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
+	rhp->next = NULL;
+	*rsclp->tails[RCU_NEXT_TAIL] = rhp;
+	rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
+}
+
+/*
+ * Entrain the specified callback onto the specified rcu_segcblist at
+ * the end of the last non-empty segment.  If the entire rcu_segcblist
+ * is empty, make no change, but return false.
+ *
+ * This is intended for use by rcu_barrier()-like primitives, -not-
+ * for normal grace-period use.  IMPORTANT:  The callback you enqueue
+ * will wait for all prior callbacks, NOT necessarily for a grace
+ * period.  You have been warned.
+ */
+bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
+			   struct rcu_head *rhp, bool lazy)
+{
+	int i;
+
+	if (rcu_segcblist_n_cbs(rsclp) == 0)
+		return false;
+	WRITE_ONCE(rsclp->len, rsclp->len + 1);
+	if (lazy)
+		rsclp->len_lazy++;
+	smp_mb(); /* Ensure counts are updated before callback is entrained. */
+	rhp->next = NULL;
+	for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] != rsclp->tails[i - 1])
+			break;
+	*rsclp->tails[i] = rhp;
+	for (; i <= RCU_NEXT_TAIL; i++)
+		rsclp->tails[i] = &rhp->next;
+	return true;
+}
+
+/*
+ * Extract only the counts from the specified rcu_segcblist structure,
+ * and place them in the specified rcu_cblist structure.  This function
+ * supports both callback orphaning and invocation, hence the separation
+ * of counts and callbacks.  (Callbacks ready for invocation must be
+ * orphaned and adopted separately from pending callbacks, but counts
+ * apply to all callbacks.  Locking must be used to make sure that
+ * both orphaned-callbacks lists are consistent.)
+ */
+void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
+					       struct rcu_cblist *rclp)
+{
+	rclp->len_lazy += rsclp->len_lazy;
+	rclp->len += rsclp->len;
+	rsclp->len_lazy = 0;
+	WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
+}
+
+/*
+ * Extract only those callbacks ready to be invoked from the specified
+ * rcu_segcblist structure and place them in the specified rcu_cblist
+ * structure.
+ */
+void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
+				    struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rcu_segcblist_ready_cbs(rsclp))
+		return; /* Nothing to do. */
+	*rclp->tail = rsclp->head;
+	rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
+	*rsclp->tails[RCU_DONE_TAIL] = NULL;
+	rclp->tail = rsclp->tails[RCU_DONE_TAIL];
+	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
+			rsclp->tails[i] = &rsclp->head;
+}
+
+/*
+ * Extract only those callbacks still pending (not yet ready to be
+ * invoked) from the specified rcu_segcblist structure and place them in
+ * the specified rcu_cblist structure.  Note that this loses information
+ * about any callbacks that might have been partway done waiting for
+ * their grace period.  Too bad!  They will have to start over.
+ */
+void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
+				    struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rcu_segcblist_pend_cbs(rsclp))
+		return; /* Nothing to do. */
+	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
+	rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
+	*rsclp->tails[RCU_DONE_TAIL] = NULL;
+	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
+		rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Insert counts from the specified rcu_cblist structure in the
+ * specified rcu_segcblist structure.
+ */
+void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
+				struct rcu_cblist *rclp)
+{
+	rsclp->len_lazy += rclp->len_lazy;
+	/* ->len sampled locklessly. */
+	WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
+	rclp->len_lazy = 0;
+	rclp->len = 0;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the beginning of the
+ * done-callbacks segment of the specified rcu_segcblist.
+ */
+void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
+				   struct rcu_cblist *rclp)
+{
+	int i;
+
+	if (!rclp->head)
+		return; /* No callbacks to move. */
+	*rclp->tail = rsclp->head;
+	rsclp->head = rclp->head;
+	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
+		if (&rsclp->head == rsclp->tails[i])
+			rsclp->tails[i] = rclp->tail;
+		else
+			break;
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the end of the
+ * new-callbacks segment of the specified rcu_segcblist.
+ */
+void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
+				   struct rcu_cblist *rclp)
+{
+	if (!rclp->head)
+		return; /* Nothing to do. */
+	*rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
+	rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
+	rclp->head = NULL;
+	rclp->tail = &rclp->head;
+}
+
+/*
+ * Advance the callbacks in the specified rcu_segcblist structure based
+ * on the current value passed in for the grace-period counter.
+ */
+void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
+{
+	int i, j;
+
+	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
+		return;
+
+	/*
+	 * Find all callbacks whose ->gp_seq numbers indicate that they
+	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
+	 */
+	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
+		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+			break;
+		rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
+	}
+
+	/* If no callbacks moved, nothing more need be done. */
+	if (i == RCU_WAIT_TAIL)
+		return;
+
+	/* Clean up tail pointers that might have been misordered above. */
+	for (j = RCU_WAIT_TAIL; j < i; j++)
+		rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
+
+	/*
+	 * Callbacks moved, so clean up the misordered ->tails[] pointers
+	 * that now point into the middle of the list of ready-to-invoke
+	 * callbacks.  The overall effect is to copy down the later pointers
+	 * into the gap that was created by the now-ready segments.
+	 */
+	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
+		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
+			break;  /* No more callbacks. */
+		rsclp->tails[j] = rsclp->tails[i];
+		rsclp->gp_seq[j] = rsclp->gp_seq[i];
+	}
+}
+
+/*
+ * "Accelerate" callbacks based on more-accurate grace-period information.
+ * The reason for this is that RCU does not synchronize the beginnings and
+ * ends of grace periods, and that callbacks are posted locally.  This in
+ * turn means that the callbacks must be labelled conservatively early
+ * on, as getting exact information would degrade both performance and
+ * scalability.  When more accurate grace-period information becomes
+ * available, previously posted callbacks can be "accelerated", marking
+ * them to complete at the end of the earlier grace period.
+ *
+ * This function operates on an rcu_segcblist structure, and also the
+ * grace-period sequence number seq at which new callbacks would become
+ * ready to invoke.  Returns true if there are callbacks that won't be
+ * ready to invoke until seq, false otherwise.
+ */
+bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
+{
+	int i;
+
+	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
+		return false;
+
+	/*
+	 * Find the segment preceding the oldest segment of callbacks
+	 * whose ->gp_seq[] completion is at or after that passed in via
+	 * "seq", skipping any empty segments.  This oldest segment, along
+	 * with any later segments, can be merged in with any newly arrived
+	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
+	 * as their ->gp_seq[] grace-period completion sequence number.
+	 */
+	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
+		if (rsclp->tails[i] != rsclp->tails[i - 1] &&
+		    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
+			break;
+
+	/*
+	 * If all the segments contain callbacks that correspond to
+	 * earlier grace-period sequence numbers than "seq", leave.
+	 * Assuming that the rcu_segcblist structure has enough
+	 * segments in its arrays, this can only happen if some of
+	 * the non-done segments contain callbacks that really are
+	 * ready to invoke.  This situation will get straightened
+	 * out by the next call to rcu_segcblist_advance().
+	 *
+	 * Also advance to the oldest segment of callbacks whose
+	 * ->gp_seq[] completion is at or after that passed in via "seq",
+	 * skipping any empty segments.
+	 */
+	if (++i >= RCU_NEXT_TAIL)
+		return false;
+
+	/*
+	 * Merge all later callbacks, including newly arrived callbacks,
+	 * into the segment located by the for-loop above.  Assign "seq"
+	 * as the ->gp_seq[] value in order to correctly handle the case
+	 * where there were no pending callbacks in the rcu_segcblist
+	 * structure other than in the RCU_NEXT_TAIL segment.
+	 */
+	for (; i < RCU_NEXT_TAIL; i++) {
+		rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
+		rsclp->gp_seq[i] = seq;
+	}
+	return true;
+}
+
+/*
+ * Scan the specified rcu_segcblist structure for callbacks that need
+ * a grace period later than the one specified by "seq".  We don't look
+ * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
+ * have a grace-period sequence number.
+ */
+bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
+				    unsigned long seq)
+{
+	int i;
+
+	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
+		if (rsclp->tails[i - 1] != rsclp->tails[i] &&
+		    ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+			return true;
+	return false;
+}
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index d98d2f9b8d59..86bc1101b806 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -1,5 +1,5 @@
 /*
- * RCU segmented callback lists
+ * RCU segmented callback lists, internal-to-rcu header file
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -22,15 +22,6 @@
 
 #include <linux/rcu_segcblist.h>
 
-/* Initialize simple callback list. */
-static inline void rcu_cblist_init(struct rcu_cblist *rclp)
-{
-	rclp->head = NULL;
-	rclp->tail = &rclp->head;
-	rclp->len = 0;
-	rclp->len_lazy = 0;
-}
-
 /* Is simple callback list empty? */
 static inline bool rcu_cblist_empty(struct rcu_cblist *rclp)
 {
@@ -49,45 +40,6 @@ static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
 	return rclp->len_lazy;
 }
 
-/*
- * Debug function to actually count the number of callbacks.
- * If the number exceeds the limit specified, return -1.
- */
-static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
-{
-	int cnt = 0;
-	struct rcu_head **rhpp = &rclp->head;
-
-	for (;;) {
-		if (!*rhpp)
-			return cnt;
-		if (++cnt > lim)
-			return -1;
-		rhpp = &(*rhpp)->next;
-	}
-}
-
-/*
- * Dequeue the oldest rcu_head structure from the specified callback
- * list.  This function assumes that the callback is non-lazy, but
- * the caller can later invoke rcu_cblist_dequeued_lazy() if it
- * finds otherwise (and if it cares about laziness).  This allows
- * different users to have different ways of determining laziness.
- */
-static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
-{
-	struct rcu_head *rhp;
-
-	rhp = rclp->head;
-	if (!rhp)
-		return NULL;
-	rclp->len--;
-	rclp->head = rhp->next;
-	if (!rclp->head)
-		rclp->tail = &rclp->head;
-	return rhp;
-}
-
 /*
  * Account for the fact that a previously dequeued callback turned out
  * to be marked as lazy.
@@ -118,21 +70,9 @@ static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
 	return rclp->tail;
 }
 
-/*
- * Initialize an rcu_segcblist structure.
- */
-static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp)
-{
-	int i;
-
-	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
-	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
-	rsclp->head = NULL;
-	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
-		rsclp->tails[i] = &rsclp->head;
-	rsclp->len = 0;
-	rsclp->len_lazy = 0;
-}
+void rcu_cblist_init(struct rcu_cblist *rclp);
+long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim);
+struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp);
 
 /*
  * Is the specified rcu_segcblist structure empty?
@@ -179,29 +119,6 @@ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
 	return !!rsclp->tails[RCU_NEXT_TAIL];
 }
 
-/*
- * Disable the specified rcu_segcblist structure, so that callbacks can
- * no longer be posted to it.  This structure must be empty.
- */
-static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
-{
-	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
-	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
-	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
-	rsclp->tails[RCU_NEXT_TAIL] = NULL;
-}
-
-/*
- * Is the specified segment of the specified rcu_segcblist structure
- * empty of callbacks?
- */
-static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
-{
-	if (seg == RCU_DONE_TAIL)
-		return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
-	return rsclp->tails[seg - 1] == rsclp->tails[seg];
-}
-
 /*
  * Are all segments following the specified segment of the specified
  * rcu_segcblist structure empty of callbacks?  (The specified
@@ -212,417 +129,6 @@ static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
 	return !*rsclp->tails[seg];
 }
 
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * are ready to be invoked?
- */
-static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
-{
-	return rcu_segcblist_is_enabled(rsclp) &&
-	       &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * are still pending, that is, not yet ready to be invoked?
- */
-static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
-{
-	return rcu_segcblist_is_enabled(rsclp) &&
-	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
-}
-
-/*
- * Dequeue and return the first ready-to-invoke callback.  If there
- * are no ready-to-invoke callbacks, return NULL.  Disables interrupts
- * to avoid interference.  Does not protect from interference from other
- * CPUs or tasks.
- */
-static inline struct rcu_head *
-rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
-{
-	unsigned long flags;
-	int i;
-	struct rcu_head *rhp;
-
-	local_irq_save(flags);
-	if (!rcu_segcblist_ready_cbs(rsclp)) {
-		local_irq_restore(flags);
-		return NULL;
-	}
-	rhp = rsclp->head;
-	BUG_ON(!rhp);
-	rsclp->head = rhp->next;
-	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
-		if (rsclp->tails[i] != &rhp->next)
-			break;
-		rsclp->tails[i] = &rsclp->head;
-	}
-	smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
-	WRITE_ONCE(rsclp->len, rsclp->len - 1);
-	local_irq_restore(flags);
-	return rhp;
-}
-
-/*
- * Account for the fact that a previously dequeued callback turned out
- * to be marked as lazy.
- */
-static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	rsclp->len_lazy--;
-	local_irq_restore(flags);
-}
-
-/*
- * Return a pointer to the first callback in the specified rcu_segcblist
- * structure.  This is useful for diagnostics.
- */
-static inline struct rcu_head *
-rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
-{
-	if (rcu_segcblist_is_enabled(rsclp))
-		return rsclp->head;
-	return NULL;
-}
-
-/*
- * Return a pointer to the first pending callback in the specified
- * rcu_segcblist structure.  This is useful just after posting a given
- * callback -- if that callback is the first pending callback, then
- * you cannot rely on someone else having already started up the required
- * grace period.
- */
-static inline struct rcu_head *
-rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
-{
-	if (rcu_segcblist_is_enabled(rsclp))
-		return *rsclp->tails[RCU_DONE_TAIL];
-	return NULL;
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * have not yet been processed beyond having been posted, that is,
- * does it contain callbacks in its last segment?
- */
-static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
-{
-	return rcu_segcblist_is_enabled(rsclp) &&
-	       !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
-}
-
-/*
- * Enqueue the specified callback onto the specified rcu_segcblist
- * structure, updating accounting as needed.  Note that the ->len
- * field may be accessed locklessly, hence the WRITE_ONCE().
- * The ->len field is used by rcu_barrier() and friends to determine
- * if it must post a callback on this structure, and it is OK
- * for rcu_barrier() to sometimes post callbacks needlessly, but
- * absolutely not OK for it to ever miss posting a callback.
- */
-static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
-					 struct rcu_head *rhp, bool lazy)
-{
-	WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
-	if (lazy)
-		rsclp->len_lazy++;
-	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
-	rhp->next = NULL;
-	*rsclp->tails[RCU_NEXT_TAIL] = rhp;
-	rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
-}
-
-/*
- * Entrain the specified callback onto the specified rcu_segcblist at
- * the end of the last non-empty segment.  If the entire rcu_segcblist
- * is empty, make no change, but return false.
- *
- * This is intended for use by rcu_barrier()-like primitives, -not-
- * for normal grace-period use.  IMPORTANT:  The callback you enqueue
- * will wait for all prior callbacks, NOT necessarily for a grace
- * period.  You have been warned.
- */
-static inline bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
-					 struct rcu_head *rhp, bool lazy)
-{
-	int i;
-
-	if (rcu_segcblist_n_cbs(rsclp) == 0)
-		return false;
-	WRITE_ONCE(rsclp->len, rsclp->len + 1);
-	if (lazy)
-		rsclp->len_lazy++;
-	smp_mb(); /* Ensure counts are updated before callback is entrained. */
-	rhp->next = NULL;
-	for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
-		if (rsclp->tails[i] != rsclp->tails[i - 1])
-			break;
-	*rsclp->tails[i] = rhp;
-	for (; i <= RCU_NEXT_TAIL; i++)
-		rsclp->tails[i] = &rhp->next;
-	return true;
-}
-
-/*
- * Extract only the counts from the specified rcu_segcblist structure,
- * and place them in the specified rcu_cblist structure.  This function
- * supports both callback orphaning and invocation, hence the separation
- * of counts and callbacks.  (Callbacks ready for invocation must be
- * orphaned and adopted separately from pending callbacks, but counts
- * apply to all callbacks.  Locking must be used to make sure that
- * both orphaned-callbacks lists are consistent.)
- */
-static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
-					       struct rcu_cblist *rclp)
-{
-	rclp->len_lazy += rsclp->len_lazy;
-	rclp->len += rsclp->len;
-	rsclp->len_lazy = 0;
-	WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
-}
-
-/*
- * Extract only those callbacks ready to be invoked from the specified
- * rcu_segcblist structure and place them in the specified rcu_cblist
- * structure.
- */
-static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
-						  struct rcu_cblist *rclp)
-{
-	int i;
-
-	if (!rcu_segcblist_ready_cbs(rsclp))
-		return; /* Nothing to do. */
-	*rclp->tail = rsclp->head;
-	rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
-	*rsclp->tails[RCU_DONE_TAIL] = NULL;
-	rclp->tail = rsclp->tails[RCU_DONE_TAIL];
-	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
-		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
-			rsclp->tails[i] = &rsclp->head;
-}
-
-/*
- * Extract only those callbacks still pending (not yet ready to be
- * invoked) from the specified rcu_segcblist structure and place them in
- * the specified rcu_cblist structure.  Note that this loses information
- * about any callbacks that might have been partway done waiting for
- * their grace period.  Too bad!  They will have to start over.
- */
-static inline void
-rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
-			       struct rcu_cblist *rclp)
-{
-	int i;
-
-	if (!rcu_segcblist_pend_cbs(rsclp))
-		return; /* Nothing to do. */
-	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
-	rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
-	*rsclp->tails[RCU_DONE_TAIL] = NULL;
-	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
-		rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
-}
-
-/*
- * Move the entire contents of the specified rcu_segcblist structure,
- * counts, callbacks, and all, to the specified rcu_cblist structure.
- * @@@ Why do we need this???  Moving early-boot CBs to NOCB lists?
- * @@@ Memory barrier needed?  (Not if only used at boot time...)
- */
-static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp,
-					     struct rcu_cblist *rclp)
-{
-	rcu_segcblist_extract_done_cbs(rsclp, rclp);
-	rcu_segcblist_extract_pend_cbs(rsclp, rclp);
-	rcu_segcblist_extract_count(rsclp, rclp);
-}
-
-/*
- * Insert counts from the specified rcu_cblist structure in the
- * specified rcu_segcblist structure.
- */
-static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
-					      struct rcu_cblist *rclp)
-{
-	rsclp->len_lazy += rclp->len_lazy;
-	/* ->len sampled locklessly. */
-	WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
-	rclp->len_lazy = 0;
-	rclp->len = 0;
-}
-
-/*
- * Move callbacks from the specified rcu_cblist to the beginning of the
- * done-callbacks segment of the specified rcu_segcblist.
- */
-static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
-						 struct rcu_cblist *rclp)
-{
-	int i;
-
-	if (!rclp->head)
-		return; /* No callbacks to move. */
-	*rclp->tail = rsclp->head;
-	rsclp->head = rclp->head;
-	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
-		if (&rsclp->head == rsclp->tails[i])
-			rsclp->tails[i] = rclp->tail;
-		else
-			break;
-	rclp->head = NULL;
-	rclp->tail = &rclp->head;
-}
-
-/*
- * Move callbacks from the specified rcu_cblist to the end of the
- * new-callbacks segment of the specified rcu_segcblist.
- */
-static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
-						 struct rcu_cblist *rclp)
-{
-	if (!rclp->head)
-		return; /* Nothing to do. */
-	*rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
-	rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
-	rclp->head = NULL;
-	rclp->tail = &rclp->head;
-}
-
-/*
- * Advance the callbacks in the specified rcu_segcblist structure based
- * on the current value passed in for the grace-period counter.
- */
-static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
-					 unsigned long seq)
-{
-	int i, j;
-
-	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
-	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
-		return;
-
-	/*
-	 * Find all callbacks whose ->gp_seq numbers indicate that they
-	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
-	 */
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
-		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
-			break;
-		rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
-	}
-
-	/* If no callbacks moved, nothing more need be done. */
-	if (i == RCU_WAIT_TAIL)
-		return;
-
-	/* Clean up tail pointers that might have been misordered above. */
-	for (j = RCU_WAIT_TAIL; j < i; j++)
-		rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
-
-	/*
-	 * Callbacks moved, so clean up the misordered ->tails[] pointers
-	 * that now point into the middle of the list of ready-to-invoke
-	 * callbacks.  The overall effect is to copy down the later pointers
-	 * into the gap that was created by the now-ready segments.
-	 */
-	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
-		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
-			break;  /* No more callbacks. */
-		rsclp->tails[j] = rsclp->tails[i];
-		rsclp->gp_seq[j] = rsclp->gp_seq[i];
-	}
-}
-
-/*
- * "Accelerate" callbacks based on more-accurate grace-period information.
- * The reason for this is that RCU does not synchronize the beginnings and
- * ends of grace periods, and that callbacks are posted locally.  This in
- * turn means that the callbacks must be labelled conservatively early
- * on, as getting exact information would degrade both performance and
- * scalability.  When more accurate grace-period information becomes
- * available, previously posted callbacks can be "accelerated", marking
- * them to complete at the end of the earlier grace period.
- *
- * This function operates on an rcu_segcblist structure, and also the
- * grace-period sequence number seq at which new callbacks would become
- * ready to invoke.  Returns true if there are callbacks that won't be
- * ready to invoke until seq, false otherwise.
- */
-static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
-					    unsigned long seq)
-{
-	int i;
-
-	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
-	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
-		return false;
-
-	/*
-	 * Find the segment preceding the oldest segment of callbacks
-	 * whose ->gp_seq[] completion is at or after that passed in via
-	 * "seq", skipping any empty segments.  This oldest segment, along
-	 * with any later segments, can be merged in with any newly arrived
-	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
-	 * as their ->gp_seq[] grace-period completion sequence number.
-	 */
-	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
-		if (rsclp->tails[i] != rsclp->tails[i - 1] &&
-		    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
-			break;
-
-	/*
-	 * If all the segments contain callbacks that correspond to
-	 * earlier grace-period sequence numbers than "seq", leave.
-	 * Assuming that the rcu_segcblist structure has enough
-	 * segments in its arrays, this can only happen if some of
-	 * the non-done segments contain callbacks that really are
-	 * ready to invoke.  This situation will get straightened
-	 * out by the next call to rcu_segcblist_advance().
-	 *
-	 * Also advance to the oldest segment of callbacks whose
-	 * ->gp_seq[] completion is at or after that passed in via "seq",
-	 * skipping any empty segments.
-	 */
-	if (++i >= RCU_NEXT_TAIL)
-		return false;
-
-	/*
-	 * Merge all later callbacks, including newly arrived callbacks,
-	 * into the segment located by the for-loop above.  Assign "seq"
-	 * as the ->gp_seq[] value in order to correctly handle the case
-	 * where there were no pending callbacks in the rcu_segcblist
-	 * structure other than in the RCU_NEXT_TAIL segment.
-	 */
-	for (; i < RCU_NEXT_TAIL; i++) {
-		rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
-		rsclp->gp_seq[i] = seq;
-	}
-	return true;
-}
-
-/*
- * Scan the specified rcu_segcblist structure for callbacks that need
- * a grace period later than the one specified by "seq".  We don't look
- * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
- * have a grace-period sequence number.
- */
-static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
-						  unsigned long seq)
-{
-	int i;
-
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
-		if (rsclp->tails[i - 1] != rsclp->tails[i] &&
-		    ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
-			return true;
-	return false;
-}
-
 /*
  * Interim function to return rcu_segcblist head pointer.  Longer term, the
  * rcu_segcblist will be used more pervasively, removing the need for this
@@ -643,3 +149,34 @@ static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
 	WARN_ON_ONCE(rcu_segcblist_empty(rsclp));
 	return rsclp->tails[RCU_NEXT_TAIL];
 }
+
+void rcu_segcblist_init(struct rcu_segcblist *rsclp);
+void rcu_segcblist_disable(struct rcu_segcblist *rsclp);
+bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg);
+bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp);
+bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp);
+struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp);
+void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp);
+struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp);
+struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp);
+bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp);
+void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
+			   struct rcu_head *rhp, bool lazy);
+bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
+			   struct rcu_head *rhp, bool lazy);
+void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
+				 struct rcu_cblist *rclp);
+void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
+				    struct rcu_cblist *rclp);
+void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
+				    struct rcu_cblist *rclp);
+void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
+				struct rcu_cblist *rclp);
+void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
+				   struct rcu_cblist *rclp);
+void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
+				   struct rcu_cblist *rclp);
+void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
+bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
+bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
+				    unsigned long seq);
-- 
cgit v1.2.3


From 8ef0f37efb7863a04b1e4102d42b7c0b1a59d40f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 2 May 2017 08:18:40 -0700
Subject: rcu: Open-code the rcu_cblist_empty() function

Because the rcu_cblist_empty() just samples the ->head pointer, and
because the rcu_cblist structure is quite straightforward, it makes
sense to open-code rcu_cblist_empty(p) as !p->head, cutting out a
level of indirection.  This commit makes this change.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/rcu/rcu_segcblist.h | 8 +-------
 kernel/rcu/tree.c          | 9 ++++-----
 kernel/rcu/tree_plugin.h   | 4 ++--
 3 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 86bc1101b806..7d18d41f0116 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -22,12 +22,6 @@
 
 #include <linux/rcu_segcblist.h>
 
-/* Is simple callback list empty? */
-static inline bool rcu_cblist_empty(struct rcu_cblist *rclp)
-{
-	return !rclp->head;
-}
-
 /* Return number of callbacks in simple callback list. */
 static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
 {
@@ -66,7 +60,7 @@ static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
  */
 static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
 {
-	WARN_ON_ONCE(rcu_cblist_empty(rclp));
+	WARN_ON_ONCE(!rclp->head);
 	return rclp->tail;
 }
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 91fff49d5869..35152414760d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2647,9 +2647,9 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 
 	/* First adopt the ready-to-invoke callbacks, then the done ones. */
 	rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done);
-	WARN_ON_ONCE(!rcu_cblist_empty(&rsp->orphan_done));
+	WARN_ON_ONCE(rsp->orphan_done.head);
 	rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
-	WARN_ON_ONCE(!rcu_cblist_empty(&rsp->orphan_pend));
+	WARN_ON_ONCE(rsp->orphan_pend.head);
 	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) !=
 		     !rcu_segcblist_n_cbs(&rdp->cblist));
 }
@@ -2800,9 +2800,8 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	local_irq_save(flags);
 	count = -rcu_cblist_n_cbs(&rcl);
-	trace_rcu_batch_end(rsp->name, count, !rcu_cblist_empty(&rcl),
-			    need_resched(), is_idle_task(current),
-			    rcu_is_callbacks_kthread());
+	trace_rcu_batch_end(rsp->name, count, !!rcl.head, need_resched(),
+			    is_idle_task(current), rcu_is_callbacks_kthread());
 
 	/* Update counts and requeue any remaining callbacks. */
 	rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7f1d677a2a25..6b4b1f8a272d 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1942,12 +1942,12 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 		return false;
 
 	/* First, enqueue the donelist, if any.  This preserves CB ordering. */
-	if (!rcu_cblist_empty(&rsp->orphan_done)) {
+	if (rsp->orphan_done.head) {
 		__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_done),
 					rcu_cblist_tail(&rsp->orphan_done),
 					ql, qll, flags);
 	}
-	if (!rcu_cblist_empty(&rsp->orphan_pend)) {
+	if (rsp->orphan_pend.head) {
 		__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_pend),
 					rcu_cblist_tail(&rsp->orphan_pend),
 					ql, qll, flags);
-- 
cgit v1.2.3


From 4b27f20b40a23f03df682eb1f69e9dc3da7d3b93 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 2 May 2017 08:45:25 -0700
Subject: rcu: Open-code the rcu_cblist_n_cbs() function

Because the rcu_cblist_n_cbs() just samples the ->len counter, and
because the rcu_cblist structure is quite straightforward, it makes
sense to open-code rcu_cblist_n_cbs(p) as p->len, cutting out a level
of indirection.  This commit makes this change.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/rcu/rcu_segcblist.h | 6 ------
 kernel/rcu/tree.c          | 9 ++++-----
 kernel/rcu/tree_plugin.h   | 2 +-
 kernel/rcu/tree_trace.c    | 2 +-
 4 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 7d18d41f0116..424a6b230921 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -22,12 +22,6 @@
 
 #include <linux/rcu_segcblist.h>
 
-/* Return number of callbacks in simple callback list. */
-static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
-{
-	return rclp->len;
-}
-
 /* Return number of lazy callbacks in simple callback list. */
 static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
 {
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 35152414760d..942d529fccbc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2633,9 +2633,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 		return;
 
 	/* Do the accounting first. */
-	rdp->n_cbs_adopted += rcu_cblist_n_cbs(&rsp->orphan_done);
-	if (rcu_cblist_n_lazy_cbs(&rsp->orphan_done) !=
-	    rcu_cblist_n_cbs(&rsp->orphan_done))
+	rdp->n_cbs_adopted += rsp->orphan_done.len;
+	if (rcu_cblist_n_lazy_cbs(&rsp->orphan_done) != rsp->orphan_done.len)
 		rcu_idle_count_callbacks_posted();
 	rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
 
@@ -2792,14 +2791,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 		 * Stop only if limit reached and CPU has something to do.
 		 * Note: The rcl structure counts down from zero.
 		 */
-		if (-rcu_cblist_n_cbs(&rcl) >= bl &&
+		if (-rcl.len >= bl &&
 		    (need_resched() ||
 		     (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
 			break;
 	}
 
 	local_irq_save(flags);
-	count = -rcu_cblist_n_cbs(&rcl);
+	count = -rcl.len;
 	trace_rcu_batch_end(rsp->name, count, !!rcl.head, need_resched(),
 			    is_idle_task(current), rcu_is_callbacks_kthread());
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 6b4b1f8a272d..7ebe357df155 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1934,7 +1934,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 						     struct rcu_data *rdp,
 						     unsigned long flags)
 {
-	long ql = rcu_cblist_n_cbs(&rsp->orphan_done);
+	long ql = rsp->orphan_done.len;
 	long qll = rcu_cblist_n_lazy_cbs(&rsp->orphan_done);
 
 	/* If this is not a no-CBs CPU, tell the caller to do it the old way. */
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 30c5bf89ee58..b7743aa2965f 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -278,7 +278,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 		   rsp->n_force_qs - rsp->n_force_qs_ngp,
 		   READ_ONCE(rsp->n_force_qs_lh),
 		   rcu_cblist_n_lazy_cbs(&rsp->orphan_done),
-		   rcu_cblist_n_cbs(&rsp->orphan_done));
+		   rsp->orphan_done.len);
 	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
 		if (rnp->level != level) {
 			seq_puts(m, "\n");
-- 
cgit v1.2.3


From 933dfbd7c437bbbf65caae785dfa105fbfaa8485 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 2 May 2017 08:48:33 -0700
Subject: rcu: Open-code the rcu_cblist_n_lazy_cbs() function

Because the rcu_cblist_n_lazy_cbs() just samples the ->len_lazy counter,
and because the rcu_cblist structure is quite straightforward, it makes
sense to open-code rcu_cblist_n_lazy_cbs(p) as p->len_lazy, cutting out
a level of indirection.  This commit makes this change.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/rcu/rcu_segcblist.h | 6 ------
 kernel/rcu/tree.c          | 2 +-
 kernel/rcu/tree_plugin.h   | 2 +-
 kernel/rcu/tree_trace.c    | 2 +-
 4 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 424a6b230921..6e36e36478cd 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -22,12 +22,6 @@
 
 #include <linux/rcu_segcblist.h>
 
-/* Return number of lazy callbacks in simple callback list. */
-static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
-{
-	return rclp->len_lazy;
-}
-
 /*
  * Account for the fact that a previously dequeued callback turned out
  * to be marked as lazy.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 942d529fccbc..1205c8ad138a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2634,7 +2634,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 
 	/* Do the accounting first. */
 	rdp->n_cbs_adopted += rsp->orphan_done.len;
-	if (rcu_cblist_n_lazy_cbs(&rsp->orphan_done) != rsp->orphan_done.len)
+	if (rsp->orphan_done.len_lazy != rsp->orphan_done.len)
 		rcu_idle_count_callbacks_posted();
 	rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7ebe357df155..c9a48657512a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1935,7 +1935,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 						     unsigned long flags)
 {
 	long ql = rsp->orphan_done.len;
-	long qll = rcu_cblist_n_lazy_cbs(&rsp->orphan_done);
+	long qll = rsp->orphan_done.len_lazy;
 
 	/* If this is not a no-CBs CPU, tell the caller to do it the old way. */
 	if (!rcu_is_nocb_cpu(smp_processor_id()))
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index b7743aa2965f..6cea17a1ea30 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -277,7 +277,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 		   rsp->n_force_qs, rsp->n_force_qs_ngp,
 		   rsp->n_force_qs - rsp->n_force_qs_ngp,
 		   READ_ONCE(rsp->n_force_qs_lh),
-		   rcu_cblist_n_lazy_cbs(&rsp->orphan_done),
+		   rsp->orphan_done.len_lazy,
 		   rsp->orphan_done.len);
 	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
 		if (rnp->level != level) {
-- 
cgit v1.2.3


From 5466d21411baf413c838d9a197f6734e94910a65 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Mon, 1 May 2017 17:03:44 -0400
Subject: pNFS: Don't clear the layout return info if there are segments to
 return

In pnfs_clear_layoutreturn_info, ensure that we don't clear the layout
return info if there are new segments queued for return due to, for
instance, a race between a LAYOUTRETURN and a failed I/O attempt.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 140ecd7d350f..cea1e838efae 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -322,9 +322,15 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
 static void
 pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
 {
+	struct pnfs_layout_segment *lseg;
 	lo->plh_return_iomode = 0;
 	lo->plh_return_seq = 0;
 	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+		if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+			continue;
+		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
+	}
 }
 
 static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
@@ -367,9 +373,9 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
 	struct pnfs_layout_segment *lseg, *next;
 
 	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
-	pnfs_clear_layoutreturn_info(lo);
 	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
 		pnfs_clear_lseg_state(lseg, lseg_list);
+	pnfs_clear_layoutreturn_info(lo);
 	pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
 	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
 	    !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
-- 
cgit v1.2.3


From 61f454e30c18a28924e96be12592c0d5e24bcc81 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Mon, 1 May 2017 17:06:56 -0400
Subject: pNFS: Fix a deadlock when coalescing writes and returning the layout

Consider the following deadlock:

Process P1	Process P2		Process P3
==========	==========		==========
					lock_page(page)

		lseg = pnfs_update_layout(inode)

lo = NFS_I(inode)->layout
pnfs_error_mark_layout_for_return(lo)

		lock_page(page)

					lseg = pnfs_update_layout(inode)

In this scenario,
- P1 has declared the layout to be in error, but P2 holds a reference to
  a layout segment on that inode, so the layoutreturn is deferred.
- P2 is waiting for a page lock held by P3.
- P3 is asking for a new layout segment, but is blocked waiting
  for the layoutreturn.

The fix is to ensure that pnfs_error_mark_layout_for_return() does
not set the NFS_LAYOUT_RETURN flag, which blocks P3. Instead, we allow
the latter to call LAYOUTGET so that it can make progress and unblock
P2.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cea1e838efae..adc6ec28d4b5 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2063,8 +2063,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
 		return;
 	}
 	pnfs_set_plh_return_info(lo, range.iomode, 0);
-	/* Block LAYOUTGET */
-	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
 	/*
 	 * mark all matching lsegs so that we are sure to have no live
 	 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
-- 
cgit v1.2.3


From 5f0114832a6eca70ee7563ba99b8e32da68f944b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Mon, 1 May 2017 17:35:58 -0400
Subject: pNFS: Fix a typo in pnfs_generic_alloc_ds_commits

If the layout segment is invalid, we want to just resend the remaining
writes.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs_nfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 7697ac0ff81a..ae600ab1a646 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -223,7 +223,7 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
 		 */
 		if (!pnfs_is_valid_lseg(bucket->clseg) &&
 		    !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags))
-			continue;
+			break;
 		data = nfs_commitdata_alloc(false);
 		if (!data)
 			break;
-- 
cgit v1.2.3


From 80112bffb0ed50257165f673aa73301a4458cd95 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 18 Apr 2017 11:32:15 -0400
Subject: drm/amdgpu: update revision id settings for BR/ST
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add new RIDs.

Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Xie <AlexBin.Xie@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 703bd55d6075..758d636a6f52 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1932,6 +1932,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		case 0xca:
 		case 0xce:
 		case 0x88:
+		case 0xe6:
 			/* B6 */
 			adev->gfx.config.max_cu_per_sh = 6;
 			break;
@@ -1964,17 +1965,28 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		adev->gfx.config.max_backends_per_se = 1;
 
 		switch (adev->pdev->revision) {
+		case 0x80:
+		case 0x81:
 		case 0xc0:
 		case 0xc1:
 		case 0xc2:
 		case 0xc4:
 		case 0xc8:
 		case 0xc9:
+		case 0xd6:
+		case 0xda:
+		case 0xe9:
+		case 0xea:
 			adev->gfx.config.max_cu_per_sh = 3;
 			break;
+		case 0x83:
 		case 0xd0:
 		case 0xd1:
 		case 0xd2:
+		case 0xd4:
+		case 0xdb:
+		case 0xe1:
+		case 0xe2:
 		default:
 			adev->gfx.config.max_cu_per_sh = 2;
 			break;
-- 
cgit v1.2.3


From 42ce22439fdbbdb4d95d2979056ed5b075dd8403 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 1 May 2017 16:20:42 -0400
Subject: drm/amdgpu/gfx9: use actual gpu num se setting for ngg allocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rather than using a hardcoded value.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 4073fd24dd13..ad2e08a943cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -39,7 +39,6 @@
 
 #define GFX9_NUM_GFX_RINGS     1
 #define GFX9_NUM_COMPUTE_RINGS 8
-#define GFX9_NUM_SE		4
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000
 
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
@@ -842,7 +841,7 @@ static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
 	}
 	size_se = size_se ? size_se : default_size_se;
 
-	ngg_buf->size = size_se * GFX9_NUM_SE;
+	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
 				    &ngg_buf->bo,
-- 
cgit v1.2.3


From 0274a9c55614a5c89dd04bca376dbe252f5ea334 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 17 Apr 2017 17:30:27 -0400
Subject: drm/amdgpu/gfx9: fix typo in mpd init

Using the wrong macro for soc15 register access.

Reviewed-by: monk liu <monk.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ad2e08a943cd..178fe11bd7f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1989,12 +1989,12 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 	ring->wptr = 0;
-	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
 
 	/* set the vmid for the queue */
 	mqd->cp_hqd_vmid = 0;
 
-	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
 	mqd->cp_hqd_persistent_state = tmp;
 
-- 
cgit v1.2.3


From fca4ce697f304e5d1f739399d97cbfb2b918fa7f Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 17 Apr 2017 17:34:42 -0400
Subject: drm/amdgpu/gfx9: add additional MQD initialization

Need to properly set the ROQ space setting.

Reviewed-by: monk liu <monk.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 178fe11bd7f9..5f2ae4cb510f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1998,6 +1998,11 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
 	mqd->cp_hqd_persistent_state = tmp;
 
+	/* set MIN_IB_AVAIL_SIZE */
+	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+	mqd->cp_hqd_ib_control = tmp;
+
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
 
-- 
cgit v1.2.3


From 43b9176faae1543d1a190db8eb098a8284d364f3 Mon Sep 17 00:00:00 2001
From: Shaoyun Liu <Shaoyun.Liu@amd.com>
Date: Fri, 28 Apr 2017 16:14:59 -0400
Subject: drm/amdgpu: Reserve 0-2 invalidation reg sets for none-amdgpu usages

Firmware used reg set 2 for tlb invalidation. AMDGPU can start from reg
set 3 to avoid the conflict. AMDKFD will use the reg set 0 or 1 when
necesary.

Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
Reviewws-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index e5d4dfeae3fe..dc1e1c1d6b24 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -386,7 +386,7 @@ static int gmc_v9_0_early_init(void *handle)
 static int gmc_v9_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 0 };
+	unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 3, 3 };
 	unsigned i;
 
 	for(i = 0; i < adev->num_rings; ++i) {
-- 
cgit v1.2.3


From 8a8b56638bcac4e64cccc88bf95a0f9f4b19a2fb Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Fri, 28 Apr 2017 10:50:26 +0100
Subject: metag/uaccess: Fix access_ok()

The __user_bad() macro used by access_ok() has a few corner cases
noticed by Al Viro where it doesn't behave correctly:

 - The kernel range check has off by 1 errors which permit access to the
   first and last byte of the kernel mapped range.

 - The kernel range check ends at LINCORE_BASE rather than
   META_MEMORY_LIMIT, which is ineffective when the kernel is in global
   space (an extremely uncommon configuration).

There are a couple of other shortcomings here too:

 - Access to the whole of the other address space is permitted (i.e. the
   global half of the address space when the kernel is in local space).
   This isn't ideal as it could theoretically still contain privileged
   mappings set up by the bootloader.

 - The size argument is unused, permitting user copies which start on
   valid pages at the end of the user address range and cross the
   boundary into the kernel address space (e.g. addr = 0x3ffffff0, size
   > 0x10).

It isn't very convenient to add size checks when disallowing certain
regions, and it seems far safer to be sure and explicit about what
userland is able to access, so invert the logic to allow certain regions
instead, and fix the off by 1 errors and missing size checks. This also
allows the get_fs() == KERNEL_DS check to be more easily optimised into
the user address range case.

We now have 3 such allowed regions:

 - The user address range (incorporating the get_fs() == KERNEL_DS
   check).

 - NULL (some kernel code expects this to work, and we'll always catch
   the fault anyway).

 - The core code memory region.

Fixes: 373cd784d0fc ("metag: Memory handling")
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-metag@vger.kernel.org
Cc: stable@vger.kernel.org
---
 arch/metag/include/asm/uaccess.h | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h
index 469a2f1393d3..1e5f26d2dce8 100644
--- a/arch/metag/include/asm/uaccess.h
+++ b/arch/metag/include/asm/uaccess.h
@@ -28,24 +28,32 @@
 
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
-#define __kernel_ok (segment_eq(get_fs(), KERNEL_DS))
-/*
- * Explicitly allow NULL pointers here. Parts of the kernel such
- * as readv/writev use access_ok to validate pointers, but want
- * to allow NULL pointers for various reasons. NULL pointers are
- * safe to allow through because the first page is not mappable on
- * Meta.
- *
- * We also wish to avoid letting user code access the system area
- * and the kernel half of the address space.
- */
-#define __user_bad(addr, size) (((addr) > 0 && (addr) < META_MEMORY_BASE) || \
-				((addr) > PAGE_OFFSET &&		\
-				 (addr) < LINCORE_BASE))
-
 static inline int __access_ok(unsigned long addr, unsigned long size)
 {
-	return __kernel_ok || !__user_bad(addr, size);
+	/*
+	 * Allow access to the user mapped memory area, but not the system area
+	 * before it. The check extends to the top of the address space when
+	 * kernel access is allowed (there's no real reason to user copy to the
+	 * system area in any case).
+	 */
+	if (likely(addr >= META_MEMORY_BASE && addr < get_fs().seg &&
+		   size <= get_fs().seg - addr))
+		return true;
+	/*
+	 * Explicitly allow NULL pointers here. Parts of the kernel such
+	 * as readv/writev use access_ok to validate pointers, but want
+	 * to allow NULL pointers for various reasons. NULL pointers are
+	 * safe to allow through because the first page is not mappable on
+	 * Meta.
+	 */
+	if (!addr)
+		return true;
+	/* Allow access to core code memory area... */
+	if (addr >= LINCORE_CODE_BASE && addr <= LINCORE_CODE_LIMIT &&
+	    size <= LINCORE_CODE_LIMIT + 1 - addr)
+		return true;
+	/* ... but no other areas. */
+	return false;
 }
 
 #define access_ok(type, addr, size) __access_ok((unsigned long)(addr),	\
-- 
cgit v1.2.3


From 3a158a62da0673db918b53ac1440845a5b64fd90 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Tue, 2 May 2017 19:41:06 +0100
Subject: metag/uaccess: Check access_ok in strncpy_from_user

The metag implementation of strncpy_from_user() doesn't validate the src
pointer, which could allow reading of arbitrary kernel memory. Add a
short access_ok() check to prevent that.

Its still possible for it to read across the user/kernel boundary, but
it will invariably reach a NUL character after only 9 bytes, leaking
only a static kernel address being loaded into D0Re0 at the beginning of
__start, which is acceptable for the immediate fix.

Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-metag@vger.kernel.org
Cc: stable@vger.kernel.org
---
 arch/metag/include/asm/uaccess.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h
index 1e5f26d2dce8..500f1be6e0fe 100644
--- a/arch/metag/include/asm/uaccess.h
+++ b/arch/metag/include/asm/uaccess.h
@@ -199,8 +199,13 @@ do {                                                            \
 extern long __must_check __strncpy_from_user(char *dst, const char __user *src,
 					     long count);
 
-#define strncpy_from_user(dst, src, count) __strncpy_from_user(dst, src, count)
-
+static inline long
+strncpy_from_user(char *dst, const char __user *src, long count)
+{
+	if (!access_ok(VERIFY_READ, src, 1))
+		return -EFAULT;
+	return __strncpy_from_user(dst, src, count);
+}
 /*
  * Return the size of a string (including the ending 0)
  *
-- 
cgit v1.2.3


From 4f6d9bfc882450631251052756ed84658ec88a70 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 15 Apr 2017 22:22:14 +0300
Subject: ringtest: fix an assert statement

There is an || vs && typo so the assert can never be triggered.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 tools/virtio/ringtest/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
index f31353fac541..2801ab7547e2 100644
--- a/tools/virtio/ringtest/main.c
+++ b/tools/virtio/ringtest/main.c
@@ -86,7 +86,7 @@ void set_affinity(const char *arg)
 	cpu = strtol(arg, &endptr, 0);
 	assert(!*endptr);
 
-	assert(cpu >= 0 || cpu < CPU_SETSIZE);
+	assert(cpu >= 0 && cpu < CPU_SETSIZE);
 
 	self = pthread_self();
 	CPU_ZERO(&cpuset);
-- 
cgit v1.2.3


From 9b2bbdb227588455afcc3b03475fa9b0a35d83af Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 6 Mar 2017 18:19:39 +0200
Subject: virtio: wrap find_vqs

We are going to add more parameters to find_vqs, let's wrap the call so
we don't need to tweak all drivers every time.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/block/virtio_blk.c                 | 3 +--
 drivers/char/virtio_console.c              | 6 +++---
 drivers/crypto/virtio/virtio_crypto_core.c | 3 +--
 drivers/gpu/drm/virtio/virtgpu_kms.c       | 3 +--
 drivers/net/caif/caif_virtio.c             | 3 +--
 drivers/net/virtio_net.c                   | 3 +--
 drivers/rpmsg/virtio_rpmsg_bus.c           | 2 +-
 drivers/scsi/virtio_scsi.c                 | 3 +--
 drivers/virtio/virtio_balloon.c            | 3 +--
 drivers/virtio/virtio_input.c              | 3 +--
 include/linux/virtio_config.h              | 9 +++++++++
 net/vmw_vsock/virtio_transport.c           | 6 +++---
 12 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1d4c9f8bc1e1..c08c30c35035 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -455,8 +455,7 @@ static int init_vq(struct virtio_blk *vblk)
 	}
 
 	/* Discover virtqueues and write information to configuration.  */
-	err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names,
-			&desc);
+	err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
 	if (err)
 		goto out;
 
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 87fe111d0be6..d0699c5fec43 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1945,9 +1945,9 @@ static int init_vqs(struct ports_device *portdev)
 		}
 	}
 	/* Find the queues. */
-	err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs,
-					      io_callbacks,
-					      (const char **)io_names, NULL);
+	err = virtio_find_vqs(portdev->vdev, nr_queues, vqs,
+			      io_callbacks,
+			      (const char **)io_names, NULL);
 	if (err)
 		goto free;
 
diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index 21472e427f6f..a111cd72797b 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -119,8 +119,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi)
 		names[i] = vi->data_vq[i].name;
 	}
 
-	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
-					 names, NULL);
+	ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, callbacks, names, NULL);
 	if (ret)
 		goto err_find;
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 491866865c33..1e1c90b30d4a 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -175,8 +175,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
 	DRM_INFO("virgl 3d acceleration not supported by guest\n");
 #endif
 
-	ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs,
-					    callbacks, names, NULL);
+	ret = virtio_find_vqs(vgdev->vdev, 2, vqs, callbacks, names, NULL);
 	if (ret) {
 		DRM_ERROR("failed to find virt queues\n");
 		goto err_vqs;
diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
index bc0eb47eccee..6122768c8644 100644
--- a/drivers/net/caif/caif_virtio.c
+++ b/drivers/net/caif/caif_virtio.c
@@ -679,8 +679,7 @@ static int cfv_probe(struct virtio_device *vdev)
 		goto err;
 
 	/* Get the TX virtio ring. This is a "guest side vring". */
-	err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names,
-			NULL);
+	err = virtio_find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names, NULL);
 	if (err)
 		goto err;
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f36584616e7d..71f447ab440e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2079,8 +2079,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 		names[txq2vq(i)] = vi->sq[i].name;
 	}
 
-	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
-					 names, NULL);
+	ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, callbacks, names, NULL);
 	if (ret)
 		goto err_find;
 
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 5e66e081027e..f7cade09d38a 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -869,7 +869,7 @@ static int rpmsg_probe(struct virtio_device *vdev)
 	init_waitqueue_head(&vrp->sendq);
 
 	/* We expect two virtqueues, rx and tx (and in this order) */
-	err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names, NULL);
+	err = virtio_find_vqs(vdev, 2, vqs, vq_cbs, names, NULL);
 	if (err)
 		goto free_vrp;
 
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 939c47df73fa..e9222dcb9707 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -870,8 +870,7 @@ static int virtscsi_init(struct virtio_device *vdev,
 	}
 
 	/* Discover virtqueues and write information to configuration.  */
-	err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names,
-			&desc);
+	err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
 	if (err)
 		goto out;
 
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 34adf9b9c053..408c174ef0d5 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -418,8 +418,7 @@ static int init_vqs(struct virtio_balloon *vb)
 	 * optionally stat.
 	 */
 	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
-	err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names,
-			NULL);
+	err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
 	if (err)
 		return err;
 
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
index 79f1293cda93..3a0468f2ceb0 100644
--- a/drivers/virtio/virtio_input.c
+++ b/drivers/virtio/virtio_input.c
@@ -173,8 +173,7 @@ static int virtinput_init_vqs(struct virtio_input *vi)
 	static const char * const names[] = { "events", "status" };
 	int err;
 
-	err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names,
-			NULL);
+	err = virtio_find_vqs(vi->vdev, 2, vqs, cbs, names, NULL);
 	if (err)
 		return err;
 	vi->evt = vqs[0];
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 8355bab175e1..47f3d805c290 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -179,6 +179,15 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 	return vq;
 }
 
+static inline
+int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+			struct virtqueue *vqs[], vq_callback_t *callbacks[],
+			const char * const names[],
+			struct irq_affinity *desc)
+{
+	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, desc);
+}
+
 /**
  * virtio_device_ready - enable vq use in probe function
  * @vdev: the device
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 68675a151f22..97e26e2955e1 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -573,9 +573,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
 
 	vsock->vdev = vdev;
 
-	ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
-					    vsock->vqs, callbacks, names,
-					    NULL);
+	ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX,
+			      vsock->vqs, callbacks, names,
+			      NULL);
 	if (ret < 0)
 		goto out;
 
-- 
cgit v1.2.3


From f94682dde5ed23eed13533a37dfce942e60ade4e Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 6 Mar 2017 18:32:29 +0200
Subject: virtio: add context flag to find vqs

Allows maintaining extra context per vq.  For ease of use, passing in
NULL is legal and disables the feature for all vqs.

Includes fixes by Christian for s390, acked by Cornelia.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/misc/mic/vop/vop_main.c        |  9 ++++++---
 drivers/remoteproc/remoteproc_virtio.c | 10 ++++++----
 drivers/s390/virtio/kvm_virtio.c       |  8 +++++---
 drivers/s390/virtio/virtio_ccw.c       |  7 ++++---
 drivers/virtio/virtio_mmio.c           |  8 +++++---
 drivers/virtio/virtio_pci_common.c     | 17 +++++++++++------
 drivers/virtio/virtio_pci_common.h     |  4 +++-
 drivers/virtio/virtio_pci_legacy.c     |  4 +++-
 drivers/virtio/virtio_pci_modern.c     | 12 ++++++++----
 drivers/virtio/virtio_ring.c           |  7 +++++--
 include/linux/virtio_config.h          | 18 +++++++++++++++---
 include/linux/virtio_ring.h            |  3 +++
 12 files changed, 74 insertions(+), 33 deletions(-)

diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
index c2e29d7f0de8..a341938c7e2c 100644
--- a/drivers/misc/mic/vop/vop_main.c
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -278,7 +278,7 @@ static void vop_del_vqs(struct virtio_device *dev)
 static struct virtqueue *vop_find_vq(struct virtio_device *dev,
 				     unsigned index,
 				     void (*callback)(struct virtqueue *vq),
-				     const char *name)
+				     const char *name, bool ctx)
 {
 	struct _vop_vdev *vdev = to_vopvdev(dev);
 	struct vop_device *vpdev = vdev->vpdev;
@@ -314,6 +314,7 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev,
 				le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN,
 				dev,
 				false,
+				ctx,
 				(void __force *)va, vop_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
@@ -374,7 +375,8 @@ unmap:
 static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
 			struct virtqueue *vqs[],
 			vq_callback_t *callbacks[],
-			const char * const names[], struct irq_affinity *desc)
+			const char * const names[], const bool *ctx,
+			struct irq_affinity *desc)
 {
 	struct _vop_vdev *vdev = to_vopvdev(dev);
 	struct vop_device *vpdev = vdev->vpdev;
@@ -388,7 +390,8 @@ static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
 	for (i = 0; i < nvqs; ++i) {
 		dev_dbg(_vop_dev(vdev), "%s: %d: %s\n",
 			__func__, i, names[i]);
-		vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i]);
+		vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i],
+				     ctx ? ctx[i] : false);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
 			goto error;
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index 0142cc3f0c91..294634836b32 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -71,7 +71,7 @@ EXPORT_SYMBOL(rproc_vq_interrupt);
 static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 				    unsigned int id,
 				    void (*callback)(struct virtqueue *vq),
-				    const char *name)
+				    const char *name, bool ctx)
 {
 	struct rproc_vdev *rvdev = vdev_to_rvdev(vdev);
 	struct rproc *rproc = vdev_to_rproc(vdev);
@@ -103,8 +103,8 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 	 * Create the new vq, and tell virtio we're not interested in
 	 * the 'weak' smp barriers, since we're talking with a real device.
 	 */
-	vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, addr,
-				 rproc_virtio_notify, callback, name);
+	vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, ctx,
+				 addr, rproc_virtio_notify, callback, name);
 	if (!vq) {
 		dev_err(dev, "vring_new_virtqueue %s failed\n", name);
 		rproc_free_vring(rvring);
@@ -138,12 +138,14 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 				 struct virtqueue *vqs[],
 				 vq_callback_t *callbacks[],
 				 const char * const names[],
+				 const bool * ctx,
 				 struct irq_affinity *desc)
 {
 	int i, ret;
 
 	for (i = 0; i < nvqs; ++i) {
-		vqs[i] = rp_find_vq(vdev, i, callbacks[i], names[i]);
+		vqs[i] = rp_find_vq(vdev, i, callbacks[i], names[i],
+				    ctx ? ctx[i] : false);
 		if (IS_ERR(vqs[i])) {
 			ret = PTR_ERR(vqs[i]);
 			goto error;
diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c
index 2ce0b3eb2efe..a99d09a11f05 100644
--- a/drivers/s390/virtio/kvm_virtio.c
+++ b/drivers/s390/virtio/kvm_virtio.c
@@ -189,7 +189,7 @@ static bool kvm_notify(struct virtqueue *vq)
 static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 				     unsigned index,
 				     void (*callback)(struct virtqueue *vq),
-				     const char *name)
+				     const char *name, bool ctx)
 {
 	struct kvm_device *kdev = to_kvmdev(vdev);
 	struct kvm_vqconfig *config;
@@ -211,7 +211,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 		goto out;
 
 	vq = vring_new_virtqueue(index, config->num, KVM_S390_VIRTIO_RING_ALIGN,
-				 vdev, true, (void *) config->address,
+				 vdev, true, ctx, (void *) config->address,
 				 kvm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
@@ -256,6 +256,7 @@ static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 			struct virtqueue *vqs[],
 			vq_callback_t *callbacks[],
 			const char * const names[],
+			const bool *ctx,
 			struct irq_affinity *desc)
 {
 	struct kvm_device *kdev = to_kvmdev(vdev);
@@ -266,7 +267,8 @@ static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		return -ENOENT;
 
 	for (i = 0; i < nvqs; ++i) {
-		vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]);
+		vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i],
+				     ctx ? ctx[i] : false);
 		if (IS_ERR(vqs[i]))
 			goto error;
 	}
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 0ed209f3d8b0..2a76ea78a0bf 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -484,7 +484,7 @@ static void virtio_ccw_del_vqs(struct virtio_device *vdev)
 
 static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
 					     int i, vq_callback_t *callback,
-					     const char *name,
+					     const char *name, bool ctx,
 					     struct ccw1 *ccw)
 {
 	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
@@ -522,7 +522,7 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
 	}
 
 	vq = vring_new_virtqueue(i, info->num, KVM_VIRTIO_CCW_RING_ALIGN, vdev,
-				 true, info->queue, virtio_ccw_kvm_notify,
+				 true, ctx, info->queue, virtio_ccw_kvm_notify,
 				 callback, name);
 	if (!vq) {
 		/* For now, we fail if we can't get the requested size. */
@@ -629,6 +629,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 			       struct virtqueue *vqs[],
 			       vq_callback_t *callbacks[],
 			       const char * const names[],
+			       const bool *ctx,
 			       struct irq_affinity *desc)
 {
 	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
@@ -642,7 +643,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 
 	for (i = 0; i < nvqs; ++i) {
 		vqs[i] = virtio_ccw_setup_vq(vdev, i, callbacks[i], names[i],
-					     ccw);
+					     ctx ? ctx[i] : false, ccw);
 		if (IS_ERR(vqs[i])) {
 			ret = PTR_ERR(vqs[i]);
 			vqs[i] = NULL;
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 78343b8f9034..74dc7170fd35 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -351,7 +351,7 @@ static void vm_del_vqs(struct virtio_device *vdev)
 
 static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 				  void (*callback)(struct virtqueue *vq),
-				  const char *name)
+				  const char *name, bool ctx)
 {
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
 	struct virtio_mmio_vq_info *info;
@@ -388,7 +388,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 
 	/* Create the vring */
 	vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev,
-				 true, true, vm_notify, callback, name);
+				 true, true, ctx, vm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto error_new_virtqueue;
@@ -447,6 +447,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		       struct virtqueue *vqs[],
 		       vq_callback_t *callbacks[],
 		       const char * const names[],
+		       const bool *ctx,
 		       struct irq_affinity *desc)
 {
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
@@ -459,7 +460,8 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		return err;
 
 	for (i = 0; i < nvqs; ++i) {
-		vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
+		vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i],
+				     ctx ? ctx[i] : false);
 		if (IS_ERR(vqs[i])) {
 			vm_del_vqs(vdev);
 			return PTR_ERR(vqs[i]);
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 698d5d06fa03..007a4f366086 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -172,6 +172,7 @@ error:
 static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
 				     void (*callback)(struct virtqueue *vq),
 				     const char *name,
+				     bool ctx,
 				     u16 msix_vec)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
@@ -183,7 +184,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
 	if (!info)
 		return ERR_PTR(-ENOMEM);
 
-	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name,
+	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
 			      msix_vec);
 	if (IS_ERR(vq))
 		goto out_info;
@@ -274,6 +275,7 @@ void vp_del_vqs(struct virtio_device *vdev)
 static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
 		const char * const names[], bool per_vq_vectors,
+		const bool *ctx,
 		struct irq_affinity *desc)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
@@ -315,6 +317,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 		else
 			msix_vec = VP_MSIX_VQ_VECTOR;
 		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+				     ctx ? ctx[i] : false,
 				     msix_vec);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
@@ -345,7 +348,7 @@ error_find:
 
 static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[])
+		const char * const names[], const bool *ctx)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	int i, err;
@@ -367,6 +370,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
 			continue;
 		}
 		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+				     ctx ? ctx[i] : false,
 				     VIRTIO_MSI_NO_VECTOR);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
@@ -383,20 +387,21 @@ out_del_vqs:
 /* the config->find_vqs() implementation */
 int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[], struct irq_affinity *desc)
+		const char * const names[], const bool *ctx,
+		struct irq_affinity *desc)
 {
 	int err;
 
 	/* Try MSI-X with one vector per queue. */
-	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, desc);
+	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
 	if (!err)
 		return 0;
 	/* Fallback: MSI-X with one vector for config, one shared for queues. */
-	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, desc);
+	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
 	if (!err)
 		return 0;
 	/* Finally fall back to regular interrupts. */
-	return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names);
+	return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
 }
 
 const char *vp_bus_name(struct virtio_device *vdev)
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index e96334aec1e0..135ee3cf7175 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -102,6 +102,7 @@ struct virtio_pci_device {
 				      unsigned idx,
 				      void (*callback)(struct virtqueue *vq),
 				      const char *name,
+				      bool ctx,
 				      u16 msix_vec);
 	void (*del_vq)(struct virtio_pci_vq_info *info);
 
@@ -131,7 +132,8 @@ void vp_del_vqs(struct virtio_device *vdev);
 /* the config->find_vqs() implementation */
 int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[], struct irq_affinity *desc);
+		const char * const names[], const bool *ctx,
+		struct irq_affinity *desc);
 const char *vp_bus_name(struct virtio_device *vdev);
 
 /* Setup the affinity for a virtqueue:
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index 4bfa48fb1324..2780886e8ba3 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -116,6 +116,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 				  unsigned index,
 				  void (*callback)(struct virtqueue *vq),
 				  const char *name,
+				  bool ctx,
 				  u16 msix_vec)
 {
 	struct virtqueue *vq;
@@ -135,7 +136,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	/* create the vring */
 	vq = vring_create_virtqueue(index, num,
 				    VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
-				    true, false, vp_notify, callback, name);
+				    true, false, ctx,
+				    vp_notify, callback, name);
 	if (!vq)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 8978f109d2d7..2555d80f6eec 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -297,6 +297,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 				  unsigned index,
 				  void (*callback)(struct virtqueue *vq),
 				  const char *name,
+				  bool ctx,
 				  u16 msix_vec)
 {
 	struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
@@ -328,7 +329,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	/* create the vring */
 	vq = vring_create_virtqueue(index, num,
 				    SMP_CACHE_BYTES, &vp_dev->vdev,
-				    true, true, vp_notify, callback, name);
+				    true, true, ctx,
+				    vp_notify, callback, name);
 	if (!vq)
 		return ERR_PTR(-ENOMEM);
 
@@ -387,12 +389,14 @@ err_map_notify:
 }
 
 static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
-		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[], struct irq_affinity *desc)
+			      struct virtqueue *vqs[],
+			      vq_callback_t *callbacks[],
+			      const char * const names[], const bool *ctx,
+			      struct irq_affinity *desc)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtqueue *vq;
-	int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, desc);
+	int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
 
 	if (rc)
 		return rc;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 409aeaa49246..b23b5fae468b 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -916,6 +916,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 					struct vring vring,
 					struct virtio_device *vdev,
 					bool weak_barriers,
+					bool context,
 					bool (*notify)(struct virtqueue *),
 					void (*callback)(struct virtqueue *),
 					const char *name)
@@ -1019,6 +1020,7 @@ struct virtqueue *vring_create_virtqueue(
 	struct virtio_device *vdev,
 	bool weak_barriers,
 	bool may_reduce_num,
+	bool context,
 	bool (*notify)(struct virtqueue *),
 	void (*callback)(struct virtqueue *),
 	const char *name)
@@ -1058,7 +1060,7 @@ struct virtqueue *vring_create_virtqueue(
 	queue_size_in_bytes = vring_size(num, vring_align);
 	vring_init(&vring, num, queue, vring_align);
 
-	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers,
+	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 				   notify, callback, name);
 	if (!vq) {
 		vring_free_queue(vdev, queue_size_in_bytes, queue,
@@ -1079,6 +1081,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      bool weak_barriers,
+				      bool context,
 				      void *pages,
 				      bool (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
@@ -1086,7 +1089,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 {
 	struct vring vring;
 	vring_init(&vring, num, pages, vring_align);
-	return __vring_new_virtqueue(index, vring, vdev, weak_barriers,
+	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 				     notify, callback, name);
 }
 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 47f3d805c290..0133d8a12ccd 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -72,7 +72,8 @@ struct virtio_config_ops {
 	void (*reset)(struct virtio_device *vdev);
 	int (*find_vqs)(struct virtio_device *, unsigned nvqs,
 			struct virtqueue *vqs[], vq_callback_t *callbacks[],
-			const char * const names[], struct irq_affinity *desc);
+			const char * const names[], const bool *ctx,
+			struct irq_affinity *desc);
 	void (*del_vqs)(struct virtio_device *);
 	u64 (*get_features)(struct virtio_device *vdev);
 	int (*finalize_features)(struct virtio_device *vdev);
@@ -173,7 +174,8 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 	vq_callback_t *callbacks[] = { c };
 	const char *names[] = { n };
 	struct virtqueue *vq;
-	int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL);
+	int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL,
+					 NULL);
 	if (err < 0)
 		return ERR_PTR(err);
 	return vq;
@@ -185,7 +187,17 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 			const char * const names[],
 			struct irq_affinity *desc)
 {
-	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, desc);
+	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc);
+}
+
+static inline
+int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
+			struct virtqueue *vqs[], vq_callback_t *callbacks[],
+			const char * const names[], const bool *ctx,
+			struct irq_affinity *desc)
+{
+	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx,
+				      desc);
 }
 
 /**
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index e8d36938f09a..270cfa81830e 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -71,6 +71,7 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
 					 struct virtio_device *vdev,
 					 bool weak_barriers,
 					 bool may_reduce_num,
+					 bool ctx,
 					 bool (*notify)(struct virtqueue *vq),
 					 void (*callback)(struct virtqueue *vq),
 					 const char *name);
@@ -80,6 +81,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 					struct vring vring,
 					struct virtio_device *vdev,
 					bool weak_barriers,
+					bool ctx,
 					bool (*notify)(struct virtqueue *),
 					void (*callback)(struct virtqueue *),
 					const char *name);
@@ -93,6 +95,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      bool weak_barriers,
+				      bool ctx,
 				      void *pages,
 				      bool (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
-- 
cgit v1.2.3


From 0a12ae4024a5167c30444d722b0cbafbdb5f4b57 Mon Sep 17 00:00:00 2001
From: Sekhar Nori <nsekhar@ti.com>
Date: Mon, 17 Apr 2017 16:42:15 +0530
Subject: tools/virtio: fix build breakage

Previous commit ("virtio: add context flag to find vqs")
added a new 'context' flag to vring_new_virtqueue(), but the
corresponding API in tools/virtio/ is not updated causing
build errors due to conflicting declarations.

Bring code in tools/virtio in sync with that in kernel.

I have used 'false' for the value of the new boolean 'context'
flag as that seems to be the best way to preserve existing
behavior.

Tested with:

$ make -C tools/virtio clean all ARCH=x86

Signed-off-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tools/virtio/linux/virtio.h | 1 +
 tools/virtio/virtio_test.c  | 2 +-
 tools/virtio/vringh_test.c  | 7 ++++---
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 9377c8b4ac16..d8f534025b7f 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -57,6 +57,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      bool weak_barriers,
+				      bool ctx,
 				      void *pages,
 				      bool (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index e0445898f08f..76d6f583c249 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -100,7 +100,7 @@ static void vq_info_add(struct vdev_info *dev, int num)
 	vring_init(&info->vring, num, info->ring, 4096);
 	info->vq = vring_new_virtqueue(info->idx,
 				       info->vring.num, 4096, &dev->vdev,
-				       true, info->ring,
+				       true, false, info->ring,
 				       vq_notify, vq_callback, "test");
 	assert(info->vq);
 	info->vq->priv = info;
diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
index 5f94f5105678..9476c616d064 100644
--- a/tools/virtio/vringh_test.c
+++ b/tools/virtio/vringh_test.c
@@ -314,7 +314,8 @@ static int parallel_test(u64 features,
 			err(1, "Could not set affinity to cpu %u", first_cpu);
 
 		vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true,
-					 guest_map, fast_vringh ? no_notify_host
+					 false, guest_map,
+					 fast_vringh ? no_notify_host
 					 : parallel_notify_host,
 					 never_callback_guest, "guest vq");
 
@@ -479,7 +480,7 @@ int main(int argc, char *argv[])
 	memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN));
 
 	/* Set up guest side. */
-	vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
+	vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, false,
 				 __user_addr_min,
 				 never_notify_host, never_callback_guest,
 				 "guest vq");
@@ -663,7 +664,7 @@ int main(int argc, char *argv[])
 		/* Force creation of direct, which we modify. */
 		__virtio_clear_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
 		vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
-					 __user_addr_min,
+					 false, __user_addr_min,
 					 never_notify_host,
 					 never_callback_guest,
 					 "guest vq");
-- 
cgit v1.2.3


From 5a08b04f637921e44ba767c07c74b0535504ab71 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 7 Feb 2017 06:15:13 +0200
Subject: virtio: allow extra context per descriptor

Allow extra context per descriptor. To avoid slow down for data path,
this disables use of indirect descriptors for this vq.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_ring.c | 70 ++++++++++++++++++++++++++++++++++++--------
 include/linux/virtio.h       |  9 ++++++
 2 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b23b5fae468b..5e1b548828e6 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -263,6 +263,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 				unsigned int out_sgs,
 				unsigned int in_sgs,
 				void *data,
+				void *ctx,
 				gfp_t gfp)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -275,6 +276,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	START_USE(vq);
 
 	BUG_ON(data == NULL);
+	BUG_ON(ctx && vq->indirect);
 
 	if (unlikely(vq->broken)) {
 		END_USE(vq);
@@ -389,6 +391,8 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	vq->desc_state[head].data = data;
 	if (indirect)
 		vq->desc_state[head].indir_desc = desc;
+	if (ctx)
+		vq->desc_state[head].indir_desc = ctx;
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
@@ -461,7 +465,8 @@ int virtqueue_add_sgs(struct virtqueue *_vq,
 		for (sg = sgs[i]; sg; sg = sg_next(sg))
 			total_sg++;
 	}
-	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp);
+	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
+			     data, NULL, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
 
@@ -483,7 +488,7 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
 			 void *data,
 			 gfp_t gfp)
 {
-	return virtqueue_add(vq, &sg, num, 1, 0, data, gfp);
+	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
 
@@ -505,10 +510,34 @@ int virtqueue_add_inbuf(struct virtqueue *vq,
 			void *data,
 			gfp_t gfp)
 {
-	return virtqueue_add(vq, &sg, num, 0, 1, data, gfp);
+	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
 
+/**
+ * virtqueue_add_inbuf_ctx - expose input buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
+ * @data: the token identifying the buffer.
+ * @ctx: extra context for the token
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
+			struct scatterlist *sg, unsigned int num,
+			void *data,
+			void *ctx,
+			gfp_t gfp)
+{
+	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
+
 /**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @vq: the struct virtqueue
@@ -598,7 +627,8 @@ bool virtqueue_kick(struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick);
 
-static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
+static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
+		       void **ctx)
 {
 	unsigned int i, j;
 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
@@ -622,10 +652,15 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 	/* Plus final descriptor */
 	vq->vq.num_free++;
 
-	/* Free the indirect table, if any, now that it's unmapped. */
-	if (vq->desc_state[head].indir_desc) {
+	if (vq->indirect) {
 		struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
-		u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
+		u32 len;
+
+		/* Free the indirect table, if any, now that it's unmapped. */
+		if (!indir_desc)
+			return;
+
+		len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
 
 		BUG_ON(!(vq->vring.desc[head].flags &
 			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
@@ -634,8 +669,10 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 		for (j = 0; j < len / sizeof(struct vring_desc); j++)
 			vring_unmap_one(vq, &indir_desc[j]);
 
-		kfree(vq->desc_state[head].indir_desc);
+		kfree(indir_desc);
 		vq->desc_state[head].indir_desc = NULL;
+	} else if (ctx) {
+		*ctx = vq->desc_state[head].indir_desc;
 	}
 }
 
@@ -660,7 +697,8 @@ static inline bool more_used(const struct vring_virtqueue *vq)
  * Returns NULL if there are no used buffers, or the "data" token
  * handed to virtqueue_add_*().
  */
-void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
+void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
+			    void **ctx)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	void *ret;
@@ -698,7 +736,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 
 	/* detach_buf clears data, so grab it now. */
 	ret = vq->desc_state[i].data;
-	detach_buf(vq, i);
+	detach_buf(vq, i, ctx);
 	vq->last_used_idx++;
 	/* If we expect an interrupt for the next entry, tell host
 	 * by writing event index and flush out the write before
@@ -715,8 +753,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 	END_USE(vq);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(virtqueue_get_buf);
+EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
 
+void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
+{
+	return virtqueue_get_buf_ctx(_vq, len, NULL);
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_buf);
 /**
  * virtqueue_disable_cb - disable callbacks
  * @vq: the struct virtqueue we're talking about.
@@ -878,7 +921,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 			continue;
 		/* detach_buf clears data, so grab it now. */
 		buf = vq->desc_state[i].data;
-		detach_buf(vq, i);
+		detach_buf(vq, i, NULL);
 		vq->avail_idx_shadow--;
 		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
 		END_USE(vq);
@@ -951,7 +994,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	vq->last_add_time_valid = false;
 #endif
 
-	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
+	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
+		!context;
 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
 	/* No callback?  Tell other side not to bother us. */
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 7edfbdb55a99..ed04753278d4 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -44,6 +44,12 @@ int virtqueue_add_inbuf(struct virtqueue *vq,
 			void *data,
 			gfp_t gfp);
 
+int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
+			    struct scatterlist sg[], unsigned int num,
+			    void *data,
+			    void *ctx,
+			    gfp_t gfp);
+
 int virtqueue_add_sgs(struct virtqueue *vq,
 		      struct scatterlist *sgs[],
 		      unsigned int out_sgs,
@@ -59,6 +65,9 @@ bool virtqueue_notify(struct virtqueue *vq);
 
 void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
 
+void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
+			    void **ctx);
+
 void virtqueue_disable_cb(struct virtqueue *vq);
 
 bool virtqueue_enable_cb(struct virtqueue *vq);
-- 
cgit v1.2.3


From b843f62ad942f3879c2d7acd27b44f931c609685 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 27 Apr 2017 21:21:09 -0300
Subject: perf symbols: Accept symbols starting at address 0

That is the case of _text on s390, and we have some functions that return an
address, using address zero to report problems, oops.

This would lead the symbol loading routines to not use "_text" as the reference
relocation symbol, or the first symbol for the kernel, but use instead
"_stext", that is at the same address on x86_64 and others, but not on s390:

  [acme@localhost perf-4.11.0-rc6]$ head -15 /proc/kallsyms
  0000000000000000 T _text
  0000000000000418 t iplstart
  0000000000000800 T start
  000000000000080a t .base
  000000000000082e t .sk8x8
  0000000000000834 t .gotr
  0000000000000842 t .cmd
  0000000000000846 t .parm
  000000000000084a t .lowcase
  0000000000010000 T startup
  0000000000010010 T startup_kdump
  0000000000010214 t startup_kdump_relocated
  0000000000011000 T startup_continue
  00000000000112a0 T _ehead
  0000000000100000 T _stext
  [acme@localhost perf-4.11.0-rc6]$

Which in turn would make 'perf test vmlinux' to fail because it wouldn't find
the symbols before "_stext" in kallsyms.

Fix it by using the return value only for errors and storing the
address, when the symbol is successfully found, in a provided pointer
arg.

Before this patch:

After:

  [acme@localhost perf-4.11.0-rc6]$ tools/perf/perf test -v 1
   1: vmlinux symtab matches kallsyms            :
  --- start ---
  test child forked, pid 40693
  Looking at the vmlinux_path (8 entries long)
  Using /usr/lib/debug/lib/modules/3.10.0-654.el7.s390x/vmlinux for symbols
  ERR : 0: _text not on kallsyms
  ERR : 0x418: iplstart not on kallsyms
  ERR : 0x800: start not on kallsyms
  ERR : 0x80a: .base not on kallsyms
  ERR : 0x82e: .sk8x8 not on kallsyms
  ERR : 0x834: .gotr not on kallsyms
  ERR : 0x842: .cmd not on kallsyms
  ERR : 0x846: .parm not on kallsyms
  ERR : 0x84a: .lowcase not on kallsyms
  ERR : 0x10000: startup not on kallsyms
  ERR : 0x10010: startup_kdump not on kallsyms
  ERR : 0x10214: startup_kdump_relocated not on kallsyms
  ERR : 0x11000: startup_continue not on kallsyms
  ERR : 0x112a0: _ehead not on kallsyms
  <SNIP warnings>
  test child finished with -1
  ---- end ----
  vmlinux symtab matches kallsyms: FAILED!
  [acme@localhost perf-4.11.0-rc6]$

After:

  [acme@localhost perf-4.11.0-rc6]$ tools/perf/perf test -v 1
   1: vmlinux symtab matches kallsyms            :
  --- start ---
  test child forked, pid 47160
  <SNIP warnings>
  test child finished with 0
  ---- end ----
  vmlinux symtab matches kallsyms: Ok
  [acme@localhost perf-4.11.0-rc6]$

Reported-by: Michael Petlan <mpetlan@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-9x9bwgd3btwdk1u51xie93fz@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-buildid-cache.c | 13 ++++++++-----
 tools/perf/util/event.c            |  9 +++++----
 tools/perf/util/event.h            |  4 ++--
 tools/perf/util/machine.c          | 28 +++++++++++++++++-----------
 tools/perf/util/symbol.c           | 11 +++++------
 5 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 64b44e81c771..9eba7f1add1f 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -49,19 +49,22 @@ static bool same_kallsyms_reloc(const char *from_dir, char *to_dir)
 	char to[PATH_MAX];
 	const char *name;
 	u64 addr1 = 0, addr2 = 0;
-	int i;
+	int i, err = -1;
 
 	scnprintf(from, sizeof(from), "%s/kallsyms", from_dir);
 	scnprintf(to, sizeof(to), "%s/kallsyms", to_dir);
 
 	for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
-		addr1 = kallsyms__get_function_start(from, name);
-		if (addr1)
+		err = kallsyms__get_function_start(from, name, &addr1);
+		if (!err)
 			break;
 	}
 
-	if (name)
-		addr2 = kallsyms__get_function_start(to, name);
+	if (err)
+		return false;
+
+	if (kallsyms__get_function_start(to, name, &addr2))
+		return false;
 
 	return addr1 == addr2;
 }
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 437194fe0434..dc5c3bb69d73 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -768,15 +768,16 @@ static int find_symbol_cb(void *arg, const char *name, char type,
 	return 1;
 }
 
-u64 kallsyms__get_function_start(const char *kallsyms_filename,
-				 const char *symbol_name)
+int kallsyms__get_function_start(const char *kallsyms_filename,
+				 const char *symbol_name, u64 *addr)
 {
 	struct process_symbol_args args = { .name = symbol_name, };
 
 	if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0)
-		return 0;
+		return -1;
 
-	return args.start;
+	*addr = args.start;
+	return 0;
 }
 
 int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index cfbe32bd7413..27ac047490c3 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -675,8 +675,8 @@ size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf(union perf_event *event, FILE *fp);
 
-u64 kallsyms__get_function_start(const char *kallsyms_filename,
-				 const char *symbol_name);
+int kallsyms__get_function_start(const char *kallsyms_filename,
+				 const char *symbol_name, u64 *addr);
 
 void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max);
 void  cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 7a47f52ccfcc..d97e014c3df3 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -796,11 +796,11 @@ const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL};
  * Returns the name of the start symbol in *symbol_name. Pass in NULL as
  * symbol_name if it's not that important.
  */
-static u64 machine__get_running_kernel_start(struct machine *machine,
-					     const char **symbol_name)
+static int machine__get_running_kernel_start(struct machine *machine,
+					     const char **symbol_name, u64 *start)
 {
 	char filename[PATH_MAX];
-	int i;
+	int i, err = -1;
 	const char *name;
 	u64 addr = 0;
 
@@ -810,21 +810,28 @@ static u64 machine__get_running_kernel_start(struct machine *machine,
 		return 0;
 
 	for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
-		addr = kallsyms__get_function_start(filename, name);
-		if (addr)
+		err = kallsyms__get_function_start(filename, name, &addr);
+		if (!err)
 			break;
 	}
 
+	if (err)
+		return -1;
+
 	if (symbol_name)
 		*symbol_name = name;
 
-	return addr;
+	*start = addr;
+	return 0;
 }
 
 int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
 {
 	int type;
-	u64 start = machine__get_running_kernel_start(machine, NULL);
+	u64 start = 0;
+
+	if (machine__get_running_kernel_start(machine, NULL, &start))
+		return -1;
 
 	/* In case of renewal the kernel map, destroy previous one */
 	machine__destroy_kernel_maps(machine);
@@ -1185,8 +1192,8 @@ static int machine__create_modules(struct machine *machine)
 int machine__create_kernel_maps(struct machine *machine)
 {
 	struct dso *kernel = machine__get_kernel(machine);
-	const char *name;
-	u64 addr;
+	const char *name = NULL;
+	u64 addr = 0;
 	int ret;
 
 	if (kernel == NULL)
@@ -1211,8 +1218,7 @@ int machine__create_kernel_maps(struct machine *machine)
 	 */
 	map_groups__fixup_end(&machine->kmaps);
 
-	addr = machine__get_running_kernel_start(machine, &name);
-	if (!addr) {
+	if (machine__get_running_kernel_start(machine, &name, &addr)) {
 	} else if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
 		machine__destroy_kernel_maps(machine);
 		return -1;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 2cb7665e9973..b349e8eda0e2 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -466,7 +466,7 @@ void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym)
 struct symbol *dso__find_symbol(struct dso *dso,
 				enum map_type type, u64 addr)
 {
-	if (dso->last_find_result[type].addr != addr) {
+	if (dso->last_find_result[type].addr != addr || dso->last_find_result[type].symbol == NULL) {
 		dso->last_find_result[type].addr   = addr;
 		dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr);
 	}
@@ -1075,8 +1075,9 @@ static int validate_kcore_addresses(const char *kallsyms_filename,
 	if (kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) {
 		u64 start;
 
-		start = kallsyms__get_function_start(kallsyms_filename,
-						     kmap->ref_reloc_sym->name);
+		if (kallsyms__get_function_start(kallsyms_filename,
+						 kmap->ref_reloc_sym->name, &start))
+			return -ENOENT;
 		if (start != kmap->ref_reloc_sym->addr)
 			return -EINVAL;
 	}
@@ -1248,9 +1249,7 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta)
 	if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name)
 		return 0;
 
-	addr = kallsyms__get_function_start(filename,
-					    kmap->ref_reloc_sym->name);
-	if (!addr)
+	if (kallsyms__get_function_start(filename, kmap->ref_reloc_sym->name, &addr))
 		return -1;
 
 	*delta = addr - kmap->ref_reloc_sym->addr;
-- 
cgit v1.2.3


From d80406453ad4a69932dc17a617d5b7bc7ae80b8f Mon Sep 17 00:00:00 2001
From: Paul Clarke <pc@us.ibm.com>
Date: Tue, 25 Apr 2017 13:15:49 -0500
Subject: perf symbols: Allow user probes on versioned symbols

Symbol versioning, as in glibc, results in symbols being defined as:

  <real symbol>@[@]<version>

(Note that "@@" identifies a default symbol, if the symbol name is
repeated.)

perf is currently unable to deal with this, and is unable to create user
probes at such symbols:

  --
  $ nm /lib/powerpc64le-linux-gnu/libpthread.so.0 | grep pthread_create
  0000000000008d30 t __pthread_create_2_1
  0000000000008d30 T pthread_create@@GLIBC_2.17
  $ /usr/bin/sudo perf probe -v -x /lib/powerpc64le-linux-gnu/libpthread.so.0 pthread_create
  probe-definition(0): pthread_create
  symbol:pthread_create file:(null) line:0 offset:0 return:0 lazy:(null)
  0 arguments
  Open Debuginfo file: /usr/lib/debug/lib/powerpc64le-linux-gnu/libpthread-2.19.so
  Try to find probe point from debuginfo.
  Probe point 'pthread_create' not found.
     Error: Failed to add events. Reason: No such file or directory (Code: -2)
  --

One is not able to specify the fully versioned symbol, either, due to
syntactic conflicts with other uses of "@" by perf:

  --
  $ /usr/bin/sudo perf probe -v -x /lib/powerpc64le-linux-gnu/libpthread.so.0 pthread_create@@GLIBC_2.17
  probe-definition(0): pthread_create@@GLIBC_2.17
  Semantic error :SRC@SRC is not allowed.
  0 arguments
     Error: Command Parse Error. Reason: Invalid argument (Code: -22)
  --

This patch ignores versioning for default symbols, thus allowing probes to be
created for these symbols:

  --
  $ /usr/bin/sudo ./perf probe -x /lib/powerpc64le-linux-gnu/libpthread.so.0 pthread_create
  Added new event:
     probe_libpthread:pthread_create (on pthread_create in /lib/powerpc64le-linux-gnu/libpthread-2.19.so)

  You can now use it in all perf tools, such as:

           perf record -e probe_libpthread:pthread_create -aR sleep 1

  $ /usr/bin/sudo ./perf record -e probe_libpthread:pthread_create -aR ./test 2
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.052 MB perf.data (2 samples) ]
  $ /usr/bin/sudo ./perf script
               test  2915 [000] 19124.260729: probe_libpthread:pthread_create: (3fff99248d38)
               test  2916 [000] 19124.260962: probe_libpthread:pthread_create: (3fff99248d38)
  $ /usr/bin/sudo ./perf probe --del=probe_libpthread:pthread_create
  Removed event: probe_libpthread:pthread_create
  --

Committer note:

Change the variable storing the result of strlen() to 'int', to fix the build
on debian:experimental-x-mipsel, fedora:24-x-ARC-uClibc, ubuntu:16.04-x-arm,
etc:

  util/symbol.c: In function 'symbol__match_symbol_name':
  util/symbol.c:422:11: error: comparison between signed and unsigned integer expressions [-Werror=sign-compare]
     if (len < versioning - name)
             ^

Signed-off-by: Paul A. Clarke <pc@us.ibm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/c2b18d9c-17f8-9285-4868-f58b6359ccac@us.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/powerpc/util/sym-handling.c | 12 ++++++
 tools/perf/util/map.c                       |  5 ---
 tools/perf/util/map.h                       |  5 ++-
 tools/perf/util/symbol.c                    | 61 +++++++++++++++++++++++------
 tools/perf/util/symbol.h                    | 11 ++++++
 5 files changed, 74 insertions(+), 20 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index 39dbe512b9fc..bf9a2594572c 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -52,6 +52,18 @@ int arch__compare_symbol_names(const char *namea, const char *nameb)
 
 	return strcmp(namea, nameb);
 }
+
+int arch__compare_symbol_names_n(const char *namea, const char *nameb,
+				 unsigned int n)
+{
+	/* Skip over initial dot */
+	if (*namea == '.')
+		namea++;
+	if (*nameb == '.')
+		nameb++;
+
+	return strncmp(namea, nameb, n);
+}
 #endif
 
 #if defined(_CALL_ELF) && _CALL_ELF == 2
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index ebfa5d92358a..2179b2deb730 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -325,11 +325,6 @@ int map__load(struct map *map)
 	return 0;
 }
 
-int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
-{
-	return strcmp(namea, nameb);
-}
-
 struct symbol *map__find_symbol(struct map *map, u64 addr)
 {
 	if (map__load(map) < 0)
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index c8a5a644c0a9..f9e8ac8a52cd 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -130,13 +130,14 @@ struct thread;
  */
 #define __map__for_each_symbol_by_name(map, sym_name, pos)	\
 	for (pos = map__find_symbol_by_name(map, sym_name);	\
-	     pos && arch__compare_symbol_names(pos->name, sym_name) == 0;	\
+	     pos &&						\
+	     !symbol__match_symbol_name(pos->name, sym_name,	\
+					SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); \
 	     pos = symbol__next_by_name(pos))
 
 #define map__for_each_symbol_by_name(map, sym_name, pos)		\
 	__map__for_each_symbol_by_name(map, sym_name, (pos))
 
-int arch__compare_symbol_names(const char *namea, const char *nameb);
 void map__init(struct map *map, enum map_type type,
 	       u64 start, u64 end, u64 pgoff, struct dso *dso);
 struct map *map__new(struct machine *machine, u64 start, u64 len,
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b349e8eda0e2..8f2b068ff756 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -90,6 +90,17 @@ static int prefix_underscores_count(const char *str)
 	return tail - str;
 }
 
+int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+	return strcmp(namea, nameb);
+}
+
+int __weak arch__compare_symbol_names_n(const char *namea, const char *nameb,
+					unsigned int n)
+{
+	return strncmp(namea, nameb, n);
+}
+
 int __weak arch__choose_best_symbol(struct symbol *syma,
 				    struct symbol *symb __maybe_unused)
 {
@@ -399,8 +410,26 @@ static void symbols__sort_by_name(struct rb_root *symbols,
 	}
 }
 
+int symbol__match_symbol_name(const char *name, const char *str,
+			      enum symbol_tag_include includes)
+{
+	const char *versioning;
+
+	if (includes == SYMBOL_TAG_INCLUDE__DEFAULT_ONLY &&
+	    (versioning = strstr(name, "@@"))) {
+		int len = strlen(str);
+
+		if (len < versioning - name)
+			len = versioning - name;
+
+		return arch__compare_symbol_names_n(name, str, len);
+	} else
+		return arch__compare_symbol_names(name, str);
+}
+
 static struct symbol *symbols__find_by_name(struct rb_root *symbols,
-					    const char *name)
+					    const char *name,
+					    enum symbol_tag_include includes)
 {
 	struct rb_node *n;
 	struct symbol_name_rb_node *s = NULL;
@@ -414,11 +443,11 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
 		int cmp;
 
 		s = rb_entry(n, struct symbol_name_rb_node, rb_node);
-		cmp = arch__compare_symbol_names(name, s->sym.name);
+		cmp = symbol__match_symbol_name(s->sym.name, name, includes);
 
-		if (cmp < 0)
+		if (cmp > 0)
 			n = n->rb_left;
-		else if (cmp > 0)
+		else if (cmp < 0)
 			n = n->rb_right;
 		else
 			break;
@@ -427,16 +456,17 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
 	if (n == NULL)
 		return NULL;
 
-	/* return first symbol that has same name (if any) */
-	for (n = rb_prev(n); n; n = rb_prev(n)) {
-		struct symbol_name_rb_node *tmp;
+	if (includes != SYMBOL_TAG_INCLUDE__DEFAULT_ONLY)
+		/* return first symbol that has same name (if any) */
+		for (n = rb_prev(n); n; n = rb_prev(n)) {
+			struct symbol_name_rb_node *tmp;
 
-		tmp = rb_entry(n, struct symbol_name_rb_node, rb_node);
-		if (arch__compare_symbol_names(tmp->sym.name, s->sym.name))
-			break;
+			tmp = rb_entry(n, struct symbol_name_rb_node, rb_node);
+			if (arch__compare_symbol_names(tmp->sym.name, s->sym.name))
+				break;
 
-		s = tmp;
-	}
+			s = tmp;
+		}
 
 	return &s->sym;
 }
@@ -503,7 +533,12 @@ struct symbol *symbol__next_by_name(struct symbol *sym)
 struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
 					const char *name)
 {
-	return symbols__find_by_name(&dso->symbol_names[type], name);
+	struct symbol *s = symbols__find_by_name(&dso->symbol_names[type], name,
+						 SYMBOL_TAG_INCLUDE__NONE);
+	if (!s)
+		s = symbols__find_by_name(&dso->symbol_names[type], name,
+					  SYMBOL_TAG_INCLUDE__DEFAULT_ONLY);
+	return s;
 }
 
 void dso__sort_by_name(struct dso *dso, enum map_type type)
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 7acd70fce68e..41ebba9a2eb2 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -348,8 +348,19 @@ void arch__sym_update(struct symbol *s, GElf_Sym *sym);
 #define SYMBOL_A 0
 #define SYMBOL_B 1
 
+int arch__compare_symbol_names(const char *namea, const char *nameb);
+int arch__compare_symbol_names_n(const char *namea, const char *nameb,
+				 unsigned int n);
 int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb);
 
+enum symbol_tag_include {
+	SYMBOL_TAG_INCLUDE__NONE = 0,
+	SYMBOL_TAG_INCLUDE__DEFAULT_ONLY
+};
+
+int symbol__match_symbol_name(const char *namea, const char *nameb,
+			      enum symbol_tag_include includes);
+
 /* structure containing an SDT note's info */
 struct sdt_note {
 	char *name;			/* name of the note*/
-- 
cgit v1.2.3


From 4341ec6b3db4c3e903d6c44958722918baec1e59 Mon Sep 17 00:00:00 2001
From: Taeung Song <treeze.taeung@gmail.com>
Date: Wed, 26 Apr 2017 21:21:02 +0900
Subject: perf config: Refactor a duplicated code for obtaining config file
 name

We were doing the same sequence to figure out what is the config
pathname to use, fix it by doing it before those two uses.

Signed-off-by: Taeung Song <treeze.taeung@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1493209268-5543-2-git-send-email-treeze.taeung@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-config.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index 55f04f85b049..80668fa7556e 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -159,6 +159,7 @@ int cmd_config(int argc, const char **argv)
 	int i, ret = 0;
 	struct perf_config_set *set;
 	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
+	const char *config_filename;
 
 	argc = parse_options(argc, argv, config_options, config_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
@@ -175,6 +176,11 @@ int cmd_config(int argc, const char **argv)
 	else if (use_user_config)
 		config_exclusive_filename = user_config;
 
+	if (!config_exclusive_filename)
+		config_filename = user_config;
+	else
+		config_filename = config_exclusive_filename;
+
 	/*
 	 * At only 'config' sub-command, individually use the config set
 	 * because of reinitializing with options config file location.
@@ -192,13 +198,9 @@ int cmd_config(int argc, const char **argv)
 			parse_options_usage(config_usage, config_options, "l", 1);
 		} else {
 			ret = show_config(set);
-			if (ret < 0) {
-				const char * config_filename = config_exclusive_filename;
-				if (!config_exclusive_filename)
-					config_filename = user_config;
+			if (ret < 0)
 				pr_err("Nothing configured, "
 				       "please check your %s \n", config_filename);
-			}
 		}
 		break;
 	default:
@@ -221,13 +223,8 @@ int cmd_config(int argc, const char **argv)
 
 				if (value == NULL)
 					ret = show_spec_config(set, var);
-				else {
-					const char *config_filename = config_exclusive_filename;
-
-					if (!config_exclusive_filename)
-						config_filename = user_config;
+				else
 					ret = set_config(set, config_filename, var, value);
-				}
 				free(arg);
 			}
 		} else
-- 
cgit v1.2.3


From 7d3d162bbd515070dfa4f422778276aa28f114d4 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Mon, 23 Jan 2017 19:32:23 -0800
Subject: ARC: mm: Move full_page computation into cache version agnostic
 wrapper

This reduces code duplication in each of cache version specific handlers

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/cache.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 928562967f3c..18132eb56150 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -28,7 +28,7 @@ unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
 unsigned long perip_end = 0xFFFFFFFF; /* legacy value */
 
 void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
-			       unsigned long sz, const int cacheop);
+			       unsigned long sz, const int op, const int full_page);
 
 void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz);
 void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz);
@@ -233,11 +233,10 @@ slc_chk:
 
 static inline
 void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
-			  unsigned long sz, const int op)
+			  unsigned long sz, const int op, const int full_page)
 {
 	unsigned int aux_cmd;
 	int num_lines;
-	const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 
 	if (op == OP_INV_IC) {
 		aux_cmd = ARC_REG_IC_IVIL;
@@ -279,11 +278,10 @@ void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
  */
 static inline
 void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
-			  unsigned long sz, const int op)
+			  unsigned long sz, const int op, const int full_page)
 {
 	unsigned int aux_cmd, aux_tag;
 	int num_lines;
-	const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 
 	if (op == OP_INV_IC) {
 		aux_cmd = ARC_REG_IC_IVIL;
@@ -349,17 +347,16 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
  */
 static inline
 void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
-			  unsigned long sz, const int cacheop)
+			  unsigned long sz, const int op, const int full_page)
 {
 	unsigned int aux_cmd;
 	int num_lines;
-	const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 
-	if (cacheop == OP_INV_IC) {
+	if (op == OP_INV_IC) {
 		aux_cmd = ARC_REG_IC_IVIL;
 	} else {
 		/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
-		aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+		aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
 	}
 
 	/* Ensure we properly floor/ceil the non-line aligned/sized requests
@@ -368,7 +365,7 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 	 *  -@paddr will be cache-line aligned already (being page aligned)
 	 *  -@sz will be integral multiple of line size (being page sized).
 	 */
-	if (!full_page_op) {
+	if (!full_page) {
 		sz += paddr & ~CACHE_LINE_MASK;
 		paddr &= CACHE_LINE_MASK;
 	}
@@ -381,7 +378,7 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 	 *   - (and needs to be written before the lower 32 bits)
 	 */
 	if (is_pae40_enabled()) {
-		if (cacheop == OP_INV_IC)
+		if (op == OP_INV_IC)
 			/*
 			 * Non aliasing I-cache in HS38,
 			 * aliasing I-cache handled in __cache_line_loop_v3()
@@ -486,13 +483,14 @@ static void __dc_enable(void)
 static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr,
 				unsigned long sz, const int op)
 {
+	const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
 	__before_dc_op(op);
 
-	__cache_line_loop(paddr, vaddr, sz, op);
+	__cache_line_loop(paddr, vaddr, sz, op, full_page);
 
 	__after_dc_op(op);
 
@@ -521,10 +519,11 @@ static inline void
 __ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr,
 			  unsigned long sz)
 {
+	const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 	unsigned long flags;
 
 	local_irq_save(flags);
-	(*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC);
+	(*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC, full_page);
 	local_irq_restore(flags);
 }
 
-- 
cgit v1.2.3


From b98e1995e4fa9030474a61ed0dbc033464fe5ea0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 25 Apr 2017 12:19:49 +0300
Subject: mtd: oxnas_nand: Allocating more than necessary in probe()

We only need to allocate sizeof(struct oxnas_nand_ctrl) which is 192
bytes and not sizeof(struct nand_chip) which is a much larger 3056
bytes.

Fixes: 668592492409 ("mtd: nand: Add OX820 NAND Support")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/oxnas_nand.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/oxnas_nand.c b/drivers/mtd/nand/oxnas_nand.c
index 3e3bf3b364d2..1b207aac840c 100644
--- a/drivers/mtd/nand/oxnas_nand.c
+++ b/drivers/mtd/nand/oxnas_nand.c
@@ -91,7 +91,7 @@ static int oxnas_nand_probe(struct platform_device *pdev)
 	int err = 0;
 
 	/* Allocate memory for the device structure (and zero it) */
-	oxnas = devm_kzalloc(&pdev->dev, sizeof(struct nand_chip),
+	oxnas = devm_kzalloc(&pdev->dev, sizeof(*oxnas),
 			     GFP_KERNEL);
 	if (!oxnas)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 0d77117fc5c0333d024a183d6790167bb90c3b62 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 29 Aug 2014 10:55:15 +0530
Subject: ARCv2: mm: Implement cache region flush operations

These are more efficient than the per-line ops

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/cache.h |  6 ++++
 arch/arc/mm/cache.c          | 68 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 5008021fba98..16e457706129 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -62,6 +62,8 @@ extern unsigned long perip_base, perip_end;
 #define ARC_REG_IC_BCR		0x77	/* Build Config reg */
 #define ARC_REG_IC_IVIC		0x10
 #define ARC_REG_IC_CTRL		0x11
+#define ARC_REG_IC_IVIR		0x16
+#define ARC_REG_IC_ENDR		0x17
 #define ARC_REG_IC_IVIL		0x19
 #define ARC_REG_IC_PTAG		0x1E
 #define ARC_REG_IC_PTAG_HI	0x1F
@@ -76,6 +78,8 @@ extern unsigned long perip_base, perip_end;
 #define ARC_REG_DC_IVDL		0x4A
 #define ARC_REG_DC_FLSH		0x4B
 #define ARC_REG_DC_FLDL		0x4C
+#define ARC_REG_DC_STARTR	0x4D
+#define ARC_REG_DC_ENDR		0x4E
 #define ARC_REG_DC_PTAG		0x5C
 #define ARC_REG_DC_PTAG_HI	0x5F
 
@@ -83,6 +87,8 @@ extern unsigned long perip_base, perip_end;
 #define DC_CTRL_DIS		0x001
 #define DC_CTRL_INV_MODE_FLUSH	0x040
 #define DC_CTRL_FLUSH_STATUS	0x100
+#define DC_CTRL_RGN_OP_INV	0x200
+#define DC_CTRL_RGN_OP_MSK	0xE00
 
 /*System-level cache (L2 cache) related Auxiliary registers */
 #define ARC_REG_SLC_CFG		0x901
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 18132eb56150..8401fcb75d19 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -21,6 +21,10 @@
 #include <asm/cachectl.h>
 #include <asm/setup.h>
 
+#ifdef CONFIG_ISA_ARCV2
+#define USE_RGN_FLSH	1
+#endif
+
 static int l2_line_sz;
 static int ioc_exists;
 int slc_enable = 1, ioc_enable = 1;
@@ -332,6 +336,8 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 	}
 }
 
+#ifndef USE_RGN_FLSH
+
 /*
  * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
  * Here's how cache ops are implemented
@@ -394,6 +400,68 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 	}
 }
 
+#else
+
+/*
+ * optimized flush operation which takes a region as opposed to iterating per line
+ */
+static inline
+void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
+			  unsigned long sz, const int op, const int full_page)
+{
+	const unsigned int ctl = ARC_REG_DC_CTRL;
+	unsigned int s, e, val;
+
+	/* Only for Non aliasing I-cache in HS38 */
+	if (op == OP_INV_IC) {
+		s = ARC_REG_IC_IVIR;
+		e = ARC_REG_IC_ENDR;
+	} else {
+		s = ARC_REG_DC_STARTR;
+		e = ARC_REG_DC_ENDR;
+	}
+
+	if (!full_page) {
+		/* for any leading gap between @paddr and start of cache line */
+		sz += paddr & ~CACHE_LINE_MASK;
+		paddr &= CACHE_LINE_MASK;
+
+		/*
+		 *  account for any trailing gap to end of cache line
+		 *  this is equivalent to DIV_ROUND_UP() in line ops above
+		 */
+		sz += L1_CACHE_BYTES - 1;
+	}
+
+	if (is_pae40_enabled()) {
+		/* TBD: check if crossing 4TB boundary */
+		if (op == OP_INV_IC)
+			write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+		else
+			write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);
+	}
+
+	/*
+	 * Flush / Invalidate is provided by DC_CTRL.RNG_OP 0 or 1
+	 * Flush-n-invalidate additionally uses setting DC_CTRL.IM = 1
+	 * just as for line ops which is handled in __before_dc_op()
+	 */
+	val = read_aux_reg(ctl) & ~DC_CTRL_RGN_OP_MSK;
+
+	if (op & OP_INV)
+		val |= DC_CTRL_RGN_OP_INV;
+
+	write_aux_reg(ctl, val);
+
+	/* ENDR needs to be set ahead of START */
+	write_aux_reg(e, paddr + sz);	/* ENDR is exclusive */
+	write_aux_reg(s, paddr);
+
+	/* caller waits on DC_CTRL.FS */
+}
+
+#endif
+
 #if (CONFIG_ARC_MMU_VER < 3)
 #define __cache_line_loop	__cache_line_loop_v2
 #elif (CONFIG_ARC_MMU_VER == 3)
-- 
cgit v1.2.3


From ee40bd1e0c3fb83d810f258952692ffdebc14726 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 2 May 2017 15:28:12 -0700
Subject: ARCv2: mm: Merge 2 updates to DC_CTRL for region flush

Region Flush has a weird programming model.

 1. Flush or Invalidate is selected by DC_CTRL.RGN_OP
 2 Flush-n-Invalidate is done by DC_CTRL.IM

Given the code structuring before, case #2 above was generating two
seperate updates to DC_CTRL which was pointless.

| 80a342b0 <__dma_cache_wback_inv_l1>:
| 80a342b0:	clri	r4
| 80a342b4:	lr	r2,[dc_ctrl]
| 80a342b8:	bset_s	r2,r2,0x6
| 80a342ba:	sr	r2,[dc_ctrl]	<-- FIRST
|
| 80a342be:	bmskn	r3,r0,0x5
|
| 80a342c2:	lr	r2,[dc_ctrl]
| 80a342c6:	and	r2,r2,0xfffff1ff
| 80a342ce:	bset_s	r2,r2,0x9
| 80a342d0:	sr	r2,[dc_ctrl]	<-- SECOND
|
| 80a342d4:	add_s	r1,r1,0x3f
| 80a342d6:	bmsk_s	r0,r0,0x5
| 80a342d8:	add_s	r0,r0,r1
| 80a342da:	add_s	r0,r0,r3
| 80a342dc:	sr	r0,[78]
| 80a342e0:	sr	r3,[77]
|...
|...

So move setting of DC_CTRL.RGN_OP into __before_dc_op() and combine with
any other update.

| 80b63324 <__dma_cache_wback_inv_l1>:
| 80b63324:	clri	r3
| 80b63328:	lr	r2,[dc_ctrl]
| 80b6332c:	and	r2,r2,0xfffff1ff
| 80b63334:	or	r2,r2,576
| 80b63338:	sr	r2,[dc_ctrl]
|
| 80b6333c:	add_s	r1,r1,0x3f
| 80b6333e:	bmskn	r2,r0,0x5
| 80b63342:	add_s	r0,r0,r1
| 80b63344:	sr	r0,[78]
| 80b63348:	sr	r2,[77]

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/cache.c | 46 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 8401fcb75d19..a867575a758b 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -409,8 +409,7 @@ static inline
 void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 			  unsigned long sz, const int op, const int full_page)
 {
-	const unsigned int ctl = ARC_REG_DC_CTRL;
-	unsigned int s, e, val;
+	unsigned int s, e;
 
 	/* Only for Non aliasing I-cache in HS38 */
 	if (op == OP_INV_IC) {
@@ -441,18 +440,6 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 			write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);
 	}
 
-	/*
-	 * Flush / Invalidate is provided by DC_CTRL.RNG_OP 0 or 1
-	 * Flush-n-invalidate additionally uses setting DC_CTRL.IM = 1
-	 * just as for line ops which is handled in __before_dc_op()
-	 */
-	val = read_aux_reg(ctl) & ~DC_CTRL_RGN_OP_MSK;
-
-	if (op & OP_INV)
-		val |= DC_CTRL_RGN_OP_INV;
-
-	write_aux_reg(ctl, val);
-
 	/* ENDR needs to be set ahead of START */
 	write_aux_reg(e, paddr + sz);	/* ENDR is exclusive */
 	write_aux_reg(s, paddr);
@@ -476,6 +463,11 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
  * Machine specific helpers for Entire D-Cache or Per Line ops
  */
 
+#ifndef USE_RGN_FLSH
+/*
+ * this version avoids extra read/write of DC_CTRL for flush or invalid ops
+ * in the non region flush regime (such as for ARCompact)
+ */
 static inline void __before_dc_op(const int op)
 {
 	if (op == OP_FLUSH_N_INV) {
@@ -489,6 +481,32 @@ static inline void __before_dc_op(const int op)
 	}
 }
 
+#else
+
+static inline void __before_dc_op(const int op)
+{
+	const unsigned int ctl = ARC_REG_DC_CTRL;
+	unsigned int val = read_aux_reg(ctl);
+
+	if (op == OP_FLUSH_N_INV) {
+		val |= DC_CTRL_INV_MODE_FLUSH;
+	}
+
+	if (op != OP_INV_IC) {
+		/*
+		 * Flush / Invalidate is provided by DC_CTRL.RNG_OP 0 or 1
+		 * combined Flush-n-invalidate uses DC_CTRL.IM = 1 set above
+		 */
+		val &= ~DC_CTRL_RGN_OP_MSK;
+		if (op & OP_INV)
+			val |= DC_CTRL_RGN_OP_INV;
+	}
+	write_aux_reg(ctl, val);
+}
+
+#endif
+
+
 static inline void __after_dc_op(const int op)
 {
 	if (op & OP_FLUSH) {
-- 
cgit v1.2.3


From f734a31083324b8f4f24b2c5cba178c7459db309 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 2 May 2017 16:23:57 -0700
Subject: ARCv2: mm: micro-optimize region flush generated code

DC_CTRL.RGN_OP is 3 bits wide, however only 1 bit is used in current
programming model (0: flush, 1: invalidate)

The current code targetting 3 bits leads to additional 8 byte AND
operation which can be elided given that only 1 bit is ever set by
software and/or looked at by hardware

before
------

| 80b63324 <__dma_cache_wback_inv_l1>:
| 80b63324:	clri	r3
| 80b63328:	lr	r2,[dc_ctrl]
| 80b6332c:	and	r2,r2,0xfffff1ff	<--- 8 bytes insn
| 80b63334:	or	r2,r2,576
| 80b63338:	sr	r2,[dc_ctrl]
| ...
| ...
| 80b63360 <__dma_cache_inv_l1>:
| 80b63360:	clri	r3
| 80b63364:	lr	r2,[dc_ctrl]
| 80b63368:	and	r2,r2,0xfffff1ff	<--- 8 bytes insn
| 80b63370:	bset_s	r2,r2,0x9
| 80b63372:	sr	r2,[dc_ctrl]
| ...
| ...
| 80b6338c <__dma_cache_wback_l1>:
| 80b6338c:	clri	r3
| 80b63390:	lr	r2,[dc_ctrl]
| 80b63394:	and	r2,r2,0xfffff1ff	<--- 8 bytes insn
| 80b6339c:	sr	r2,[dc_ctrl]

after (AND elided totally in 2 cases, replaced with 2 byte BCLR in 3rd)
-----

| 80b63324 <__dma_cache_wback_inv_l1>:
| 80b63324:	clri	r3
| 80b63328:	lr	r2,[dc_ctrl]
| 80b6332c:	or	r2,r2,576
| 80b63330:	sr	r2,[dc_ctrl]
| ...
| ...
| 80b63358 <__dma_cache_inv_l1>:
| 80b63358:	clri	r3
| 80b6335c:	lr	r2,[dc_ctrl]
| 80b63360:	bset_s	r2,r2,0x9
| 80b63362:	sr	r2,[dc_ctrl]
| ...
| ...
| 80b6337c <__dma_cache_wback_l1>:
| 80b6337c:	clri	r3
| 80b63380:	lr	r2,[dc_ctrl]
| 80b63384:	bclr_s	r2,r2,0x9
| 80b63386:	sr	r2,[dc_ctrl]

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/cache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 16e457706129..19ebddffb279 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -88,7 +88,7 @@ extern unsigned long perip_base, perip_end;
 #define DC_CTRL_INV_MODE_FLUSH	0x040
 #define DC_CTRL_FLUSH_STATUS	0x100
 #define DC_CTRL_RGN_OP_INV	0x200
-#define DC_CTRL_RGN_OP_MSK	0xE00
+#define DC_CTRL_RGN_OP_MSK	0x200
 
 /*System-level cache (L2 cache) related Auxiliary registers */
 #define ARC_REG_SLC_CFG		0x901
-- 
cgit v1.2.3


From 6a623e07694437ad09f382a13f76cffc32239a7f Mon Sep 17 00:00:00 2001
From: Alexander Couzens <lynxis@fe80.eu>
Date: Tue, 2 May 2017 12:19:00 +0200
Subject: mtd: nand: add ooblayout for old hamming layout

The old 1-bit hamming layout requires ECC data to be placed at a
fixed offset, and not necessarily at the end of the OOB area.
Add this old layout back in order to fix legacy setups.

Fixes: 41b207a70d3a ("mtd: nand: implement the default mtd_ooblayout_ops")
Cc: <stable@vger.kernel.org>
Signed-off-by: Alexander Couzens <lynxis@fe80.eu>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/nand_base.c | 70 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index ed49a1d634b0..d474378ed810 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -139,6 +139,74 @@ const struct mtd_ooblayout_ops nand_ooblayout_lp_ops = {
 };
 EXPORT_SYMBOL_GPL(nand_ooblayout_lp_ops);
 
+/*
+ * Support the old "large page" layout used for 1-bit Hamming ECC where ECC
+ * are placed at a fixed offset.
+ */
+static int nand_ooblayout_ecc_lp_hamming(struct mtd_info *mtd, int section,
+					 struct mtd_oob_region *oobregion)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+
+	if (section)
+		return -ERANGE;
+
+	switch (mtd->oobsize) {
+	case 64:
+		oobregion->offset = 40;
+		break;
+	case 128:
+		oobregion->offset = 80;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	oobregion->length = ecc->total;
+	if (oobregion->offset + oobregion->length > mtd->oobsize)
+		return -ERANGE;
+
+	return 0;
+}
+
+static int nand_ooblayout_free_lp_hamming(struct mtd_info *mtd, int section,
+					  struct mtd_oob_region *oobregion)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int ecc_offset = 0;
+
+	if (section < 0 || section > 1)
+		return -ERANGE;
+
+	switch (mtd->oobsize) {
+	case 64:
+		ecc_offset = 40;
+		break;
+	case 128:
+		ecc_offset = 80;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (section == 0) {
+		oobregion->offset = 2;
+		oobregion->length = ecc_offset - 2;
+	} else {
+		oobregion->offset = ecc_offset + ecc->total;
+		oobregion->length = mtd->oobsize - oobregion->offset;
+	}
+
+	return 0;
+}
+
+const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = {
+	.ecc = nand_ooblayout_ecc_lp_hamming,
+	.free = nand_ooblayout_free_lp_hamming,
+};
+
 static int check_offs_len(struct mtd_info *mtd,
 					loff_t ofs, uint64_t len)
 {
@@ -4559,7 +4627,7 @@ int nand_scan_tail(struct mtd_info *mtd)
 			break;
 		case 64:
 		case 128:
-			mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
+			mtd_set_ooblayout(mtd, &nand_ooblayout_lp_hamming_ops);
 			break;
 		default:
 			WARN(1, "No oob scheme defined for oobsize %d\n",
-- 
cgit v1.2.3


From 726bd223105c04f7bf5eacdc2540819c301061f7 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Mon, 1 May 2017 09:38:13 +0200
Subject: powerpc/8xx: Adding support of IRQ in MPC8xx GPIO

This patch allows the use of IRQ to notify the change of GPIO status
on MPC8xx CPM IO ports. This then allows to associate IRQs to GPIOs
in the Device Tree.

Ex:
	CPM1_PIO_C: gpio-controller@960 {
		#gpio-cells = <2>;
		compatible = "fsl,cpm1-pario-bank-c";
		reg = <0x960 0x10>;
		fsl,cpm1-gpio-irq-mask = <0x0fff>;
		interrupts = <1 2 6 9 10 11 14 15 23 24 26 31>;
		interrupt-parent = <&CPM_PIC>;
		gpio-controller;
	};

The property 'fsl,cpm1-gpio-irq-mask' defines which of the 16 GPIOs
have the associated interrupts defined in the 'interrupts' property.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 .../devicetree/bindings/soc/fsl/cpm_qe/gpio.txt    | 21 +++++++++++++++++-
 arch/powerpc/include/asm/cpm1.h                    |  2 ++
 arch/powerpc/sysdev/cpm1.c                         | 25 ++++++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt
index 349f79fd7076..626e1afa64a6 100644
--- a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt
+++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt
@@ -13,8 +13,17 @@ Required properties:
 - #gpio-cells : Should be two. The first cell is the pin number and the
   second cell is used to specify optional parameters (currently unused).
 - gpio-controller : Marks the port as GPIO controller.
+Optional properties:
+- fsl,cpm1-gpio-irq-mask : For banks having interrupt capability (like port C
+  on CPM1), this item tells which ports have an associated interrupt (ports are
+  listed in the same order as in PCINT register)
+- interrupts : This property provides the list of interrupt for each GPIO having
+  one as described by the fsl,cpm1-gpio-irq-mask property. There should be as
+  many interrupts as number of ones in the mask property. The first interrupt in
+  the list corresponds to the most significant bit of the mask.
+- interrupt-parent : Parent for the above interrupt property.
 
-Example of three SOC GPIO banks defined as gpio-controller nodes:
+Example of four SOC GPIO banks defined as gpio-controller nodes:
 
 	CPM1_PIO_A: gpio-controller@950 {
 		#gpio-cells = <2>;
@@ -30,6 +39,16 @@ Example of three SOC GPIO banks defined as gpio-controller nodes:
 		gpio-controller;
 	};
 
+	CPM1_PIO_C: gpio-controller@960 {
+		#gpio-cells = <2>;
+		compatible = "fsl,cpm1-pario-bank-c";
+		reg = <0x960 0x10>;
+		fsl,cpm1-gpio-irq-mask = <0x0fff>;
+		interrupts = <1 2 6 9 10 11 14 15 23 24 26 31>;
+		interrupt-parent = <&CPM_PIC>;
+		gpio-controller;
+	};
+
 	CPM1_PIO_E: gpio-controller@ac8 {
 		#gpio-cells = <2>;
 		compatible = "fsl,cpm1-pario-bank-e";
diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h
index 8ee4211ca0c6..14ad37865000 100644
--- a/arch/powerpc/include/asm/cpm1.h
+++ b/arch/powerpc/include/asm/cpm1.h
@@ -560,6 +560,8 @@ typedef struct risc_timer_pram {
 #define CPM_PIN_SECONDARY 2
 #define CPM_PIN_GPIO      4
 #define CPM_PIN_OPENDRAIN 8
+#define CPM_PIN_FALLEDGE  16
+#define CPM_PIN_ANYEDGE   0
 
 enum cpm_port {
 	CPM_PORTA,
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index 986cd111d4df..c651e668996b 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -377,6 +377,10 @@ static void cpm1_set_pin16(int port, int pin, int flags)
 			setbits16(&iop->odr_sor, pin);
 		else
 			clrbits16(&iop->odr_sor, pin);
+		if (flags & CPM_PIN_FALLEDGE)
+			setbits16(&iop->intr, pin);
+		else
+			clrbits16(&iop->intr, pin);
 	}
 }
 
@@ -528,6 +532,9 @@ struct cpm1_gpio16_chip {
 
 	/* shadowed data register to clear/set bits safely */
 	u16 cpdata;
+
+	/* IRQ associated with Pins when relevant */
+	int irq[16];
 };
 
 static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc)
@@ -578,6 +585,14 @@ static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
 	spin_unlock_irqrestore(&cpm1_gc->lock, flags);
 }
 
+static int cpm1_gpio16_to_irq(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+
+	return cpm1_gc->irq[gpio] ? : -ENXIO;
+}
+
 static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
 {
 	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
@@ -618,6 +633,7 @@ int cpm1_gpiochip_add16(struct device_node *np)
 	struct cpm1_gpio16_chip *cpm1_gc;
 	struct of_mm_gpio_chip *mm_gc;
 	struct gpio_chip *gc;
+	u16 mask;
 
 	cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL);
 	if (!cpm1_gc)
@@ -625,6 +641,14 @@ int cpm1_gpiochip_add16(struct device_node *np)
 
 	spin_lock_init(&cpm1_gc->lock);
 
+	if (!of_property_read_u16(np, "fsl,cpm1-gpio-irq-mask", &mask)) {
+		int i, j;
+
+		for (i = 0, j = 0; i < 16; i++)
+			if (mask & (1 << (15 - i)))
+				cpm1_gc->irq[i] = irq_of_parse_and_map(np, j++);
+	}
+
 	mm_gc = &cpm1_gc->mm_gc;
 	gc = &mm_gc->gc;
 
@@ -634,6 +658,7 @@ int cpm1_gpiochip_add16(struct device_node *np)
 	gc->direction_output = cpm1_gpio16_dir_out;
 	gc->get = cpm1_gpio16_get;
 	gc->set = cpm1_gpio16_set;
+	gc->to_irq = cpm1_gpio16_to_irq;
 
 	return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc);
 }
-- 
cgit v1.2.3


From fe25cc347959b1efd18ee150165416aa6ed0ecdd Mon Sep 17 00:00:00 2001
From: Xiubo Li <lixiubo@cmss.chinamobile.com>
Date: Tue, 2 May 2017 15:54:29 +0800
Subject: tcmu: Recalculate the tcmu_cmd size to save cmd area memories

For the "struct tcmu_cmd_entry" in cmd area, the minimum size
will be sizeof(struct tcmu_cmd_entry) == 112 Bytes. And it could
fill about (sizeof(struct rsp) - sizeof(struct req)) /
sizeof(struct iovec) == 68 / 16 ~= 4 data regions(iov[4]) by
default.

For most tcmu_cmds, the data block indexes allocated from the
data area will be continuous. And for the continuous blocks they
will be merged into the same region using only one iovec. For
the current code, it will always allocates the same number of
iovecs with blocks for each tcmu_cmd, and it will wastes much
memories.

For example, when the block size is 4K and the DATA_OUT buffer
size is 64K, and the regions needed is less than 5(on my
environment is almost 99.7%). The current code will allocate
about 16 iovecs, and there will be (16 - 4) * sizeof(struct
iovec) = 192 Bytes cmd area memories wasted.

Here adds two helpers to calculate the base size and full size
of the tcmu_cmd. And will recalculate them again when it make sure
how many iovs is needed before insert it to cmd area.

Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com>
Acked-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 52 ++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 11 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 0b29e4f00bce..89b75ce563d8 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -602,6 +602,27 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 	return true;
 }
 
+static inline size_t tcmu_cmd_get_base_cmd_size(size_t iov_cnt)
+{
+	return max(offsetof(struct tcmu_cmd_entry, req.iov[iov_cnt]),
+			sizeof(struct tcmu_cmd_entry));
+}
+
+static inline size_t tcmu_cmd_get_cmd_size(struct tcmu_cmd *tcmu_cmd,
+					   size_t base_command_size)
+{
+	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
+	size_t command_size;
+
+	command_size = base_command_size +
+		round_up(scsi_command_size(se_cmd->t_task_cdb),
+				TCMU_OP_ALIGN_SIZE);
+
+	WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1));
+
+	return command_size;
+}
+
 static sense_reason_t
 tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 {
@@ -624,16 +645,16 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	 * Must be a certain minimum size for response sense info, but
 	 * also may be larger if the iov array is large.
 	 *
-	 * We prepare way too many iovs for potential uses here, because it's
-	 * expensive to tell how many regions are freed in the bitmap
-	*/
-	base_command_size = max(offsetof(struct tcmu_cmd_entry,
-				req.iov[tcmu_cmd_get_block_cnt(tcmu_cmd)]),
-				sizeof(struct tcmu_cmd_entry));
-	command_size = base_command_size
-		+ round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE);
-
-	WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1));
+	 * We prepare as many iovs as possbile for potential uses here,
+	 * because it's expensive to tell how many regions are freed in
+	 * the bitmap & global data pool, as the size calculated here
+	 * will only be used to do the checks.
+	 *
+	 * The size will be recalculated later as actually needed to save
+	 * cmd area memories.
+	 */
+	base_command_size = tcmu_cmd_get_base_cmd_size(tcmu_cmd->dbi_cnt);
+	command_size = tcmu_cmd_get_cmd_size(tcmu_cmd, base_command_size);
 
 	mutex_lock(&udev->cmdr_lock);
 
@@ -694,7 +715,6 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	entry = (void *) mb + CMDR_OFF + cmd_head;
 	tcmu_flush_dcache_range(entry, sizeof(*entry));
 	tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD);
-	tcmu_hdr_set_len(&entry->hdr.len_op, command_size);
 	entry->hdr.cmd_id = tcmu_cmd->cmd_id;
 	entry->hdr.kflags = 0;
 	entry->hdr.uflags = 0;
@@ -736,6 +756,16 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 		entry->req.iov_bidi_cnt = iov_cnt;
 	}
 
+	/*
+	 * Recalaulate the command's base size and size according
+	 * to the actual needs
+	 */
+	base_command_size = tcmu_cmd_get_base_cmd_size(entry->req.iov_cnt +
+						       entry->req.iov_bidi_cnt);
+	command_size = tcmu_cmd_get_cmd_size(tcmu_cmd, base_command_size);
+
+	tcmu_hdr_set_len(&entry->hdr.len_op, command_size);
+
 	/* All offsets relative to mb_addr, not start of entry! */
 	cdb_off = CMDR_OFF + cmd_head + base_command_size;
 	memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb));
-- 
cgit v1.2.3


From e8a3118e1f5cbe0c26704419390bf074689b349f Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 21 Apr 2017 17:16:20 +0200
Subject: objtool: make it visible in make V=1 output

It is currently impossible to see what is going on with objtool when
building, so call echo-cmd to see the actions:
  gcc -Wp,-MD,arch/x86/entry/.entry_64.o.d  -nostdinc -isystem ...
  ./tools/objtool/objtool check "arch/x86/entry/entry_64.o";

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Michal Marek <mmarek@suse.com>
Cc: linux-kbuild@vger.kernel.org
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/Makefile.build | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index e5f1425c601f..733e044fff8b 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -280,14 +280,14 @@ define rule_cc_o_c
 	$(call echo-cmd,checksrc) $(cmd_checksrc)			  \
 	$(call cmd_and_fixdep,cc_o_c)					  \
 	$(cmd_modversions_c)						  \
-	$(cmd_objtool)						          \
+	$(call echo-cmd,objtool) $(cmd_objtool)				  \
 	$(call echo-cmd,record_mcount) $(cmd_record_mcount)
 endef
 
 define rule_as_o_S
 	$(call cmd_and_fixdep,as_o_S)					  \
 	$(cmd_modversions_S)						  \
-	$(cmd_objtool)
+	$(call echo-cmd,objtool) $(cmd_objtool)
 endef
 
 # List module undefined symbols (or empty line if not enabled)
-- 
cgit v1.2.3


From bd74370b8657fc6fdab7b0d5024d011985183809 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Sun, 30 Apr 2017 18:16:00 +0200
Subject: Makefile: evaluate LDFLAGS_BUILD_ID only once

Evaluate LDFLAGS_BUILD_ID (which involves invoking the compiler) only
once instead of over and over.

This provides a ~20% reduction in null build time with x86 allnoconfig:

$ make allnoconfig && make -j8
$ perf stat -r5 -e sched:sched_process_exec make -j8
-       2 119      sched:sched_process_exec
+       1 878      sched:sched_process_exec

-       1,238817018 seconds time elapsed
+       0,971020553 seconds time elapsed

Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 10d6cd348abb..35497e933995 100644
--- a/Makefile
+++ b/Makefile
@@ -829,7 +829,7 @@ KBUILD_AFLAGS   += $(ARCH_AFLAGS)   $(KAFLAGS)
 KBUILD_CFLAGS   += $(ARCH_CFLAGS)   $(KCFLAGS)
 
 # Use --build-id when available.
-LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\
+LDFLAGS_BUILD_ID := $(patsubst -Wl$(comma)%,%,\
 			      $(call cc-ldoption, -Wl$(comma)--build-id,))
 KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
 LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
-- 
cgit v1.2.3


From 5ed78e5523fd9ba77b8444d380d54da1f88c53fc Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 11 Sep 2016 16:42:14 +0900
Subject: alpha: add $(src)/ rather than $(obj)/ to make source file path

$(ev6-y)divide.S is a source file, not a build-time generated file.
So, it should be prefixed with $(src)/ rather than $(obj)/.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/alpha/lib/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile
index 59660743237c..7b694fed382c 100644
--- a/arch/alpha/lib/Makefile
+++ b/arch/alpha/lib/Makefile
@@ -46,11 +46,11 @@ AFLAGS___remqu.o =       -DREM
 AFLAGS___divlu.o = -DDIV       -DINTSIZE
 AFLAGS___remlu.o =       -DREM -DINTSIZE
 
-$(obj)/__divqu.o: $(obj)/$(ev6-y)divide.S
+$(obj)/__divqu.o: $(src)/$(ev6-y)divide.S
 	$(cmd_as_o_S)
-$(obj)/__remqu.o: $(obj)/$(ev6-y)divide.S
+$(obj)/__remqu.o: $(src)/$(ev6-y)divide.S
 	$(cmd_as_o_S)
-$(obj)/__divlu.o: $(obj)/$(ev6-y)divide.S
+$(obj)/__divlu.o: $(src)/$(ev6-y)divide.S
 	$(cmd_as_o_S)
-$(obj)/__remlu.o: $(obj)/$(ev6-y)divide.S
+$(obj)/__remlu.o: $(src)/$(ev6-y)divide.S
 	$(cmd_as_o_S)
-- 
cgit v1.2.3


From e19a4e3f1bffe45b8e2ea67fcfb0c9c88278c4cc Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 11 Sep 2016 16:42:15 +0900
Subject: alpha: merge build rules of division routines

These four objects are generated by the same build rule, with
different compile options.  The build rules can be merged.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/alpha/lib/Makefile | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile
index 7b694fed382c..5f12e9dcd623 100644
--- a/arch/alpha/lib/Makefile
+++ b/arch/alpha/lib/Makefile
@@ -46,11 +46,6 @@ AFLAGS___remqu.o =       -DREM
 AFLAGS___divlu.o = -DDIV       -DINTSIZE
 AFLAGS___remlu.o =       -DREM -DINTSIZE
 
-$(obj)/__divqu.o: $(src)/$(ev6-y)divide.S
-	$(cmd_as_o_S)
-$(obj)/__remqu.o: $(src)/$(ev6-y)divide.S
-	$(cmd_as_o_S)
-$(obj)/__divlu.o: $(src)/$(ev6-y)divide.S
-	$(cmd_as_o_S)
-$(obj)/__remlu.o: $(src)/$(ev6-y)divide.S
+$(addprefix $(obj)/,__divqu.o __remqu.o __divlu.o __remlu.o): \
+							$(src)/$(ev6-y)divide.S
 	$(cmd_as_o_S)
-- 
cgit v1.2.3


From 3eec0291830e4c28d09f73bab247f3b59172022b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 11 Sep 2016 16:42:16 +0900
Subject: alpha: make short build log available for division routines

This enables the Kbuild standard log style as follows:

  AS      arch/alpha/lib/__divlu.o
  AS      arch/alpha/lib/__divqu.o
  AS      arch/alpha/lib/__remlu.o
  AS      arch/alpha/lib/__remqu.o

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/alpha/lib/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile
index 5f12e9dcd623..7083434dd241 100644
--- a/arch/alpha/lib/Makefile
+++ b/arch/alpha/lib/Makefile
@@ -47,5 +47,5 @@ AFLAGS___divlu.o = -DDIV       -DINTSIZE
 AFLAGS___remlu.o =       -DREM -DINTSIZE
 
 $(addprefix $(obj)/,__divqu.o __remqu.o __divlu.o __remlu.o): \
-							$(src)/$(ev6-y)divide.S
-	$(cmd_as_o_S)
+						$(src)/$(ev6-y)divide.S FORCE
+	$(call if_changed_rule,as_o_S)
-- 
cgit v1.2.3


From 028d5bf161d11b6cab9f2d0ad076cccd1d8495bc Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 12 Apr 2017 08:36:32 +0900
Subject: ia64: beatify build log for gate.so and gate-syms.o

Currently, the object path is not aligned in the build log:

  LDS     arch/ia64/kernel/gate.lds
  AS      arch/ia64/kernel/gate.o
  GATE arch/ia64/kernel/gate.so
  AS      arch/ia64/kernel/gate-data.o

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/ia64/kernel/Makefile.gate | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate
index ceeffc509764..a32903ada016 100644
--- a/arch/ia64/kernel/Makefile.gate
+++ b/arch/ia64/kernel/Makefile.gate
@@ -6,7 +6,7 @@ extra-y += gate.so gate-syms.o gate.lds gate.o
 
 CPPFLAGS_gate.lds := -P -C -U$(ARCH)
 
-quiet_cmd_gate = GATE $@
+quiet_cmd_gate = GATE    $@
       cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
 
 GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
-- 
cgit v1.2.3


From 4512c7bccb7794fb6d6ff37ae449154413bd4963 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Tue, 2 May 2017 18:21:12 -0400
Subject: soc/qbman: Fix implicit header dependency now causing build fails

In commit 461a6946b1f9 ("iommu: Remove pci.h include from
trace/events/iommu.h") that header shuffle uncovered an implicit
include in this driver, manifesting as:

    CC      drivers/soc/fsl/qbman/qman_portal.o
    drivers/soc/fsl/qbman/qman_portal.c: In function 'qman_portal_probe':
    drivers/soc/fsl/qbman/qman_portal.c:299:2: error: implicit declaration of function 'dma_set_mask'
    drivers/soc/fsl/qbman/qman_portal.c:299:2: error: implicit declaration of function 'DMA_BIT_MASK'
        if (dma_set_mask(dev, DMA_BIT_MASK(40))) {
        ^

on the corenet32_smp_defconfig (and 64 bit respectively.)  The above
commit was singled out via git bisect.

The header it was implictly relying on getting was dma-mapping.h - so
we explicitly add it here.

Fixes: 461a6946b1f9 ("iommu: Remove pci.h include from trace/events/iommu.h")
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Scott Wood <oss@buserror.net>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/soc/fsl/qbman/qman_portal.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/soc/fsl/qbman/qman_portal.c b/drivers/soc/fsl/qbman/qman_portal.c
index adbaa30d3c5a..4a6a8ae5e0aa 100644
--- a/drivers/soc/fsl/qbman/qman_portal.c
+++ b/drivers/soc/fsl/qbman/qman_portal.c
@@ -30,6 +30,8 @@
 
 #include "qman_priv.h"
 
+#include <linux/dma-mapping.h>
+
 struct qman_portal *qman_dma_portal;
 EXPORT_SYMBOL(qman_dma_portal);
 
-- 
cgit v1.2.3


From e3cd7f013bac3105d44b8bd5a90359989d45b5a5 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Wed, 3 May 2017 09:54:20 +0100
Subject: metag/mm: Drop pointless increment

The increment of entry in the loop in mmu_init for meta1 is redundant as
it isn't used again, so drop it.

Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
---
 arch/metag/mm/mmu-meta1.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/metag/mm/mmu-meta1.c b/arch/metag/mm/mmu-meta1.c
index 91f4255bcb5c..62ebab90924d 100644
--- a/arch/metag/mm/mmu-meta1.c
+++ b/arch/metag/mm/mmu-meta1.c
@@ -152,6 +152,5 @@ void __init mmu_init(unsigned long mem_end)
 
 		p_swapper_pg_dir++;
 		addr += PGDIR_SIZE;
-		entry++;
 	}
 }
-- 
cgit v1.2.3


From 33b88e708e7dfa58dc896da2a98f5719d2eb315c Mon Sep 17 00:00:00 2001
From: Vince Weaver <vincent.weaver@maine.edu>
Date: Tue, 2 May 2017 14:08:50 -0400
Subject: perf/x86: Fix Broadwell-EP DRAM RAPL events

It appears as though the Broadwell-EP DRAM units share the special
units quirk with Haswell-EP/KNL.

Without this patch, you get really high results (a single DRAM using 20W
of power).

The powercap driver in drivers/powercap/intel_rapl.c already has this
change.

Signed-off-by: Vince Weaver <vincent.weaver@maine.edu>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@gmail.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Cc: <stable@vger.kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/rapl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 9d05c7e67f60..a45e2114a846 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -761,7 +761,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE,   hsw_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E,   hsw_rapl_init),
-	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,	  hsw_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,	  hsx_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
-- 
cgit v1.2.3


From 8c64415cc1f5794c02b95dee10084fa8a4ee3cfe Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 3 May 2017 08:28:48 -0700
Subject: sparc: Remove redundant tests in boot_flags_init().

The test:

	*commands && *commands == ' '

is equivalent to:

	*commands == ' '

Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/setup_32.c | 2 +-
 arch/sparc/kernel/setup_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index 6f06058c5ae7..6722308d1a98 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -148,7 +148,7 @@ static void __init boot_flags_init(char *commands)
 {
 	while (*commands) {
 		/* Move to the start of the next "argument". */
-		while (*commands && *commands == ' ')
+		while (*commands == ' ')
 			commands++;
 
 		/* Process any command switches, otherwise skip it. */
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 6b7331d198e9..422b17880955 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -133,7 +133,7 @@ static void __init boot_flags_init(char *commands)
 {
 	while (*commands) {
 		/* Move to the start of the next "argument". */
-		while (*commands && *commands == ' ')
+		while (*commands == ' ')
 			commands++;
 
 		/* Process any command switches, otherwise skip it. */
-- 
cgit v1.2.3


From c296cfe26bdd86de6eeefde57d333174ed9574d0 Mon Sep 17 00:00:00 2001
From: Fred Isaman <fred.isaman@gmail.com>
Date: Tue, 2 May 2017 16:53:36 -0400
Subject: pNFS: Fix NULL dereference in pnfs_generic_alloc_ds_commits

Signed-off-by: Fred Isaman <fred.isaman@gmail.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs_nfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index ae600ab1a646..d40755a0984b 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -221,7 +221,7 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
 		 * If the layout segment is invalid, then let
 		 * pnfs_generic_retry_commit() clean up the bucket.
 		 */
-		if (!pnfs_is_valid_lseg(bucket->clseg) &&
+		if (bucket->clseg && !pnfs_is_valid_lseg(bucket->clseg) &&
 		    !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags))
 			break;
 		data = nfs_commitdata_alloc(false);
-- 
cgit v1.2.3


From 991c7ed6acca506983c42d51965da8b8f3677682 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Wed, 3 May 2017 11:20:15 -0700
Subject: elf: Add ARCv2 specific core note section

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 include/uapi/linux/elf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b59ee077a596..b5c00563ed9b 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -417,7 +417,7 @@ typedef struct elf64_shdr {
 #define NT_METAG_CBUF	0x500		/* Metag catch buffer registers */
 #define NT_METAG_RPIPE	0x501		/* Metag read pipeline state */
 #define NT_METAG_TLS	0x502		/* Metag TLS pointer */
-
+#define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 
 /* Note header in a PT_NOTE section */
 typedef struct elf32_note {
-- 
cgit v1.2.3


From 65c02a556bc4f374e9002ded1ea11a0263da7b63 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Wed, 3 May 2017 11:21:31 -0700
Subject: ARCv2: ptrace: provide regset for accumulator/r30 regs

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/uapi/asm/elf.h    |  1 +
 arch/arc/include/uapi/asm/ptrace.h |  5 +++
 arch/arc/kernel/ptrace.c           | 62 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/arch/arc/include/uapi/asm/elf.h b/arch/arc/include/uapi/asm/elf.h
index 0037a587320d..06d95e611616 100644
--- a/arch/arc/include/uapi/asm/elf.h
+++ b/arch/arc/include/uapi/asm/elf.h
@@ -27,6 +27,7 @@ typedef unsigned long elf_greg_t;
 typedef unsigned long elf_fpregset_t;
 
 #define ELF_NGREG	(sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
+#define ELF_ARCV2REG	(sizeof(struct user_regs_arcv2) / sizeof(elf_greg_t))
 
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h
index 0b3ef63d4a03..dd206e6b482c 100644
--- a/arch/arc/include/uapi/asm/ptrace.h
+++ b/arch/arc/include/uapi/asm/ptrace.h
@@ -47,6 +47,11 @@ struct user_regs_struct {
 	unsigned long efa;	/* break pt addr, for break points in delay slots */
 	unsigned long stop_pc;	/* give dbg stop_pc after ensuring brkpt trap */
 };
+
+struct user_regs_arcv2 {
+	unsigned long r30, r58, r59;
+};
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _UAPI__ASM_ARC_PTRACE_H */
diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c
index 31150060d38b..5ee4676f135d 100644
--- a/arch/arc/kernel/ptrace.c
+++ b/arch/arc/kernel/ptrace.c
@@ -184,19 +184,75 @@ static int genregs_set(struct task_struct *target,
 	return ret;
 }
 
+#ifdef CONFIG_ISA_ARCV2
+static int arcv2regs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	int ret, copy_sz;
+
+	if (IS_ENABLED(CONFIG_ARC_HAS_ACCL_REGS))
+		copy_sz = sizeof(struct user_regs_arcv2);
+	else
+		copy_sz = 4;	/* r30 only */
+
+	/*
+	 * itemized copy not needed like above as layout of regs (r30,r58,r59)
+	 * is exactly same in kernel (pt_regs) and userspace (user_regs_arcv2)
+	 */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &regs->r30,
+				  0, copy_sz);
+
+	return ret;
+}
+
+static int arcv2regs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	int ret, copy_sz;
+
+	if (IS_ENABLED(CONFIG_ARC_HAS_ACCL_REGS))
+		copy_sz = sizeof(struct user_regs_arcv2);
+	else
+		copy_sz = 4;	/* r30 only */
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, (void *)&regs->r30,
+				  0, copy_sz);
+
+	return ret;
+}
+
+#endif
+
 enum arc_getset {
-	REGSET_GENERAL,
+	REGSET_CMN,
+	REGSET_ARCV2,
 };
 
 static const struct user_regset arc_regsets[] = {
-	[REGSET_GENERAL] = {
+	[REGSET_CMN] = {
 	       .core_note_type = NT_PRSTATUS,
 	       .n = ELF_NGREG,
 	       .size = sizeof(unsigned long),
 	       .align = sizeof(unsigned long),
 	       .get = genregs_get,
 	       .set = genregs_set,
-	}
+	},
+#ifdef CONFIG_ISA_ARCV2
+	[REGSET_ARCV2] = {
+	       .core_note_type = NT_ARC_V2,
+	       .n = ELF_ARCV2REG,
+	       .size = sizeof(unsigned long),
+	       .align = sizeof(unsigned long),
+	       .get = arcv2regs_get,
+	       .set = arcv2regs_set,
+	},
+#endif
 };
 
 static const struct user_regset_view user_arc_view = {
-- 
cgit v1.2.3


From ecb6c7435f25d6a11fa9d14f407d00286f9642b9 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Sun, 25 Dec 2016 23:11:05 +0100
Subject: um: Fix _print_addr()

Recent changes to printk() broke UML's stack trace
output. Kill the root of the problem by using a single
printk() statement.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/kernel/sysrq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index a76295f7ede9..6b995e870d55 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -20,10 +20,8 @@
 
 static void _print_addr(void *data, unsigned long address, int reliable)
 {
-	pr_info(" [<%08lx>]", address);
-	pr_cont(" %s", reliable ? "" : "? ");
-	print_symbol("%s", address);
-	pr_cont("\n");
+	pr_info(" [<%08lx>] %s%pF\n", address, reliable ? "" : "? ",
+		(void *)address);
 }
 
 static const struct stacktrace_ops stackops = {
-- 
cgit v1.2.3


From 8bba077066d6d0a135b7be1970691522075bf905 Mon Sep 17 00:00:00 2001
From: Nikola Kotur <kotnick@gmail.com>
Date: Thu, 2 Mar 2017 14:16:40 +0100
Subject: um: Set number of CPUs

Define NR_CPUS required by the timer subsystem.

Fixes this make warning:

    scripts/kconfig/conf  --oldconfig arch/x86/um/Kconfig
    kernel/time/Kconfig:155:warning: range is invalid

Signed-off-by: Nikola Kotur <kotnick@gmail.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/Kconfig.common          | 5 +++++
 arch/um/os-Linux/skas/process.c | 3 ---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index fd443852103c..347bcd444c99 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -62,3 +62,8 @@ config HZ
 config SUBARCH
 	string
 	option env="SUBARCH"
+
+config NR_CPUS
+	int
+	range 1 1
+	default 1
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 23025d645160..92f7ee14b10e 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -233,9 +233,6 @@ static int userspace_tramp(void *stack)
 	return 0;
 }
 
-/* Each element set once, and only accessed by a single processor anyway */
-#undef NR_CPUS
-#define NR_CPUS 1
 int userspace_pid[NR_CPUS];
 
 int start_userspace(unsigned long stub_stack)
-- 
cgit v1.2.3


From 9abc74a22d85ab29cef9896a2582a530da7e79bf Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Sat, 1 Apr 2017 00:41:57 +0200
Subject: um: Fix PTRACE_POKEUSER on x86_64

This is broken since ever but sadly nobody noticed.
Recent versions of GDB set DR_CONTROL unconditionally and
UML dies due to a heap corruption. It turns out that
the PTRACE_POKEUSER was copy&pasted from i386 and assumes
that addresses are 4 bytes long.

Fix that by using 8 as address size in the calculation.

Cc: <stable@vger.kernel.org>
Reported-by: jie cao <cj3054@gmail.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/x86/um/ptrace_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index a5c9910d234f..09a085bde0d4 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -125,7 +125,7 @@ int poke_user(struct task_struct *child, long addr, long data)
 	else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
 		(addr <= offsetof(struct user, u_debugreg[7]))) {
 		addr -= offsetof(struct user, u_debugreg[0]);
-		addr = addr >> 2;
+		addr = addr >> 3;
 		if ((addr == 4) || (addr == 5))
 			return -EIO;
 		child->thread.arch.debugregs[addr] = data;
-- 
cgit v1.2.3


From 601b7b9147455a4d8d80d04f0e5a1eb222301bbe Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Mon, 3 Apr 2017 12:54:58 -0700
Subject: um: Include kbuild.h instead of duplicating its macros

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/x86/um/shared/sysdep/kernel-offsets.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h
index 46a9df99f3c5..7e1d35b6ad5c 100644
--- a/arch/x86/um/shared/sysdep/kernel-offsets.h
+++ b/arch/x86/um/shared/sysdep/kernel-offsets.h
@@ -2,16 +2,9 @@
 #include <linux/sched.h>
 #include <linux/elf.h>
 #include <linux/crypto.h>
+#include <linux/kbuild.h>
 #include <asm/mman.h>
 
-#define DEFINE(sym, val) \
-	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-#define OFFSET(sym, str, mem) \
-	DEFINE(sym, offsetof(struct str, mem));
-
 void foo(void)
 {
 #include <common-offsets.h>
-- 
cgit v1.2.3


From 5b4236e17cc1bd9fa14b2b0c7a4ae632d41f2e20 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 27 Apr 2017 12:15:10 +0900
Subject: um: Fix to call read_initrd after init_bootmem

Since read_initrd() invokes alloc_bootmem() for allocating
memory to load initrd image, it must be called after init_bootmem.

This makes read_initrd() called directly from setup_arch()
after init_bootmem() and mem_total_pages().

Cc: <stable@vger.kernel.org>
Fixes: b63236972e1 ("um: Setup physical memory in setup_arch()")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/kernel/initrd.c  | 4 +---
 arch/um/kernel/um_arch.c | 6 ++++++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index 48bae81f8dca..6f6e7896e53f 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -14,7 +14,7 @@
 static char *initrd __initdata = NULL;
 static int load_initrd(char *filename, void *buf, int size);
 
-static int __init read_initrd(void)
+int __init read_initrd(void)
 {
 	void *area;
 	long long size;
@@ -46,8 +46,6 @@ static int __init read_initrd(void)
 	return 0;
 }
 
-__uml_postsetup(read_initrd);
-
 static int __init uml_initrd_setup(char *line, int *add)
 {
 	initrd = line;
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 4b85acd4020c..64a1fd06f3fd 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -338,11 +338,17 @@ int __init linux_main(int argc, char **argv)
 	return start_uml();
 }
 
+int __init __weak read_initrd(void)
+{
+	return 0;
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	stack_protections((unsigned long) &init_thread_info);
 	setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
 	mem_total_pages(physmem_size, iomem_size, highmem);
+	read_initrd();
 
 	paging_init();
 	strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
-- 
cgit v1.2.3


From c374ed27c93698c794f99c5d41d9ad3062c4c207 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Wed, 19 Apr 2017 14:46:24 +1000
Subject: powerpc/powernv: Block PCI config access on BCM5718 during EEH
 recovery

Similar to what is done in commit b6541db13952 ("powerpc/eeh: Block
PCI config access upon frozen PE"), we need block PCI config access
for BCM5719 when recovering frozen error on them. Otherwise, an
unexpected recursive fenced PHB error is observed.

   0001:06:00.0 Ethernet controller: Broadcom Corporation \
                NetXtreme BCM5718 Gigabit Ethernet PCIe (rev 10)
   0001:06:00.1 Ethernet controller: Broadcom Corporation \
                NetXtreme BCM5718 Gigabit Ethernet PCIe (rev 10)

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/eeh-powernv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index d2f19821d71d..d12ea7b9fd47 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -412,11 +412,14 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
 	 * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
 	 * that PE to block its config space.
 	 *
+	 * Broadcom BCM5718 2-ports NICs (14e4:1656)
 	 * Broadcom Austin 4-ports NICs (14e4:1657)
 	 * Broadcom Shiner 4-ports 1G NICs (14e4:168a)
 	 * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
 	 */
 	if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
+	     pdn->device_id == 0x1656) ||
+	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
 	     pdn->device_id == 0x1657) ||
 	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
 	     pdn->device_id == 0x168a) ||
-- 
cgit v1.2.3


From ce4586063f1af780b1c6b7e344907e6f9c1ba59a Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Thu, 4 May 2017 08:15:10 +0200
Subject: um: Add missing NR_CPUS include

We need linux/threads.h for that variable.

Fixes: 8bba077066d6d0 ("um: Set number of CPUs")
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/os-Linux/skas/process.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 92f7ee14b10e..03b3c4cc7735 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -21,6 +21,7 @@
 #include <registers.h>
 #include <skas.h>
 #include <sysdep/stub.h>
+#include <linux/threads.h>
 
 int is_skas_winch(int pid, int fd, void *data)
 {
-- 
cgit v1.2.3


From 74da4a0f574d11ed60dbe50a1e5e942e20476590 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 3 Mar 2017 18:16:07 +0100
Subject: libceph, ceph: always advertise all supported features

No reason to hide CephFS-specific features in the rbd case.  Recent
feature bits mix RADOS and CephFS-specific stuff together anyway.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c                |  2 +-
 fs/ceph/super.c                    |  7 +------
 include/linux/ceph/ceph_features.h |  4 ++++
 include/linux/ceph/libceph.h       |  5 +----
 net/ceph/ceph_common.c             | 16 ++++++----------
 5 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 517838b65964..16010183b703 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -731,7 +731,7 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
 	kref_init(&rbdc->kref);
 	INIT_LIST_HEAD(&rbdc->node);
 
-	rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0);
+	rbdc->client = ceph_create_client(ceph_opts, rbdc);
 	if (IS_ERR(rbdc->client))
 		goto out_rbdc;
 	ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 0ec8d0114e57..8b9f645006da 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -544,10 +544,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 					struct ceph_options *opt)
 {
 	struct ceph_fs_client *fsc;
-	const u64 supported_features =
-		CEPH_FEATURE_FLOCK | CEPH_FEATURE_DIRLAYOUTHASH |
-		CEPH_FEATURE_MDSENC | CEPH_FEATURE_MDS_INLINE_DATA;
-	const u64 required_features = 0;
 	int page_count;
 	size_t size;
 	int err = -ENOMEM;
@@ -556,8 +552,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	if (!fsc)
 		return ERR_PTR(-ENOMEM);
 
-	fsc->client = ceph_create_client(opt, fsc, supported_features,
-					 required_features);
+	fsc->client = ceph_create_client(opt, fsc);
 	if (IS_ERR(fsc->client)) {
 		err = PTR_ERR(fsc->client);
 		goto fail;
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index ae2f66833762..fd8b2953c78f 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -105,8 +105,10 @@ static inline u64 ceph_sanitize_features(u64 features)
  */
 #define CEPH_FEATURES_SUPPORTED_DEFAULT		\
 	(CEPH_FEATURE_NOSRCADDR |		\
+	 CEPH_FEATURE_FLOCK |			\
 	 CEPH_FEATURE_SUBSCRIBE2 |		\
 	 CEPH_FEATURE_RECONNECT_SEQ |		\
+	 CEPH_FEATURE_DIRLAYOUTHASH |		\
 	 CEPH_FEATURE_PGID64 |			\
 	 CEPH_FEATURE_PGPOOL3 |			\
 	 CEPH_FEATURE_OSDENC |			\
@@ -114,11 +116,13 @@ static inline u64 ceph_sanitize_features(u64 features)
 	 CEPH_FEATURE_MSG_AUTH |		\
 	 CEPH_FEATURE_CRUSH_TUNABLES2 |		\
 	 CEPH_FEATURE_REPLY_CREATE_INODE |	\
+	 CEPH_FEATURE_MDSENC |			\
 	 CEPH_FEATURE_OSDHASHPSPOOL |		\
 	 CEPH_FEATURE_OSD_CACHEPOOL |		\
 	 CEPH_FEATURE_CRUSH_V2 |		\
 	 CEPH_FEATURE_EXPORT_PEER |		\
 	 CEPH_FEATURE_OSDMAP_ENC |		\
+	 CEPH_FEATURE_MDS_INLINE_DATA |		\
 	 CEPH_FEATURE_CRUSH_TUNABLES3 |		\
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 88cd5dc8e238..cecbf5a26e5a 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -262,10 +262,7 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client);
 extern void ceph_destroy_options(struct ceph_options *opt);
 extern int ceph_compare_options(struct ceph_options *new_opt,
 				struct ceph_client *client);
-extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
-					      void *private,
-					      u64 supported_features,
-					      u64 required_features);
+struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private);
 struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
 u64 ceph_client_gid(struct ceph_client *client);
 extern void ceph_destroy_client(struct ceph_client *client);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 108533859a53..4701d067169f 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -596,9 +596,7 @@ EXPORT_SYMBOL(ceph_client_gid);
 /*
  * create a fresh client instance
  */
-struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
-				       u64 supported_features,
-				       u64 required_features)
+struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
 {
 	struct ceph_client *client;
 	struct ceph_entity_addr *myaddr = NULL;
@@ -615,14 +613,12 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
 	init_waitqueue_head(&client->auth_wq);
 	client->auth_err = 0;
 
-	if (!ceph_test_opt(client, NOMSGAUTH))
-		required_features |= CEPH_FEATURE_MSG_AUTH;
-
 	client->extra_mon_dispatch = NULL;
-	client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT |
-		supported_features;
-	client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT |
-		required_features;
+	client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+	client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT;
+
+	if (!ceph_test_opt(client, NOMSGAUTH))
+		client->required_features |= CEPH_FEATURE_MSG_AUTH;
 
 	/* msgr */
 	if (ceph_test_opt(client, MYIP))
-- 
cgit v1.2.3


From d6a3408a77807037872892c2a2034180fcc08d12 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 3 Mar 2017 18:16:07 +0100
Subject: libceph: supported_features module parameter

Add a readonly, exported to sysfs module parameter so that userspace
can generate meaningful error messages.  It's a bit funky, but there is
no other libceph-specific place.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/ceph_common.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4701d067169f..6eeeb67bed84 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -45,6 +45,17 @@ bool libceph_compatible(void *data)
 }
 EXPORT_SYMBOL(libceph_compatible);
 
+static int param_get_supported_features(char *buffer,
+					const struct kernel_param *kp)
+{
+	return sprintf(buffer, "0x%llx", CEPH_FEATURES_SUPPORTED_DEFAULT);
+}
+static const struct kernel_param_ops param_ops_supported_features = {
+	.get = param_get_supported_features,
+};
+module_param_cb(supported_features, &param_ops_supported_features, NULL,
+		S_IRUGO);
+
 /*
  * find filename portion of a path (/foo/bar/baz -> baz)
  */
-- 
cgit v1.2.3


From 3997c01d260ed00d712b051fdab022a08719441e Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 11:15:06 +0200
Subject: ceph: convert ceph_mds_session.s_ref from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c | 18 +++++++++---------
 fs/ceph/mds_client.h |  5 +++--
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c681762d76e6..074490542b4c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -378,9 +378,9 @@ const char *ceph_session_state_name(int s)
 
 static struct ceph_mds_session *get_session(struct ceph_mds_session *s)
 {
-	if (atomic_inc_not_zero(&s->s_ref)) {
+	if (refcount_inc_not_zero(&s->s_ref)) {
 		dout("mdsc get_session %p %d -> %d\n", s,
-		     atomic_read(&s->s_ref)-1, atomic_read(&s->s_ref));
+		     refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref));
 		return s;
 	} else {
 		dout("mdsc get_session %p 0 -- FAIL", s);
@@ -391,8 +391,8 @@ static struct ceph_mds_session *get_session(struct ceph_mds_session *s)
 void ceph_put_mds_session(struct ceph_mds_session *s)
 {
 	dout("mdsc put_session %p %d -> %d\n", s,
-	     atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
-	if (atomic_dec_and_test(&s->s_ref)) {
+	     refcount_read(&s->s_ref), refcount_read(&s->s_ref)-1);
+	if (refcount_dec_and_test(&s->s_ref)) {
 		if (s->s_auth.authorizer)
 			ceph_auth_destroy_authorizer(s->s_auth.authorizer);
 		kfree(s);
@@ -411,7 +411,7 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
 		return NULL;
 	session = mdsc->sessions[mds];
 	dout("lookup_mds_session %p %d\n", session,
-	     atomic_read(&session->s_ref));
+	     refcount_read(&session->s_ref));
 	get_session(session);
 	return session;
 }
@@ -466,7 +466,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	INIT_LIST_HEAD(&s->s_caps);
 	s->s_nr_caps = 0;
 	s->s_trim_caps = 0;
-	atomic_set(&s->s_ref, 1);
+	refcount_set(&s->s_ref, 1);
 	INIT_LIST_HEAD(&s->s_waiting);
 	INIT_LIST_HEAD(&s->s_unsafe);
 	s->s_num_cap_releases = 0;
@@ -494,7 +494,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	}
 	mdsc->sessions[mds] = s;
 	atomic_inc(&mdsc->num_sessions);
-	atomic_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
+	refcount_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
 
 	ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
 		      ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
@@ -3881,7 +3881,7 @@ static struct ceph_connection *con_get(struct ceph_connection *con)
 	struct ceph_mds_session *s = con->private;
 
 	if (get_session(s)) {
-		dout("mdsc con_get %p ok (%d)\n", s, atomic_read(&s->s_ref));
+		dout("mdsc con_get %p ok (%d)\n", s, refcount_read(&s->s_ref));
 		return con;
 	}
 	dout("mdsc con_get %p FAIL\n", s);
@@ -3892,7 +3892,7 @@ static void con_put(struct ceph_connection *con)
 {
 	struct ceph_mds_session *s = con->private;
 
-	dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref) - 1);
+	dout("mdsc con_put %p (%d)\n", s, refcount_read(&s->s_ref) - 1);
 	ceph_put_mds_session(s);
 }
 
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ac0475a2daa7..bbebcd55d79e 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -7,6 +7,7 @@
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
+#include <linux/refcount.h>
 
 #include <linux/ceph/types.h>
 #include <linux/ceph/messenger.h>
@@ -156,7 +157,7 @@ struct ceph_mds_session {
 	unsigned long     s_renew_requested; /* last time we sent a renew req */
 	u64               s_renew_seq;
 
-	atomic_t          s_ref;
+	refcount_t          s_ref;
 	struct list_head  s_waiting;  /* waiting requests */
 	struct list_head  s_unsafe;   /* unsafe requests */
 };
@@ -373,7 +374,7 @@ __ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
 static inline struct ceph_mds_session *
 ceph_get_mds_session(struct ceph_mds_session *s)
 {
-	atomic_inc(&s->s_ref);
+	refcount_inc(&s->s_ref);
 	return s;
 }
 
-- 
cgit v1.2.3


From 805692d0e0f7dec42b5e05fb8b2a5c172f66a3c2 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 3 Mar 2017 11:15:07 +0200
Subject: ceph: convert ceph_cap_snap.nref from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/caps.c  | 4 ++--
 fs/ceph/snap.c  | 2 +-
 fs/ceph/super.h | 5 +++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 68c78be19d5b..60185434162a 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1389,7 +1389,7 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci,
 		first_tid = cf->tid + 1;
 
 		capsnap = container_of(cf, struct ceph_cap_snap, cap_flush);
-		atomic_inc(&capsnap->nref);
+		refcount_inc(&capsnap->nref);
 		spin_unlock(&ci->i_ceph_lock);
 
 		dout("__flush_snaps %p capsnap %p tid %llu %s\n",
@@ -2202,7 +2202,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
 			     inode, capsnap, cf->tid,
 			     ceph_cap_string(capsnap->dirty));
 
-			atomic_inc(&capsnap->nref);
+			refcount_inc(&capsnap->nref);
 			spin_unlock(&ci->i_ceph_lock);
 
 			ret = __send_flush_snap(inode, session, capsnap, cap->mseq,
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 8f8b41c2ef0f..dab5d6732345 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -519,7 +519,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 	     capsnap->need_flush ? "" : "no_flush");
 	ihold(inode);
 
-	atomic_set(&capsnap->nref, 1);
+	refcount_set(&capsnap->nref, 1);
 	INIT_LIST_HEAD(&capsnap->ci_item);
 
 	capsnap->follows = old_snapc->seq;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index fe6b9cfc4013..c68e6a045fb9 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -14,6 +14,7 @@
 #include <linux/writeback.h>
 #include <linux/slab.h>
 #include <linux/posix_acl.h>
+#include <linux/refcount.h>
 
 #include <linux/ceph/libceph.h>
 
@@ -162,7 +163,7 @@ struct ceph_cap_flush {
  * data before flushing the snapped state (tracked here) back to the MDS.
  */
 struct ceph_cap_snap {
-	atomic_t nref;
+	refcount_t nref;
 	struct list_head ci_item;
 
 	struct ceph_cap_flush cap_flush;
@@ -191,7 +192,7 @@ struct ceph_cap_snap {
 
 static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
 {
-	if (atomic_dec_and_test(&capsnap->nref)) {
+	if (refcount_dec_and_test(&capsnap->nref)) {
 		if (capsnap->xattr_blob)
 			ceph_buffer_put(capsnap->xattr_blob);
 		kfree(capsnap);
-- 
cgit v1.2.3


From 06dfa96399a9a3280dd81e47f8696aa89f1783e7 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 17 Mar 2017 14:10:27 +0200
Subject: libceph: convert ceph_snap_context.nref from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/libceph.h | 3 ++-
 net/ceph/snapshot.c          | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index cecbf5a26e5a..3229ae6c7846 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -14,6 +14,7 @@
 #include <linux/wait.h>
 #include <linux/writeback.h>
 #include <linux/slab.h>
+#include <linux/refcount.h>
 
 #include <linux/ceph/types.h>
 #include <linux/ceph/messenger.h>
@@ -161,7 +162,7 @@ struct ceph_client {
  * dirtied.
  */
 struct ceph_snap_context {
-	atomic_t nref;
+	refcount_t nref;
 	u64 seq;
 	u32 num_snaps;
 	u64 snaps[];
diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c
index 705414e78ae0..e14a5d038656 100644
--- a/net/ceph/snapshot.c
+++ b/net/ceph/snapshot.c
@@ -49,7 +49,7 @@ struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
 	if (!snapc)
 		return NULL;
 
-	atomic_set(&snapc->nref, 1);
+	refcount_set(&snapc->nref, 1);
 	snapc->num_snaps = snap_count;
 
 	return snapc;
@@ -59,7 +59,7 @@ EXPORT_SYMBOL(ceph_create_snap_context);
 struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc)
 {
 	if (sc)
-		atomic_inc(&sc->nref);
+		refcount_inc(&sc->nref);
 	return sc;
 }
 EXPORT_SYMBOL(ceph_get_snap_context);
@@ -68,7 +68,7 @@ void ceph_put_snap_context(struct ceph_snap_context *sc)
 {
 	if (!sc)
 		return;
-	if (atomic_dec_and_test(&sc->nref)) {
+	if (refcount_dec_and_test(&sc->nref)) {
 		/*printk(" deleting snap_context %p\n", sc);*/
 		kfree(sc);
 	}
-- 
cgit v1.2.3


From 02113a0f14e20bd8e675d7cec16db6eaaf2b2380 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 17 Mar 2017 14:10:28 +0200
Subject: libceph: convert ceph_osd.o_ref from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h |  3 ++-
 net/ceph/osd_client.c           | 16 ++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index c125b5d9e13c..d6a625e75040 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -5,6 +5,7 @@
 #include <linux/kref.h>
 #include <linux/mempool.h>
 #include <linux/rbtree.h>
+#include <linux/refcount.h>
 
 #include <linux/ceph/types.h>
 #include <linux/ceph/osdmap.h>
@@ -27,7 +28,7 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
 
 /* a given osd we're communicating with */
 struct ceph_osd {
-	atomic_t o_ref;
+	refcount_t o_ref;
 	struct ceph_osd_client *o_osdc;
 	int o_osd;
 	int o_incarnation;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index e15ea9e4c495..b4500a8ab8b3 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1005,7 +1005,7 @@ static bool osd_registered(struct ceph_osd *osd)
  */
 static void osd_init(struct ceph_osd *osd)
 {
-	atomic_set(&osd->o_ref, 1);
+	refcount_set(&osd->o_ref, 1);
 	RB_CLEAR_NODE(&osd->o_node);
 	osd->o_requests = RB_ROOT;
 	osd->o_linger_requests = RB_ROOT;
@@ -1050,9 +1050,9 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
 
 static struct ceph_osd *get_osd(struct ceph_osd *osd)
 {
-	if (atomic_inc_not_zero(&osd->o_ref)) {
-		dout("get_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref)-1,
-		     atomic_read(&osd->o_ref));
+	if (refcount_inc_not_zero(&osd->o_ref)) {
+		dout("get_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref)-1,
+		     refcount_read(&osd->o_ref));
 		return osd;
 	} else {
 		dout("get_osd %p FAIL\n", osd);
@@ -1062,9 +1062,9 @@ static struct ceph_osd *get_osd(struct ceph_osd *osd)
 
 static void put_osd(struct ceph_osd *osd)
 {
-	dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
-	     atomic_read(&osd->o_ref) - 1);
-	if (atomic_dec_and_test(&osd->o_ref)) {
+	dout("put_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref),
+	     refcount_read(&osd->o_ref) - 1);
+	if (refcount_dec_and_test(&osd->o_ref)) {
 		osd_cleanup(osd);
 		kfree(osd);
 	}
@@ -4126,7 +4126,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
 		close_osd(osd);
 	}
 	up_write(&osdc->lock);
-	WARN_ON(atomic_read(&osdc->homeless_osd.o_ref) != 1);
+	WARN_ON(refcount_read(&osdc->homeless_osd.o_ref) != 1);
 	osd_cleanup(&osdc->homeless_osd);
 
 	WARN_ON(!list_empty(&osdc->osd_lru));
-- 
cgit v1.2.3


From 0e1a5ee6577e43e5be55369d398107080b360941 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 17 Mar 2017 14:10:29 +0200
Subject: libceph: convert ceph_pagelist.refcnt from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c          | 2 +-
 include/linux/ceph/pagelist.h | 6 +++---
 net/ceph/pagelist.c           | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 074490542b4c..b16f1cf552a8 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1991,7 +1991,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 
 	if (req->r_pagelist) {
 		struct ceph_pagelist *pagelist = req->r_pagelist;
-		atomic_inc(&pagelist->refcnt);
+		refcount_inc(&pagelist->refcnt);
 		ceph_msg_data_add_pagelist(msg, pagelist);
 		msg->hdr.data_len = cpu_to_le32(pagelist->length);
 	} else {
diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
index 13d71fe18b0c..75a7db21457d 100644
--- a/include/linux/ceph/pagelist.h
+++ b/include/linux/ceph/pagelist.h
@@ -2,7 +2,7 @@
 #define __FS_CEPH_PAGELIST_H
 
 #include <asm/byteorder.h>
-#include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <linux/list.h>
 #include <linux/types.h>
 
@@ -13,7 +13,7 @@ struct ceph_pagelist {
 	size_t room;
 	struct list_head free_list;
 	size_t num_pages_free;
-	atomic_t refcnt;
+	refcount_t refcnt;
 };
 
 struct ceph_pagelist_cursor {
@@ -30,7 +30,7 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
 	pl->room = 0;
 	INIT_LIST_HEAD(&pl->free_list);
 	pl->num_pages_free = 0;
-	atomic_set(&pl->refcnt, 1);
+	refcount_set(&pl->refcnt, 1);
 }
 
 extern void ceph_pagelist_release(struct ceph_pagelist *pl);
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 6864007e64fc..ce09f73be759 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -16,7 +16,7 @@ static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
 
 void ceph_pagelist_release(struct ceph_pagelist *pl)
 {
-	if (!atomic_dec_and_test(&pl->refcnt))
+	if (!refcount_dec_and_test(&pl->refcnt))
 		return;
 	ceph_pagelist_unmap_tail(pl);
 	while (!list_empty(&pl->head)) {
-- 
cgit v1.2.3


From 8242c9f35ae77ca21e6cf79827787789a988f44c Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Sat, 25 Mar 2017 17:28:10 +0800
Subject: ceph: fix wrong check in ceph_renew_caps()

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 26cc95421cca..ab4823e3e0d5 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -192,7 +192,7 @@ int ceph_renew_caps(struct inode *inode)
 	spin_lock(&ci->i_ceph_lock);
 	wanted = __ceph_caps_file_wanted(ci);
 	if (__ceph_is_any_real_caps(ci) &&
-	    (!(wanted & CEPH_CAP_ANY_WR) == 0 || ci->i_auth_cap)) {
+	    (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) {
 		int issued = __ceph_caps_issued(ci, NULL);
 		spin_unlock(&ci->i_ceph_lock);
 		dout("renew caps %p want %s issued %s updating mds_wanted\n",
-- 
cgit v1.2.3


From 76201b6354bb3aa31c7ba2bd42b9cbb8dda71c44 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Tue, 28 Mar 2017 17:04:13 +0800
Subject: ceph: allow connecting to mds whose rank >= mdsmap::m_max_mds

mdsmap::m_max_mds is the expected count of active mds. It's not the
max rank of active mds. User can decrease mdsmap::m_max_mds, but does
not stop mds whose rank >= mdsmap::m_max_mds.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/debugfs.c           | 21 ++++++++++-----------
 fs/ceph/mds_client.c        | 10 +++++-----
 fs/ceph/mdsmap.c            | 44 +++++++++++++++++++++++++++++++++++++-------
 include/linux/ceph/mdsmap.h |  7 ++++---
 4 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index f2ae393e2c31..4107a887ca34 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -22,20 +22,19 @@ static int mdsmap_show(struct seq_file *s, void *p)
 {
 	int i;
 	struct ceph_fs_client *fsc = s->private;
+	struct ceph_mdsmap *mdsmap;
 
 	if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
 		return 0;
-	seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
-	seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
-	seq_printf(s, "session_timeout %d\n",
-		       fsc->mdsc->mdsmap->m_session_timeout);
-	seq_printf(s, "session_autoclose %d\n",
-		       fsc->mdsc->mdsmap->m_session_autoclose);
-	for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) {
-		struct ceph_entity_addr *addr =
-			&fsc->mdsc->mdsmap->m_info[i].addr;
-		int state = fsc->mdsc->mdsmap->m_info[i].state;
-
+	mdsmap = fsc->mdsc->mdsmap;
+	seq_printf(s, "epoch %d\n", mdsmap->m_epoch);
+	seq_printf(s, "root %d\n", mdsmap->m_root);
+	seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds);
+	seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout);
+	seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose);
+	for (i = 0; i < mdsmap->m_num_mds; i++) {
+		struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
+		int state = mdsmap->m_info[i].state;
 		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
 			       ceph_pr_addr(&addr->in_addr),
 			       ceph_mds_state_name(state));
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b16f1cf552a8..2733932bf192 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -441,7 +441,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 {
 	struct ceph_mds_session *s;
 
-	if (mds >= mdsc->mdsmap->m_max_mds)
+	if (mds >= mdsc->mdsmap->m_num_mds)
 		return ERR_PTR(-EINVAL);
 
 	s = kzalloc(sizeof(*s), GFP_NOFS);
@@ -1004,7 +1004,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
 	struct ceph_mds_session *ts;
 	int i, mds = session->s_mds;
 
-	if (mds >= mdsc->mdsmap->m_max_mds)
+	if (mds >= mdsc->mdsmap->m_num_mds)
 		return;
 
 	mi = &mdsc->mdsmap->m_info[mds];
@@ -3107,7 +3107,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 	dout("check_new_map new %u old %u\n",
 	     newmap->m_epoch, oldmap->m_epoch);
 
-	for (i = 0; i < oldmap->m_max_mds && i < mdsc->max_sessions; i++) {
+	for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) {
 		if (mdsc->sessions[i] == NULL)
 			continue;
 		s = mdsc->sessions[i];
@@ -3121,7 +3121,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 		     ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
 		     ceph_session_state_name(s->s_state));
 
-		if (i >= newmap->m_max_mds ||
+		if (i >= newmap->m_num_mds ||
 		    memcmp(ceph_mdsmap_get_addr(oldmap, i),
 			   ceph_mdsmap_get_addr(newmap, i),
 			   sizeof(struct ceph_entity_addr))) {
@@ -3167,7 +3167,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 		}
 	}
 
-	for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) {
+	for (i = 0; i < newmap->m_num_mds && i < mdsc->max_sessions; i++) {
 		s = mdsc->sessions[i];
 		if (!s)
 			continue;
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 5454e2327a5f..1a748cf88535 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -22,11 +22,11 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 	int i;
 
 	/* special case for one mds */
-	if (1 == m->m_max_mds && m->m_info[0].state > 0)
+	if (1 == m->m_num_mds && m->m_info[0].state > 0)
 		return 0;
 
 	/* count */
-	for (i = 0; i < m->m_max_mds; i++)
+	for (i = 0; i < m->m_num_mds; i++)
 		if (m->m_info[i].state > 0)
 			n++;
 	if (n == 0)
@@ -135,8 +135,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 	m->m_session_autoclose = ceph_decode_32(p);
 	m->m_max_file_size = ceph_decode_64(p);
 	m->m_max_mds = ceph_decode_32(p);
+	m->m_num_mds = m->m_max_mds;
 
-	m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
+	m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS);
 	if (m->m_info == NULL)
 		goto nomem;
 
@@ -207,9 +208,20 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 		     ceph_pr_addr(&addr.in_addr),
 		     ceph_mds_state_name(state));
 
-		if (mds < 0 || mds >= m->m_max_mds || state <= 0)
+		if (mds < 0 || state <= 0)
 			continue;
 
+		if (mds >= m->m_num_mds) {
+			int new_num = max(mds + 1, m->m_num_mds * 2);
+			void *new_m_info = krealloc(m->m_info,
+						new_num * sizeof(*m->m_info),
+						GFP_NOFS | __GFP_ZERO);
+			if (!new_m_info)
+				goto nomem;
+			m->m_info = new_m_info;
+			m->m_num_mds = new_num;
+		}
+
 		info = &m->m_info[mds];
 		info->global_id = global_id;
 		info->state = state;
@@ -229,6 +241,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 			info->export_targets = NULL;
 		}
 	}
+	if (m->m_num_mds > m->m_max_mds) {
+		/* find max up mds */
+		for (i = m->m_num_mds; i >= m->m_max_mds; i--) {
+			if (i == 0 || m->m_info[i-1].state > 0)
+				break;
+		}
+		m->m_num_mds = i;
+	}
 
 	/* pg_pools */
 	ceph_decode_32_safe(p, end, n, bad);
@@ -270,12 +290,22 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 
 		for (i = 0; i < n; i++) {
 			s32 mds = ceph_decode_32(p);
-			if (mds >= 0 && mds < m->m_max_mds) {
+			if (mds >= 0 && mds < m->m_num_mds) {
 				if (m->m_info[mds].laggy)
 					num_laggy++;
 			}
 		}
 		m->m_num_laggy = num_laggy;
+
+		if (n > m->m_num_mds) {
+			void *new_m_info = krealloc(m->m_info,
+						    n * sizeof(*m->m_info),
+						    GFP_NOFS | __GFP_ZERO);
+			if (!new_m_info)
+				goto nomem;
+			m->m_info = new_m_info;
+		}
+		m->m_num_mds = n;
 	}
 
 	/* inc */
@@ -341,7 +371,7 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
 {
 	int i;
 
-	for (i = 0; i < m->m_max_mds; i++)
+	for (i = 0; i < m->m_num_mds; i++)
 		kfree(m->m_info[i].export_targets);
 	kfree(m->m_info);
 	kfree(m->m_data_pg_pools);
@@ -357,7 +387,7 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
 		return false;
 	if (m->m_num_laggy > 0)
 		return false;
-	for (i = 0; i < m->m_max_mds; i++) {
+	for (i = 0; i < m->m_num_mds; i++) {
 		if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
 			nr_active++;
 	}
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index 8ed5dc505fbb..d5f783f3226a 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -25,6 +25,7 @@ struct ceph_mdsmap {
 	u32 m_session_autoclose;        /* seconds */
 	u64 m_max_file_size;
 	u32 m_max_mds;                  /* size of m_addr, m_state arrays */
+	int m_num_mds;
 	struct ceph_mds_info *m_info;
 
 	/* which object pools file data can be stored in */
@@ -40,7 +41,7 @@ struct ceph_mdsmap {
 static inline struct ceph_entity_addr *
 ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
 {
-	if (w >= m->m_max_mds)
+	if (w >= m->m_num_mds)
 		return NULL;
 	return &m->m_info[w].addr;
 }
@@ -48,14 +49,14 @@ ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
 static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
 {
 	BUG_ON(w < 0);
-	if (w >= m->m_max_mds)
+	if (w >= m->m_num_mds)
 		return CEPH_MDS_STATE_DNE;
 	return m->m_info[w].state;
 }
 
 static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
 {
-	if (w >= 0 && w < m->m_max_mds)
+	if (w >= 0 && w < m->m_num_mds)
 		return m->m_info[w].laggy;
 	return false;
 }
-- 
cgit v1.2.3


From 0a07fc8cd01b6838d999a5eacaa99fe90b8f768b Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Wed, 29 Mar 2017 15:30:24 +0800
Subject: ceph: fix potential use-after-free

__unregister_session() free the session if it drops the last
reference. We should grab an extra reference if we want to use
session after __unregister_session().

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 2733932bf192..3bdff7ae7bfd 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2638,8 +2638,10 @@ static void handle_session(struct ceph_mds_session *session,
 	seq = le64_to_cpu(h->seq);
 
 	mutex_lock(&mdsc->mutex);
-	if (op == CEPH_SESSION_CLOSE)
+	if (op == CEPH_SESSION_CLOSE) {
+		get_session(session);
 		__unregister_session(mdsc, session);
+	}
 	/* FIXME: this ttl calculation is generous */
 	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
 	mutex_unlock(&mdsc->mutex);
@@ -2728,6 +2730,8 @@ static void handle_session(struct ceph_mds_session *session,
 			kick_requests(mdsc, mds);
 		mutex_unlock(&mdsc->mutex);
 	}
+	if (op == CEPH_SESSION_CLOSE)
+		ceph_put_mds_session(session);
 	return;
 
 bad:
@@ -3128,8 +3132,10 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 			if (s->s_state == CEPH_MDS_SESSION_OPENING) {
 				/* the session never opened, just close it
 				 * out now */
-				__wake_requests(mdsc, &s->s_waiting);
+				get_session(s);
 				__unregister_session(mdsc, s);
+				__wake_requests(mdsc, &s->s_waiting);
+				ceph_put_mds_session(s);
 			} else {
 				/* just close it */
 				mutex_unlock(&mdsc->mutex);
-- 
cgit v1.2.3


From 2827528da003ad207930f0d1af5faf3e482d6393 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Tue, 28 Mar 2017 17:56:29 +0800
Subject: ceph: close stopped mds' session

If a mds has stopped, close its session and clean up its session
requests/caps. The process is similar to handling SESSION_CLOSE
initiated by mds.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3bdff7ae7bfd..a22688873ec3 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3136,6 +3136,22 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 				__unregister_session(mdsc, s);
 				__wake_requests(mdsc, &s->s_waiting);
 				ceph_put_mds_session(s);
+			} else if (i >= newmap->m_num_mds) {
+				/* force close session for stopped mds */
+				get_session(s);
+				__unregister_session(mdsc, s);
+				__wake_requests(mdsc, &s->s_waiting);
+				kick_requests(mdsc, i);
+				mutex_unlock(&mdsc->mutex);
+
+				mutex_lock(&s->s_mutex);
+				cleanup_session_requests(mdsc, s);
+				remove_session_caps(s);
+				mutex_unlock(&s->s_mutex);
+
+				ceph_put_mds_session(s);
+
+				mutex_lock(&mdsc->mutex);
 			} else {
 				/* just close it */
 				mutex_unlock(&mdsc->mutex);
-- 
cgit v1.2.3


From 79162547b76e4979b21ef80c9629ada94a51a59b Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Wed, 5 Apr 2017 12:54:05 -0400
Subject: ceph: make seeky readdir more efficient

Current cephfs client uses string to indicate start position of
readdir. The string is last entry of previous readdir reply.
This approach does not work for seeky readdir because we can
not easily convert the new postion to a string. For seeky readdir,
mds needs to return dentries from the beginning. Client keeps
retrying if the reply does not contain the dentry it wants.

In current version of ceph, mds sorts CDentry in its cache in
hash order. Client also uses dentry hash to compose dir postion.
For seeky readdir, if client passes the hash part of dir postion
to mds. mds can avoid replying useless dentries.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/dir.c                |  4 ++++
 fs/ceph/inode.c              | 17 ++++++++++++-----
 fs/ceph/mds_client.c         |  1 +
 fs/ceph/mds_client.h         |  3 ++-
 include/linux/ceph/ceph_fs.h |  2 ++
 5 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 3e9ad501addf..ae61cdf7d489 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -378,7 +378,11 @@ more:
 				ceph_mdsc_put_request(req);
 				return -ENOMEM;
 			}
+		} else if (is_hash_order(ctx->pos)) {
+			req->r_args.readdir.offset_hash =
+				cpu_to_le32(fpos_hash(ctx->pos));
 		}
+
 		req->r_dir_release_cnt = fi->dir_release_count;
 		req->r_dir_ordered_cnt = fi->dir_ordered_count;
 		req->r_readdir_cache_idx = fi->readdir_cache_idx;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index d3119fe3ab45..dcce79b84406 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1482,10 +1482,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 	if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
 		return readdir_prepopulate_inodes_only(req, session);
 
-	if (rinfo->hash_order && req->r_path2) {
-		last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
-					  req->r_path2, strlen(req->r_path2));
-		last_hash = ceph_frag_value(last_hash);
+	if (rinfo->hash_order) {
+		if (req->r_path2) {
+			last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
+						  req->r_path2,
+						  strlen(req->r_path2));
+			last_hash = ceph_frag_value(last_hash);
+		} else if (rinfo->offset_hash) {
+			/* mds understands offset_hash */
+			WARN_ON_ONCE(req->r_readdir_offset != 2);
+			last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
+		}
 	}
 
 	if (rinfo->dir_dir &&
@@ -1510,7 +1517,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 	}
 
 	if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 &&
-	    !(rinfo->hash_order && req->r_path2)) {
+	    !(rinfo->hash_order && last_hash)) {
 		/* note dir version at start of readdir so we can tell
 		 * if any dentries get dropped */
 		req->r_dir_release_cnt = atomic64_read(&ci->i_release_count);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a22688873ec3..8cc4d4e8b077 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -189,6 +189,7 @@ static int parse_reply_info_dir(void **p, void *end,
 		info->dir_end = !!(flags & CEPH_READDIR_FRAG_END);
 		info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE);
 		info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER);
+		info->offset_hash = !!(flags & CEPH_READDIR_OFFSET_HASH);
 	}
 	if (num == 0)
 		goto done;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index bbebcd55d79e..3e67dd2169fa 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -83,9 +83,10 @@ struct ceph_mds_reply_info_parsed {
 			struct ceph_mds_reply_dirfrag *dir_dir;
 			size_t			      dir_buf_size;
 			int                           dir_nr;
-			bool			      dir_complete;
 			bool			      dir_end;
+			bool			      dir_complete;
 			bool			      hash_order;
+			bool			      offset_hash;
 			struct ceph_mds_reply_dir_entry  *dir_entries;
 		};
 
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index f4b2ee18f38c..1787e4a8e251 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -365,6 +365,7 @@ extern const char *ceph_mds_op_name(int op);
 #define CEPH_READDIR_FRAG_END		(1<<0)
 #define CEPH_READDIR_FRAG_COMPLETE	(1<<8)
 #define CEPH_READDIR_HASH_ORDER		(1<<9)
+#define CEPH_READDIR_OFFSET_HASH	(1<<10)
 
 union ceph_mds_request_args {
 	struct {
@@ -384,6 +385,7 @@ union ceph_mds_request_args {
 		__le32 max_entries;          /* how many dentries to grab */
 		__le32 max_bytes;
 		__le16 flags;
+		__le32 offset_hash;
 	} __attribute__ ((packed)) readdir;
 	struct {
 		__le32 mode;
-- 
cgit v1.2.3


From aa26d662b9d44e7f5b0d109e892e537a23471863 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 4 Apr 2017 08:39:36 -0400
Subject: libceph: remove req->r_replay_version

Nothing uses this anymore with the removal of the ack vs. commit code.
Remove the field and just encode zeroes into place in the request
encoding.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h | 1 -
 net/ceph/debugfs.c              | 4 +---
 net/ceph/osd_client.c           | 7 ++++---
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index d6a625e75040..3fc9e7754a9b 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -192,7 +192,6 @@ struct ceph_osd_request {
 	unsigned long r_stamp;                /* jiffies, send or check time */
 	unsigned long r_start_stamp;          /* jiffies */
 	int r_attempts;
-	struct ceph_eversion r_replay_version; /* aka reassert_version */
 	u32 r_last_force_resend;
 	u32 r_map_dne_bound;
 
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index c62b2b029a6e..d7e63a4f5578 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -177,9 +177,7 @@ static void dump_request(struct seq_file *s, struct ceph_osd_request *req)
 	seq_printf(s, "%llu\t", req->r_tid);
 	dump_target(s, &req->r_t);
 
-	seq_printf(s, "\t%d\t%u'%llu", req->r_attempts,
-		   le32_to_cpu(req->r_replay_version.epoch),
-		   le64_to_cpu(req->r_replay_version.version));
+	seq_printf(s, "\t%d", req->r_attempts);
 
 	for (i = 0; i < req->r_num_ops; i++) {
 		struct ceph_osd_req_op *op = &req->r_ops[i];
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b4500a8ab8b3..feb666c22381 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1503,9 +1503,10 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 	ceph_encode_32(&p, req->r_flags);
 	ceph_encode_timespec(p, &req->r_mtime);
 	p += sizeof(struct ceph_timespec);
-	/* aka reassert_version */
-	memcpy(p, &req->r_replay_version, sizeof(req->r_replay_version));
-	p += sizeof(req->r_replay_version);
+
+	/* reassert_version */
+	memset(p, 0, sizeof(struct ceph_eversion));
+	p += sizeof(struct ceph_eversion);
 
 	/* oloc */
 	ceph_start_encoding(&p, 5, 4,
-- 
cgit v1.2.3


From a1f4020aab10a6bddb2d061c960ebe52cdfa30b5 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 4 Apr 2017 08:39:37 -0400
Subject: libceph: allow requests to return immediately on full conditions if
 caller wishes

Usually, when the osd map is flagged as full or the pool is at quota,
write requests just hang. This is not what we want for cephfs, where
it would be better to simply report -ENOSPC back to userland instead
of stalling.

If the caller knows that it will want an immediate error return instead
of blocking on a full or at-quota error condition then allow it to set a
flag to request that behavior.

Set that flag in ceph_osdc_new_request (since ceph.ko is the only caller),
and on any other write request from ceph.ko.

A later patch will deal with requests that were submitted before the new
map showing the full condition came in.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/addr.c                  | 1 +
 fs/ceph/file.c                  | 1 +
 include/linux/ceph/osd_client.h | 1 +
 net/ceph/osd_client.c           | 7 +++++++
 4 files changed, 10 insertions(+)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1a3e1b40799a..7e3fae334620 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1892,6 +1892,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
 	err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
 
 	wr_req->r_mtime = ci->vfs_inode.i_mtime;
+	wr_req->r_abort_on_full = true;
 	err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
 
 	if (!err)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ab4823e3e0d5..134c978141d0 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -781,6 +781,7 @@ static void ceph_aio_retry_work(struct work_struct *work)
 	req->r_callback = ceph_aio_complete_req;
 	req->r_inode = inode;
 	req->r_priv = aio_req;
+	req->r_abort_on_full = true;
 
 	ret = ceph_osdc_start_request(req->r_osdc, req, false);
 out:
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 3fc9e7754a9b..8cf644197b1a 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -187,6 +187,7 @@ struct ceph_osd_request {
 	struct timespec r_mtime;              /* ditto */
 	u64 r_data_offset;                    /* ditto */
 	bool r_linger;                        /* don't resend on failure */
+	bool r_abort_on_full;		      /* return ENOSPC when full */
 
 	/* internal */
 	unsigned long r_stamp;                /* jiffies, send or check time */
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index feb666c22381..52a2019a2b64 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -961,6 +961,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 				       truncate_size, truncate_seq);
 	}
 
+	req->r_abort_on_full = true;
 	req->r_flags = flags;
 	req->r_base_oloc.pool = layout->pool_id;
 	req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns);
@@ -1627,6 +1628,7 @@ static void maybe_request_map(struct ceph_osd_client *osdc)
 		ceph_monc_renew_subs(&osdc->client->monc);
 }
 
+static void complete_request(struct ceph_osd_request *req, int err);
 static void send_map_check(struct ceph_osd_request *req);
 
 static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
@@ -1636,6 +1638,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
 	enum calc_target_result ct_res;
 	bool need_send = false;
 	bool promoted = false;
+	bool need_abort = false;
 
 	WARN_ON(req->r_tid);
 	dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
@@ -1670,6 +1673,8 @@ again:
 		pr_warn_ratelimited("FULL or reached pool quota\n");
 		req->r_t.paused = true;
 		maybe_request_map(osdc);
+		if (req->r_abort_on_full)
+			need_abort = true;
 	} else if (!osd_homeless(osd)) {
 		need_send = true;
 	} else {
@@ -1686,6 +1691,8 @@ again:
 	link_request(osd, req);
 	if (need_send)
 		send_request(req);
+	else if (need_abort)
+		complete_request(req, -ENOSPC);
 	mutex_unlock(&osd->lock);
 
 	if (ct_res == CALC_TARGET_POOL_DNE)
-- 
cgit v1.2.3


From fc36d0a42c19870e57a542c2dd0972185584d407 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 4 Apr 2017 08:39:39 -0400
Subject: libceph: abort already submitted but abortable requests when map or
 pool goes full

When a Ceph volume hits capacity, a flag is set in the OSD map to
indicate that, and a new map is sprayed around the cluster. With cephfs
we want it to shut down any abortable requests that are in progress with
an -ENOSPC error as they'd just hang otherwise.

Add a new ceph_osdc_abort_on_full helper function to handle this. It
will first check whether there is an out-of-space condition in the
cluster and then walk the tree and abort any request that has
r_abort_on_full set with a -ENOSPC error. Call this new function
directly whenever we get a new OSD map.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osd_client.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 52a2019a2b64..55b7585ccefd 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1807,6 +1807,39 @@ static void abort_request(struct ceph_osd_request *req, int err)
 	complete_request(req, err);
 }
 
+/*
+ * Drop all pending requests that are stalled waiting on a full condition to
+ * clear, and complete them with ENOSPC as the return code.
+ */
+static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc)
+{
+	struct rb_node *n;
+
+	dout("enter abort_on_full\n");
+
+	if (!ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && !have_pool_full(osdc))
+		goto out;
+
+	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+		struct rb_node *m;
+
+		m = rb_first(&osd->o_requests);
+		while (m) {
+			struct ceph_osd_request *req = rb_entry(m,
+					struct ceph_osd_request, r_node);
+			m = rb_next(m);
+
+			if (req->r_abort_on_full &&
+			    (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
+			     pool_full(osdc, req->r_t.target_oloc.pool)))
+				abort_request(req, -ENOSPC);
+		}
+	}
+out:
+	dout("return abort_on_full\n");
+}
+
 static void check_pool_dne(struct ceph_osd_request *req)
 {
 	struct ceph_osd_client *osdc = req->r_osdc;
@@ -3265,6 +3298,7 @@ done:
 
 	kick_requests(osdc, &need_resend, &need_resend_linger);
 
+	ceph_osdc_abort_on_full(osdc);
 	ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP,
 			  osdc->osdmap->epoch);
 	up_write(&osdc->lock);
-- 
cgit v1.2.3


From 58eb7932ae4d671d2a2377a1779eda96a2789b11 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 18 Apr 2017 09:21:16 -0400
Subject: libceph: add an epoch_barrier field to struct ceph_osd_client

Cephfs can get cap update requests that contain a new epoch barrier in
them. When that happens we want to pause all OSD traffic until the right
map epoch arrives.

Add an epoch_barrier field to ceph_osd_client that is protected by the
osdc->lock rwsem. When the barrier is set, and the current OSD map
epoch is below that, pause the request target when submitting the
request or when revisiting it. Add a way for upper layers (cephfs)
to update the epoch_barrier as well.

If we get a new map, compare the new epoch against the barrier before
kicking requests and request another map if the map epoch is still lower
than the one we want.

If we get a map with a full pool, or at quota condition, then set the
barrier to the current epoch value.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h |  2 ++
 net/ceph/debugfs.c              |  3 +-
 net/ceph/osd_client.c           | 79 +++++++++++++++++++++++++++++++++++++----
 3 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 8cf644197b1a..85650b415e73 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -267,6 +267,7 @@ struct ceph_osd_client {
 	struct rb_root         osds;          /* osds */
 	struct list_head       osd_lru;       /* idle osds */
 	spinlock_t             osd_lru_lock;
+	u32		       epoch_barrier;
 	struct ceph_osd        homeless_osd;
 	atomic64_t             last_tid;      /* tid of last request */
 	u64                    last_linger_id;
@@ -305,6 +306,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
 				   struct ceph_msg *msg);
 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
 				 struct ceph_msg *msg);
+void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
 
 extern void osd_req_op_init(struct ceph_osd_request *osd_req,
 			    unsigned int which, u16 opcode, u32 flags);
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index d7e63a4f5578..71ba13927b3d 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -62,7 +62,8 @@ static int osdmap_show(struct seq_file *s, void *p)
 		return 0;
 
 	down_read(&osdc->lock);
-	seq_printf(s, "epoch %d flags 0x%x\n", map->epoch, map->flags);
+	seq_printf(s, "epoch %u barrier %u flags 0x%x\n", map->epoch,
+			osdc->epoch_barrier, map->flags);
 
 	for (n = rb_first(&map->pg_pools); n; n = rb_next(n)) {
 		struct ceph_pg_pool_info *pi =
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 55b7585ccefd..9b8f57fd5ee1 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1298,8 +1298,9 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc,
 		       __pool_full(pi);
 
 	WARN_ON(pi->id != t->base_oloc.pool);
-	return (t->flags & CEPH_OSD_FLAG_READ && pauserd) ||
-	       (t->flags & CEPH_OSD_FLAG_WRITE && pausewr);
+	return ((t->flags & CEPH_OSD_FLAG_READ) && pauserd) ||
+	       ((t->flags & CEPH_OSD_FLAG_WRITE) && pausewr) ||
+	       (osdc->osdmap->epoch < osdc->epoch_barrier);
 }
 
 enum calc_target_result {
@@ -1654,8 +1655,13 @@ again:
 		goto promote;
 	}
 
-	if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
-	    ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) {
+	if (osdc->osdmap->epoch < osdc->epoch_barrier) {
+		dout("req %p epoch %u barrier %u\n", req, osdc->osdmap->epoch,
+		     osdc->epoch_barrier);
+		req->r_t.paused = true;
+		maybe_request_map(osdc);
+	} else if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
+		   ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) {
 		dout("req %p pausewr\n", req);
 		req->r_t.paused = true;
 		maybe_request_map(osdc);
@@ -1807,19 +1813,77 @@ static void abort_request(struct ceph_osd_request *req, int err)
 	complete_request(req, err);
 }
 
+static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
+{
+	if (likely(eb > osdc->epoch_barrier)) {
+		dout("updating epoch_barrier from %u to %u\n",
+				osdc->epoch_barrier, eb);
+		osdc->epoch_barrier = eb;
+		/* Request map if we're not to the barrier yet */
+		if (eb > osdc->osdmap->epoch)
+			maybe_request_map(osdc);
+	}
+}
+
+void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
+{
+	down_read(&osdc->lock);
+	if (unlikely(eb > osdc->epoch_barrier)) {
+		up_read(&osdc->lock);
+		down_write(&osdc->lock);
+		update_epoch_barrier(osdc, eb);
+		up_write(&osdc->lock);
+	} else {
+		up_read(&osdc->lock);
+	}
+}
+EXPORT_SYMBOL(ceph_osdc_update_epoch_barrier);
+
 /*
  * Drop all pending requests that are stalled waiting on a full condition to
- * clear, and complete them with ENOSPC as the return code.
+ * clear, and complete them with ENOSPC as the return code. Set the
+ * osdc->epoch_barrier to the latest map epoch that we've seen if any were
+ * cancelled.
  */
 static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc)
 {
 	struct rb_node *n;
+	bool victims = false;
 
 	dout("enter abort_on_full\n");
 
 	if (!ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && !have_pool_full(osdc))
 		goto out;
 
+	/* Scan list and see if there is anything to abort */
+	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+		struct rb_node *m;
+
+		m = rb_first(&osd->o_requests);
+		while (m) {
+			struct ceph_osd_request *req = rb_entry(m,
+					struct ceph_osd_request, r_node);
+			m = rb_next(m);
+
+			if (req->r_abort_on_full) {
+				victims = true;
+				break;
+			}
+		}
+		if (victims)
+			break;
+	}
+
+	if (!victims)
+		goto out;
+
+	/*
+	 * Update the barrier to current epoch if it's behind that point,
+	 * since we know we have some calls to be aborted in the tree.
+	 */
+	update_epoch_barrier(osdc, osdc->osdmap->epoch);
+
 	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
 		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
 		struct rb_node *m;
@@ -1837,7 +1901,7 @@ static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc)
 		}
 	}
 out:
-	dout("return abort_on_full\n");
+	dout("return abort_on_full barrier=%u\n", osdc->epoch_barrier);
 }
 
 static void check_pool_dne(struct ceph_osd_request *req)
@@ -3293,7 +3357,8 @@ done:
 	pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) ||
 		  ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
 		  have_pool_full(osdc);
-	if (was_pauserd || was_pausewr || pauserd || pausewr)
+	if (was_pauserd || was_pausewr || pauserd || pausewr ||
+	    osdc->osdmap->epoch < osdc->epoch_barrier)
 		maybe_request_map(osdc);
 
 	kick_requests(osdc, &need_resend, &need_resend_linger);
-- 
cgit v1.2.3


From 92475f05bdb6daefce3f55f46551153e7ed05f45 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 13 Apr 2017 11:07:04 -0400
Subject: ceph: handle epoch barriers in cap messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Have the client store and update the osdc epoch_barrier when a cap
message comes in with one.

When sending cap messages, send the epoch barrier as well. This allows
clients to inform servers that their released caps may not be used until
a particular OSD map epoch.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: "Yan, Zheng” <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/caps.c       | 21 ++++++++++++++++-----
 fs/ceph/mds_client.c | 20 ++++++++++++++++++++
 fs/ceph/mds_client.h |  7 +++++--
 3 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 60185434162a..a3ebb632294e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1015,6 +1015,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
 	void *p;
 	size_t extra_len;
 	struct timespec zerotime = {0};
+	struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc;
 
 	dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
 	     " seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu"
@@ -1076,8 +1077,12 @@ static int send_cap_msg(struct cap_msg_args *arg)
 	ceph_encode_64(&p, arg->inline_data ? 0 : CEPH_INLINE_NONE);
 	/* inline data size */
 	ceph_encode_32(&p, 0);
-	/* osd_epoch_barrier (version 5) */
-	ceph_encode_32(&p, 0);
+	/*
+	 * osd_epoch_barrier (version 5)
+	 * The epoch_barrier is protected osdc->lock, so READ_ONCE here in
+	 * case it was recently changed
+	 */
+	ceph_encode_32(&p, READ_ONCE(osdc->epoch_barrier));
 	/* oldest_flush_tid (version 6) */
 	ceph_encode_64(&p, arg->oldest_flush_tid);
 
@@ -3633,13 +3638,19 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 		p += inline_len;
 	}
 
+	if (le16_to_cpu(msg->hdr.version) >= 5) {
+		struct ceph_osd_client	*osdc = &mdsc->fsc->client->osdc;
+		u32			epoch_barrier;
+
+		ceph_decode_32_safe(&p, end, epoch_barrier, bad);
+		ceph_osdc_update_epoch_barrier(osdc, epoch_barrier);
+	}
+
 	if (le16_to_cpu(msg->hdr.version) >= 8) {
 		u64 flush_tid;
 		u32 caller_uid, caller_gid;
-		u32 osd_epoch_barrier;
 		u32 pool_ns_len;
-		/* version >= 5 */
-		ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad);
+
 		/* version >= 6 */
 		ceph_decode_64_safe(&p, end, flush_tid, bad);
 		/* version >= 7 */
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 8cc4d4e8b077..f7bfc22eb39c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1552,9 +1552,15 @@ void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
 	struct ceph_msg *msg = NULL;
 	struct ceph_mds_cap_release *head;
 	struct ceph_mds_cap_item *item;
+	struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc;
 	struct ceph_cap *cap;
 	LIST_HEAD(tmp_list);
 	int num_cap_releases;
+	__le32	barrier, *cap_barrier;
+
+	down_read(&osdc->lock);
+	barrier = cpu_to_le32(osdc->epoch_barrier);
+	up_read(&osdc->lock);
 
 	spin_lock(&session->s_cap_lock);
 again:
@@ -1572,7 +1578,11 @@ again:
 			head = msg->front.iov_base;
 			head->num = cpu_to_le32(0);
 			msg->front.iov_len = sizeof(*head);
+
+			msg->hdr.version = cpu_to_le16(2);
+			msg->hdr.compat_version = cpu_to_le16(1);
 		}
+
 		cap = list_first_entry(&tmp_list, struct ceph_cap,
 					session_caps);
 		list_del(&cap->session_caps);
@@ -1590,6 +1600,11 @@ again:
 		ceph_put_cap(mdsc, cap);
 
 		if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+			// Append cap_barrier field
+			cap_barrier = msg->front.iov_base + msg->front.iov_len;
+			*cap_barrier = barrier;
+			msg->front.iov_len += sizeof(*cap_barrier);
+
 			msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
 			dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
 			ceph_con_send(&session->s_con, msg);
@@ -1605,6 +1620,11 @@ again:
 	spin_unlock(&session->s_cap_lock);
 
 	if (msg) {
+		// Append cap_barrier field
+		cap_barrier = msg->front.iov_base + msg->front.iov_len;
+		*cap_barrier = barrier;
+		msg->front.iov_len += sizeof(*cap_barrier);
+
 		msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
 		dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
 		ceph_con_send(&session->s_con, msg);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 3e67dd2169fa..db57ae98ed34 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -106,10 +106,13 @@ struct ceph_mds_reply_info_parsed {
 
 /*
  * cap releases are batched and sent to the MDS en masse.
+ *
+ * Account for per-message overhead of mds_cap_release header
+ * and __le32 for osd epoch barrier trailing field.
  */
-#define CEPH_CAPS_PER_RELEASE ((PAGE_SIZE -			\
+#define CEPH_CAPS_PER_RELEASE ((PAGE_SIZE - sizeof(u32) -		\
 				sizeof(struct ceph_mds_cap_release)) /	\
-			       sizeof(struct ceph_mds_cap_item))
+			        sizeof(struct ceph_mds_cap_item))
 
 
 /*
-- 
cgit v1.2.3


From 6fc1fe5e4cfc8939ee59a570b087946042a30140 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 4 Apr 2017 08:39:44 -0400
Subject: Revert "ceph: SetPageError() for writeback pages if writepages fails"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit b109eec6f4332bd517e2f41e207037c4b9065094.

If I'm filling up a filesystem with this sort of command:

    $ dd if=/dev/urandom of=/mnt/cephfs/fillfile bs=2M oflag=sync

...then I'll eventually get back EIO on a write. Further calls
will give us ENOSPC.

I'm not sure what prompted this change, but I don't think it's what we
want to do. If writepages failed, we will have already set the mapping
error appropriately, and that's what gets reported by fsync() or
close().

__filemap_fdatawait_range however, does this:

	wait_on_page_writeback(page);
	if (TestClearPageError(page))
		ret = -EIO;

...and that -EIO ends up trumping the mapping's error if one exists.

When writepages fails, we only want to set the error in the mapping,
and not flag the individual pages.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: "Yan, Zheng” <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/addr.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 7e3fae334620..6cdf94459ac4 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -703,9 +703,6 @@ static void writepages_finish(struct ceph_osd_request *req)
 				clear_bdi_congested(&fsc->backing_dev_info,
 						    BLK_RW_ASYNC);
 
-			if (rc < 0)
-				SetPageError(page);
-
 			ceph_put_snap_context(page_snap_context(page));
 			page->private = 0;
 			ClearPagePrivate(page);
-- 
cgit v1.2.3


From 26544c623e741ac6445f8b1ae369ee32ae1794ad Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 4 Apr 2017 08:39:46 -0400
Subject: ceph: when seeing write errors on an inode, switch to sync writes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, we don't have a real feedback mechanism in place for when we
start seeing buffered writeback errors. If writeback is failing, there
is nothing that prevents an application from continuing to dirty pages
that aren't being cleaned.

In the event that we're seeing write errors of any sort occur on an
inode, have the callback set a flag to force further writes to be
synchronous. When the next write succeeds, clear the flag to allow
buffered writeback to continue.

Since this is just a hint to the write submission mechanism, we only
take the i_ceph_lock when a lockless check shows that the flag needs to
be changed.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: "Yan, Zheng” <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/addr.c  |  6 +++++-
 fs/ceph/file.c  | 31 ++++++++++++++++++-------------
 fs/ceph/super.h | 26 ++++++++++++++++++++++++++
 3 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6cdf94459ac4..e253102b43cd 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -670,8 +670,12 @@ static void writepages_finish(struct ceph_osd_request *req)
 	bool remove_page;
 
 	dout("writepages_finish %p rc %d\n", inode, rc);
-	if (rc < 0)
+	if (rc < 0) {
 		mapping_set_error(mapping, rc);
+		ceph_set_error_write(ci);
+	} else {
+		ceph_clear_error_write(ci);
+	}
 
 	/*
 	 * We lost the cache cap, need to truncate the page before
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 134c978141d0..39866d6a34b6 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1089,19 +1089,22 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
 
 out:
 		ceph_osdc_put_request(req);
-		if (ret == 0) {
-			pos += len;
-			written += len;
-
-			if (pos > i_size_read(inode)) {
-				check_caps = ceph_inode_set_size(inode, pos);
-				if (check_caps)
-					ceph_check_caps(ceph_inode(inode),
-							CHECK_CAPS_AUTHONLY,
-							NULL);
-			}
-		} else
+		if (ret != 0) {
+			ceph_set_error_write(ci);
 			break;
+		}
+
+		ceph_clear_error_write(ci);
+		pos += len;
+		written += len;
+		if (pos > i_size_read(inode)) {
+			check_caps = ceph_inode_set_size(inode, pos);
+			if (check_caps)
+				ceph_check_caps(ceph_inode(inode),
+						CHECK_CAPS_AUTHONLY,
+						NULL);
+		}
+
 	}
 
 	if (ret != -EOLDSNAPC && written > 0) {
@@ -1307,6 +1310,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	}
 
 retry_snap:
+	/* FIXME: not complete since it doesn't account for being at quota */
 	if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) {
 		err = -ENOSPC;
 		goto out;
@@ -1328,7 +1332,8 @@ retry_snap:
 	     inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
 
 	if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
-	    (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
+	    (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC) ||
+	    (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) {
 		struct ceph_snap_context *snapc;
 		struct iov_iter data;
 		inode_unlock(inode);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c68e6a045fb9..7334ee86b9e8 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -474,6 +474,32 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 #define CEPH_I_CAP_DROPPED	(1 << 8)  /* caps were forcibly dropped */
 #define CEPH_I_KICK_FLUSH	(1 << 9)  /* kick flushing caps */
 #define CEPH_I_FLUSH_SNAPS	(1 << 10) /* need flush snapss */
+#define CEPH_I_ERROR_WRITE	(1 << 11) /* have seen write errors */
+
+/*
+ * We set the ERROR_WRITE bit when we start seeing write errors on an inode
+ * and then clear it when they start succeeding. Note that we do a lockless
+ * check first, and only take the lock if it looks like it needs to be changed.
+ * The write submission code just takes this as a hint, so we're not too
+ * worried if a few slip through in either direction.
+ */
+static inline void ceph_set_error_write(struct ceph_inode_info *ci)
+{
+	if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE)) {
+		spin_lock(&ci->i_ceph_lock);
+		ci->i_ceph_flags |= CEPH_I_ERROR_WRITE;
+		spin_unlock(&ci->i_ceph_lock);
+	}
+}
+
+static inline void ceph_clear_error_write(struct ceph_inode_info *ci)
+{
+	if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE) {
+		spin_lock(&ci->i_ceph_lock);
+		ci->i_ceph_flags &= ~CEPH_I_ERROR_WRITE;
+		spin_unlock(&ci->i_ceph_lock);
+	}
+}
 
 static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
 					   long long release_count,
-- 
cgit v1.2.3


From 8b679ec5257eeb3d73b71a613cad2769f21c86ad Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 13 Apr 2017 12:17:37 +0200
Subject: rbd: move rbd_dev_destroy() call out of rbd_dev_image_release()

... to simplify error handling in do_rbd_add().

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 drivers/block/rbd.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 16010183b703..0191a3ca5460 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -5967,8 +5967,6 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
 	rbd_dev->image_format = 0;
 	kfree(rbd_dev->spec->image_id);
 	rbd_dev->spec->image_id = NULL;
-
-	rbd_dev_destroy(rbd_dev);
 }
 
 /*
@@ -6135,8 +6133,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 		 * commit 1f3ef78861ac.
 		 */
 		rbd_unregister_watch(rbd_dev);
-		rbd_dev_image_release(rbd_dev);
-		goto out;
+		goto err_out_image_probe;
 	}
 
 	rc = count;
@@ -6144,6 +6141,8 @@ out:
 	module_put(THIS_MODULE);
 	return rc;
 
+err_out_image_probe:
+	rbd_dev_image_release(rbd_dev);
 err_out_rbd_dev:
 	rbd_dev_destroy(rbd_dev);
 err_out_client:
@@ -6203,6 +6202,7 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
 		}
 		rbd_assert(second);
 		rbd_dev_image_release(second);
+		rbd_dev_destroy(second);
 		first->parent = NULL;
 		first->parent_overlap = 0;
 
@@ -6285,7 +6285,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
 	 */
 	rbd_dev_device_release(rbd_dev);
 	rbd_dev_image_release(rbd_dev);
-
+	rbd_dev_destroy(rbd_dev);
 	return count;
 }
 
-- 
cgit v1.2.3


From fd22aef8b47cfc068448df65c1183698b0abd815 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 13 Apr 2017 12:17:37 +0200
Subject: rbd: move rbd_unregister_watch() call into rbd_dev_image_release()

rbd_dev->disk tear down vs rbd_watch_cb() race shouldn't be a problem
anymore thanks to EXISTS and REMOVING checks in rbd_dev_update_size().
A similar race could occur on "rbd map", see commit 811c66887746
("rbd: fix rbd map vs notify races").

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 drivers/block/rbd.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 0191a3ca5460..b299ed0315f8 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -5964,6 +5964,8 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)
 static void rbd_dev_image_release(struct rbd_device *rbd_dev)
 {
 	rbd_dev_unprobe(rbd_dev);
+	if (rbd_dev->opts)
+		rbd_unregister_watch(rbd_dev);
 	rbd_dev->image_format = 0;
 	kfree(rbd_dev->spec->image_id);
 	rbd_dev->spec->image_id = NULL;
@@ -6126,15 +6128,8 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 	rbd_dev->mapping.read_only = read_only;
 
 	rc = rbd_dev_device_setup(rbd_dev);
-	if (rc) {
-		/*
-		 * rbd_unregister_watch() can't be moved into
-		 * rbd_dev_image_release() without refactoring, see
-		 * commit 1f3ef78861ac.
-		 */
-		rbd_unregister_watch(rbd_dev);
+	if (rc)
 		goto err_out_image_probe;
-	}
 
 	rc = count;
 out:
@@ -6275,14 +6270,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
 	if (__rbd_is_lock_owner(rbd_dev))
 		rbd_unlock(rbd_dev);
 	up_write(&rbd_dev->lock_rwsem);
-	rbd_unregister_watch(rbd_dev);
 
-	/*
-	 * Don't free anything from rbd_dev->disk until after all
-	 * notifies are completely processed. Otherwise
-	 * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting
-	 * in a potential use after free of rbd_dev->disk or rbd_dev.
-	 */
 	rbd_dev_device_release(rbd_dev);
 	rbd_dev_image_release(rbd_dev);
 	rbd_dev_destroy(rbd_dev);
-- 
cgit v1.2.3


From 5769ed0cb12dcd135251e546863196cec0b58e34 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 13 Apr 2017 12:17:38 +0200
Subject: rbd: fix error handling around rbd_init_disk()

add_disk() takes an extra reference on disk->queue, which is put in
put_disk() -> disk_release().  Avoiding blk_cleanup_queue() (which also
puts the queue) until add_disk() sets GENHD_FL_UP works for the queue
itself, but leaks various queue internals.  Conditioning tag_set freeing
on GENHD_FL_UP is wrong too: all error paths after rbd_init_disk() leak
the tag_set.

Move the final "announce" steps out of rbd_dev_device_setup() so that
it can be unwound like any other function.  Leave "announce" steps to
do_rbd_add/remove().

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 drivers/block/rbd.c | 87 +++++++++++++++++++++++++++--------------------------
 1 file changed, 44 insertions(+), 43 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b299ed0315f8..50395af7a9a6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4114,19 +4114,10 @@ static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 static void rbd_free_disk(struct rbd_device *rbd_dev)
 {
-	struct gendisk *disk = rbd_dev->disk;
-
-	if (!disk)
-		return;
-
+	blk_cleanup_queue(rbd_dev->disk->queue);
+	blk_mq_free_tag_set(&rbd_dev->tag_set);
+	put_disk(rbd_dev->disk);
 	rbd_dev->disk = NULL;
-	if (disk->flags & GENHD_FL_UP) {
-		del_gendisk(disk);
-		if (disk->queue)
-			blk_cleanup_queue(disk->queue);
-		blk_mq_free_tag_set(&rbd_dev->tag_set);
-	}
-	put_disk(disk);
 }
 
 static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
@@ -4385,8 +4376,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
 		q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
 
+	/*
+	 * disk_release() expects a queue ref from add_disk() and will
+	 * put it.  Hold an extra ref until add_disk() is called.
+	 */
+	WARN_ON(!blk_get_queue(q));
 	disk->queue = q;
-
 	q->queuedata = rbd_dev;
 
 	rbd_dev->disk = disk;
@@ -5875,6 +5870,15 @@ out_err:
 	return ret;
 }
 
+static void rbd_dev_device_release(struct rbd_device *rbd_dev)
+{
+	clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+	rbd_dev_mapping_clear(rbd_dev);
+	rbd_free_disk(rbd_dev);
+	if (!single_major)
+		unregister_blkdev(rbd_dev->major, rbd_dev->name);
+}
+
 /*
  * rbd_dev->header_rwsem must be locked for write and will be unlocked
  * upon return.
@@ -5910,26 +5914,13 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
 	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
 	set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);
 
-	dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
-	ret = device_add(&rbd_dev->dev);
+	ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
 	if (ret)
 		goto err_out_mapping;
 
-	/* Everything's ready.  Announce the disk to the world. */
-
 	set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
 	up_write(&rbd_dev->header_rwsem);
-
-	spin_lock(&rbd_dev_list_lock);
-	list_add_tail(&rbd_dev->node, &rbd_dev_list);
-	spin_unlock(&rbd_dev_list_lock);
-
-	add_disk(rbd_dev->disk);
-	pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name,
-		(unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT,
-		rbd_dev->header.features);
-
-	return ret;
+	return 0;
 
 err_out_mapping:
 	rbd_dev_mapping_clear(rbd_dev);
@@ -6131,11 +6122,30 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 	if (rc)
 		goto err_out_image_probe;
 
+	/* Everything's ready.  Announce the disk to the world. */
+
+	rc = device_add(&rbd_dev->dev);
+	if (rc)
+		goto err_out_device_setup;
+
+	add_disk(rbd_dev->disk);
+	/* see rbd_init_disk() */
+	blk_put_queue(rbd_dev->disk->queue);
+
+	spin_lock(&rbd_dev_list_lock);
+	list_add_tail(&rbd_dev->node, &rbd_dev_list);
+	spin_unlock(&rbd_dev_list_lock);
+
+	pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name,
+		(unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT,
+		rbd_dev->header.features);
 	rc = count;
 out:
 	module_put(THIS_MODULE);
 	return rc;
 
+err_out_device_setup:
+	rbd_dev_device_release(rbd_dev);
 err_out_image_probe:
 	rbd_dev_image_release(rbd_dev);
 err_out_rbd_dev:
@@ -6165,21 +6175,6 @@ static ssize_t rbd_add_single_major(struct bus_type *bus,
 	return do_rbd_add(bus, buf, count);
 }
 
-static void rbd_dev_device_release(struct rbd_device *rbd_dev)
-{
-	rbd_free_disk(rbd_dev);
-
-	spin_lock(&rbd_dev_list_lock);
-	list_del_init(&rbd_dev->node);
-	spin_unlock(&rbd_dev_list_lock);
-
-	clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
-	device_del(&rbd_dev->dev);
-	rbd_dev_mapping_clear(rbd_dev);
-	if (!single_major)
-		unregister_blkdev(rbd_dev->major, rbd_dev->name);
-}
-
 static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
 {
 	while (rbd_dev->parent) {
@@ -6266,6 +6261,12 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
 		blk_set_queue_dying(rbd_dev->disk->queue);
 	}
 
+	del_gendisk(rbd_dev->disk);
+	spin_lock(&rbd_dev_list_lock);
+	list_del_init(&rbd_dev->node);
+	spin_unlock(&rbd_dev_list_lock);
+	device_del(&rbd_dev->dev);
+
 	down_write(&rbd_dev->lock_rwsem);
 	if (__rbd_is_lock_owner(rbd_dev))
 		rbd_unlock(rbd_dev);
-- 
cgit v1.2.3


From bbead745d96cfd51aaa332bdeab300862c7a8061 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 13 Apr 2017 12:17:38 +0200
Subject: rbd: ignore unlock errors

Currently the lock_state is set to UNLOCKED (preventing further I/O),
but RELEASED_LOCK notification isn't sent.  Be consistent with userspace
and treat ceph_cls_unlock() errors as the image is unlocked.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 drivers/block/rbd.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 50395af7a9a6..423de775aabb 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3097,7 +3097,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
 /*
  * lock_rwsem must be held for write
  */
-static int rbd_unlock(struct rbd_device *rbd_dev)
+static void rbd_unlock(struct rbd_device *rbd_dev)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	char cookie[32];
@@ -3105,19 +3105,16 @@ static int rbd_unlock(struct rbd_device *rbd_dev)
 
 	WARN_ON(!__rbd_is_lock_owner(rbd_dev));
 
-	rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
-
 	format_lock_cookie(rbd_dev, cookie);
 	ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
 			      RBD_LOCK_NAME, cookie);
-	if (ret && ret != -ENOENT) {
-		rbd_warn(rbd_dev, "cls_unlock failed: %d", ret);
-		return ret;
-	}
+	if (ret && ret != -ENOENT)
+		rbd_warn(rbd_dev, "failed to unlock: %d", ret);
 
+	/* treat errors as the image is unlocked */
+	rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
 	rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
 	queue_work(rbd_dev->task_wq, &rbd_dev->released_lock_work);
-	return 0;
 }
 
 static int __rbd_notify_op_lock(struct rbd_device *rbd_dev,
@@ -3490,16 +3487,15 @@ static bool rbd_release_lock(struct rbd_device *rbd_dev)
 	if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
 		return false;
 
-	if (!rbd_unlock(rbd_dev))
-		/*
-		 * Give others a chance to grab the lock - we would re-acquire
-		 * almost immediately if we got new IO during ceph_osdc_sync()
-		 * otherwise.  We need to ack our own notifications, so this
-		 * lock_dwork will be requeued from rbd_wait_state_locked()
-		 * after wake_requests() in rbd_handle_released_lock().
-		 */
-		cancel_delayed_work(&rbd_dev->lock_dwork);
-
+	rbd_unlock(rbd_dev);
+	/*
+	 * Give others a chance to grab the lock - we would re-acquire
+	 * almost immediately if we got new IO during ceph_osdc_sync()
+	 * otherwise.  We need to ack our own notifications, so this
+	 * lock_dwork will be requeued from rbd_wait_state_locked()
+	 * after wake_requests() in rbd_handle_released_lock().
+	 */
+	cancel_delayed_work(&rbd_dev->lock_dwork);
 	return true;
 }
 
-- 
cgit v1.2.3


From cbbfb0ff115159847121afe9c7553bd5c86f6062 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 13 Apr 2017 12:17:38 +0200
Subject: rbd: store lock cookie

In preparation for supporting set_cookie method (or rather set_cookie
fallback for older OSDs), store the lock cookie on lock and use it on
unlock instead of recalculating from rbd_dev->watch_cookie.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 drivers/block/rbd.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 423de775aabb..5f563db59820 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -387,6 +387,7 @@ struct rbd_device {
 
 	struct rw_semaphore	lock_rwsem;
 	enum rbd_lock_state	lock_state;
+	char			lock_cookie[32];
 	struct rbd_client_id	owner_cid;
 	struct work_struct	acquired_lock_work;
 	struct work_struct	released_lock_work;
@@ -3079,7 +3080,8 @@ static int rbd_lock(struct rbd_device *rbd_dev)
 	char cookie[32];
 	int ret;
 
-	WARN_ON(__rbd_is_lock_owner(rbd_dev));
+	WARN_ON(__rbd_is_lock_owner(rbd_dev) ||
+		rbd_dev->lock_cookie[0] != '\0');
 
 	format_lock_cookie(rbd_dev, cookie);
 	ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
@@ -3089,6 +3091,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
 		return ret;
 
 	rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
+	strcpy(rbd_dev->lock_cookie, cookie);
 	rbd_set_owner_cid(rbd_dev, &cid);
 	queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
 	return 0;
@@ -3100,19 +3103,19 @@ static int rbd_lock(struct rbd_device *rbd_dev)
 static void rbd_unlock(struct rbd_device *rbd_dev)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
-	char cookie[32];
 	int ret;
 
-	WARN_ON(!__rbd_is_lock_owner(rbd_dev));
+	WARN_ON(!__rbd_is_lock_owner(rbd_dev) ||
+		rbd_dev->lock_cookie[0] == '\0');
 
-	format_lock_cookie(rbd_dev, cookie);
 	ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
-			      RBD_LOCK_NAME, cookie);
+			      RBD_LOCK_NAME, rbd_dev->lock_cookie);
 	if (ret && ret != -ENOENT)
 		rbd_warn(rbd_dev, "failed to unlock: %d", ret);
 
 	/* treat errors as the image is unlocked */
 	rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
+	rbd_dev->lock_cookie[0] = '\0';
 	rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
 	queue_work(rbd_dev->task_wq, &rbd_dev->released_lock_work);
 }
-- 
cgit v1.2.3


From 14bb211d324d6c8140167bd6b2b8a80757348a2f Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 13 Apr 2017 12:17:38 +0200
Subject: rbd: support updating the lock cookie without releasing the lock

As we no longer release the lock before potentially raising BLACKLISTED
in rbd_reregister_watch(), the "either locked or blacklisted" assert in
rbd_queue_workfn() needs to go: we can be both locked and blacklisted
at that point now.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 drivers/block/rbd.c                  | 66 ++++++++++++++++++++++--------------
 include/linux/ceph/cls_lock_client.h |  5 +++
 net/ceph/cls_lock_client.c           | 51 ++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+), 25 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 5f563db59820..063c8f06fb9c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3820,24 +3820,51 @@ static void rbd_unregister_watch(struct rbd_device *rbd_dev)
 	ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
 }
 
+/*
+ * lock_rwsem must be held for write
+ */
+static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
+{
+	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+	char cookie[32];
+	int ret;
+
+	WARN_ON(rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED);
+
+	format_lock_cookie(rbd_dev, cookie);
+	ret = ceph_cls_set_cookie(osdc, &rbd_dev->header_oid,
+				  &rbd_dev->header_oloc, RBD_LOCK_NAME,
+				  CEPH_CLS_LOCK_EXCLUSIVE, rbd_dev->lock_cookie,
+				  RBD_LOCK_TAG, cookie);
+	if (ret) {
+		if (ret != -EOPNOTSUPP)
+			rbd_warn(rbd_dev, "failed to update lock cookie: %d",
+				 ret);
+
+		/*
+		 * Lock cookie cannot be updated on older OSDs, so do
+		 * a manual release and queue an acquire.
+		 */
+		if (rbd_release_lock(rbd_dev))
+			queue_delayed_work(rbd_dev->task_wq,
+					   &rbd_dev->lock_dwork, 0);
+	} else {
+		strcpy(rbd_dev->lock_cookie, cookie);
+	}
+}
+
 static void rbd_reregister_watch(struct work_struct *work)
 {
 	struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
 					    struct rbd_device, watch_dwork);
-	bool was_lock_owner = false;
-	bool need_to_wake = false;
 	int ret;
 
 	dout("%s rbd_dev %p\n", __func__, rbd_dev);
 
-	down_write(&rbd_dev->lock_rwsem);
-	if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
-		was_lock_owner = rbd_release_lock(rbd_dev);
-
 	mutex_lock(&rbd_dev->watch_mutex);
 	if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) {
 		mutex_unlock(&rbd_dev->watch_mutex);
-		goto out;
+		return;
 	}
 
 	ret = __rbd_register_watch(rbd_dev);
@@ -3845,36 +3872,28 @@ static void rbd_reregister_watch(struct work_struct *work)
 		rbd_warn(rbd_dev, "failed to reregister watch: %d", ret);
 		if (ret == -EBLACKLISTED || ret == -ENOENT) {
 			set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
-			need_to_wake = true;
+			wake_requests(rbd_dev, true);
 		} else {
 			queue_delayed_work(rbd_dev->task_wq,
 					   &rbd_dev->watch_dwork,
 					   RBD_RETRY_DELAY);
 		}
 		mutex_unlock(&rbd_dev->watch_mutex);
-		goto out;
+		return;
 	}
 
-	need_to_wake = true;
 	rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
 	rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
 	mutex_unlock(&rbd_dev->watch_mutex);
 
+	down_write(&rbd_dev->lock_rwsem);
+	if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
+		rbd_reacquire_lock(rbd_dev);
+	up_write(&rbd_dev->lock_rwsem);
+
 	ret = rbd_dev_refresh(rbd_dev);
 	if (ret)
 		rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret);
-
-	if (was_lock_owner) {
-		ret = rbd_try_lock(rbd_dev);
-		if (ret)
-			rbd_warn(rbd_dev, "reregisteration lock failed: %d",
-				 ret);
-	}
-
-out:
-	up_write(&rbd_dev->lock_rwsem);
-	if (need_to_wake)
-		wake_requests(rbd_dev, true);
 }
 
 /*
@@ -4052,9 +4071,6 @@ static void rbd_queue_workfn(struct work_struct *work)
 		if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
 		    !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
 			rbd_wait_state_locked(rbd_dev);
-
-		WARN_ON((rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) ^
-			!test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags));
 		if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
 			result = -EBLACKLISTED;
 			goto err_unlock;
diff --git a/include/linux/ceph/cls_lock_client.h b/include/linux/ceph/cls_lock_client.h
index 84884d8d4710..0594d3bba774 100644
--- a/include/linux/ceph/cls_lock_client.h
+++ b/include/linux/ceph/cls_lock_client.h
@@ -37,6 +37,11 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc,
 			struct ceph_object_locator *oloc,
 			char *lock_name, char *cookie,
 			struct ceph_entity_name *locker);
+int ceph_cls_set_cookie(struct ceph_osd_client *osdc,
+			struct ceph_object_id *oid,
+			struct ceph_object_locator *oloc,
+			char *lock_name, u8 type, char *old_cookie,
+			char *tag, char *new_cookie);
 
 void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers);
 
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index b9233b990399..08ada893f01e 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -179,6 +179,57 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc,
 }
 EXPORT_SYMBOL(ceph_cls_break_lock);
 
+int ceph_cls_set_cookie(struct ceph_osd_client *osdc,
+			struct ceph_object_id *oid,
+			struct ceph_object_locator *oloc,
+			char *lock_name, u8 type, char *old_cookie,
+			char *tag, char *new_cookie)
+{
+	int cookie_op_buf_size;
+	int name_len = strlen(lock_name);
+	int old_cookie_len = strlen(old_cookie);
+	int tag_len = strlen(tag);
+	int new_cookie_len = strlen(new_cookie);
+	void *p, *end;
+	struct page *cookie_op_page;
+	int ret;
+
+	cookie_op_buf_size = name_len + sizeof(__le32) +
+			     old_cookie_len + sizeof(__le32) +
+			     tag_len + sizeof(__le32) +
+			     new_cookie_len + sizeof(__le32) +
+			     sizeof(u8) + CEPH_ENCODING_START_BLK_LEN;
+	if (cookie_op_buf_size > PAGE_SIZE)
+		return -E2BIG;
+
+	cookie_op_page = alloc_page(GFP_NOIO);
+	if (!cookie_op_page)
+		return -ENOMEM;
+
+	p = page_address(cookie_op_page);
+	end = p + cookie_op_buf_size;
+
+	/* encode cls_lock_set_cookie_op struct */
+	ceph_start_encoding(&p, 1, 1,
+			    cookie_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+	ceph_encode_string(&p, end, lock_name, name_len);
+	ceph_encode_8(&p, type);
+	ceph_encode_string(&p, end, old_cookie, old_cookie_len);
+	ceph_encode_string(&p, end, tag, tag_len);
+	ceph_encode_string(&p, end, new_cookie, new_cookie_len);
+
+	dout("%s lock_name %s type %d old_cookie %s tag %s new_cookie %s\n",
+	     __func__, lock_name, type, old_cookie, tag, new_cookie);
+	ret = ceph_osdc_call(osdc, oid, oloc, "lock", "set_cookie",
+			     CEPH_OSD_FLAG_WRITE, cookie_op_page,
+			     cookie_op_buf_size, NULL, NULL);
+
+	dout("%s: status %d\n", __func__, ret);
+	__free_page(cookie_op_page);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_cls_set_cookie);
+
 void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers)
 {
 	int i;
-- 
cgit v1.2.3


From f9bebd580360c141b5fdbede9cc13a4caf23cd1a Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 13 Apr 2017 12:17:39 +0200
Subject: rbd: kill rbd_is_lock_supported()

Currently the exclusive lock is acquired only if the mapping is
writable, i.e. an image HEAD mapped in rw mode.  This means that we
don't acquire the lock for executing a read from a snapshot or an image
HEAD mapped in ro mode, even if lock_on_read is set.  This is somewhat
weird and inconsistent with "no automatic exclusive lock transfers"
mode, where the lock is acquired unconditionally.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 drivers/block/rbd.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 063c8f06fb9c..e3fafaf97dee 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -478,13 +478,6 @@ static int minor_to_rbd_dev_id(int minor)
 	return minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
 }
 
-static bool rbd_is_lock_supported(struct rbd_device *rbd_dev)
-{
-	return (rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK) &&
-	       rbd_dev->spec->snap_id == CEPH_NOSNAP &&
-	       !rbd_dev->mapping.read_only;
-}
-
 static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
 {
 	return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED ||
@@ -4052,10 +4045,6 @@ static void rbd_queue_workfn(struct work_struct *work)
 	if (op_type != OBJ_OP_READ) {
 		snapc = rbd_dev->header.snapc;
 		ceph_get_snap_context(snapc);
-		must_be_locked = rbd_is_lock_supported(rbd_dev);
-	} else {
-		must_be_locked = rbd_dev->opts->lock_on_read &&
-					rbd_is_lock_supported(rbd_dev);
 	}
 	up_read(&rbd_dev->header_rwsem);
 
@@ -4066,6 +4055,9 @@ static void rbd_queue_workfn(struct work_struct *work)
 		goto err_rq;
 	}
 
+	must_be_locked =
+	    (rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK) &&
+	    (op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read);
 	if (must_be_locked) {
 		down_read(&rbd_dev->lock_rwsem);
 		if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
-- 
cgit v1.2.3


From 3b77faa0495abd07e94119681be8cc66af5e0a3b Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 13 Apr 2017 12:17:39 +0200
Subject: rbd: return ResponseMessage result from rbd_handle_request_lock()

Right now it's just 0, but "no automatic exclusive lock transfers" mode
code will need -EROFS.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 drivers/block/rbd.c | 40 ++++++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index e3fafaf97dee..8babb1a59a0a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3572,12 +3572,16 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v,
 	up_read(&rbd_dev->lock_rwsem);
 }
 
-static bool rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
-				    void **p)
+/*
+ * Returns result for ResponseMessage to be encoded (<= 0), or 1 if no
+ * ResponseMessage is needed.
+ */
+static int rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
+				   void **p)
 {
 	struct rbd_client_id my_cid = rbd_get_cid(rbd_dev);
 	struct rbd_client_id cid = { 0 };
-	bool need_to_send;
+	int result = 1;
 
 	if (struct_v >= 2) {
 		cid.gid = ceph_decode_64(p);
@@ -3587,19 +3591,30 @@ static bool rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
 	dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
 	     cid.handle);
 	if (rbd_cid_equal(&cid, &my_cid))
-		return false;
+		return result;
 
 	down_read(&rbd_dev->lock_rwsem);
-	need_to_send = __rbd_is_lock_owner(rbd_dev);
-	if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
-		if (!rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid)) {
+	if (__rbd_is_lock_owner(rbd_dev)) {
+		if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED &&
+		    rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid))
+			goto out_unlock;
+
+		/*
+		 * encode ResponseMessage(0) so the peer can detect
+		 * a missing owner
+		 */
+		result = 0;
+
+		if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
 			dout("%s rbd_dev %p queueing unlock_work\n", __func__,
 			     rbd_dev);
 			queue_work(rbd_dev->task_wq, &rbd_dev->unlock_work);
 		}
 	}
+
+out_unlock:
 	up_read(&rbd_dev->lock_rwsem);
-	return need_to_send;
+	return result;
 }
 
 static void __rbd_acknowledge_notify(struct rbd_device *rbd_dev,
@@ -3682,13 +3697,10 @@ static void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie,
 		rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
 		break;
 	case RBD_NOTIFY_OP_REQUEST_LOCK:
-		if (rbd_handle_request_lock(rbd_dev, struct_v, &p))
-			/*
-			 * send ResponseMessage(0) back so the client
-			 * can detect a missing owner
-			 */
+		ret = rbd_handle_request_lock(rbd_dev, struct_v, &p);
+		if (ret <= 0)
 			rbd_acknowledge_notify_result(rbd_dev, notify_id,
-						      cookie, 0);
+						      cookie, ret);
 		else
 			rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
 		break;
-- 
cgit v1.2.3


From e010dd0ada619ed6d3411de7371fba12c1baa48b Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 13 Apr 2017 12:17:39 +0200
Subject: rbd: exclusive map option

Support disabling automatic exclusive lock transfers to allow users
to be in charge of which node should own the lock while being able to
reuse exclusive lock's built-in blacklist/break-lock functionality.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 drivers/block/rbd.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 73 insertions(+), 10 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8babb1a59a0a..3402ff7414c5 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -798,6 +798,7 @@ enum {
 	Opt_read_only,
 	Opt_read_write,
 	Opt_lock_on_read,
+	Opt_exclusive,
 	Opt_err
 };
 
@@ -810,6 +811,7 @@ static match_table_t rbd_opts_tokens = {
 	{Opt_read_write, "read_write"},
 	{Opt_read_write, "rw"},		/* Alternate spelling */
 	{Opt_lock_on_read, "lock_on_read"},
+	{Opt_exclusive, "exclusive"},
 	{Opt_err, NULL}
 };
 
@@ -817,11 +819,13 @@ struct rbd_options {
 	int	queue_depth;
 	bool	read_only;
 	bool	lock_on_read;
+	bool	exclusive;
 };
 
 #define RBD_QUEUE_DEPTH_DEFAULT	BLKDEV_MAX_RQ
 #define RBD_READ_ONLY_DEFAULT	false
 #define RBD_LOCK_ON_READ_DEFAULT false
+#define RBD_EXCLUSIVE_DEFAULT	false
 
 static int parse_rbd_opts_token(char *c, void *private)
 {
@@ -860,6 +864,9 @@ static int parse_rbd_opts_token(char *c, void *private)
 	case Opt_lock_on_read:
 		rbd_opts->lock_on_read = true;
 		break;
+	case Opt_exclusive:
+		rbd_opts->exclusive = true;
+		break;
 	default:
 		/* libceph prints "bad option" msg */
 		return -EINVAL;
@@ -3440,6 +3447,18 @@ again:
 	ret = rbd_request_lock(rbd_dev);
 	if (ret == -ETIMEDOUT) {
 		goto again; /* treat this as a dead client */
+	} else if (ret == -EROFS) {
+		rbd_warn(rbd_dev, "peer will not release lock");
+		/*
+		 * If this is rbd_add_acquire_lock(), we want to fail
+		 * immediately -- reuse BLACKLISTED flag.  Otherwise we
+		 * want to block.
+		 */
+		if (!(rbd_dev->disk->flags & GENHD_FL_UP)) {
+			set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
+			/* wake "rbd map --exclusive" process */
+			wake_requests(rbd_dev, false);
+		}
 	} else if (ret < 0) {
 		rbd_warn(rbd_dev, "error requesting lock: %d", ret);
 		mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
@@ -3606,9 +3625,15 @@ static int rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
 		result = 0;
 
 		if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
-			dout("%s rbd_dev %p queueing unlock_work\n", __func__,
-			     rbd_dev);
-			queue_work(rbd_dev->task_wq, &rbd_dev->unlock_work);
+			if (!rbd_dev->opts->exclusive) {
+				dout("%s rbd_dev %p queueing unlock_work\n",
+				     __func__, rbd_dev);
+				queue_work(rbd_dev->task_wq,
+					   &rbd_dev->unlock_work);
+			} else {
+				/* refuse to release the lock */
+				result = -EROFS;
+			}
 		}
 	}
 
@@ -4073,8 +4098,14 @@ static void rbd_queue_workfn(struct work_struct *work)
 	if (must_be_locked) {
 		down_read(&rbd_dev->lock_rwsem);
 		if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
-		    !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
+		    !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
+			if (rbd_dev->opts->exclusive) {
+				rbd_warn(rbd_dev, "exclusive lock required");
+				result = -EROFS;
+				goto err_unlock;
+			}
 			rbd_wait_state_locked(rbd_dev);
+		}
 		if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
 			result = -EBLACKLISTED;
 			goto err_unlock;
@@ -5640,6 +5671,7 @@ static int rbd_add_parse_args(const char *buf,
 	rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
 	rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
 	rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
+	rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
 
 	copts = ceph_parse_options(options, mon_addrs,
 					mon_addrs + mon_addrs_size - 1,
@@ -5698,6 +5730,33 @@ again:
 	return ret;
 }
 
+static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
+{
+	down_write(&rbd_dev->lock_rwsem);
+	if (__rbd_is_lock_owner(rbd_dev))
+		rbd_unlock(rbd_dev);
+	up_write(&rbd_dev->lock_rwsem);
+}
+
+static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
+{
+	if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
+		rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
+		return -EINVAL;
+	}
+
+	/* FIXME: "rbd map --exclusive" should be in interruptible */
+	down_read(&rbd_dev->lock_rwsem);
+	rbd_wait_state_locked(rbd_dev);
+	up_read(&rbd_dev->lock_rwsem);
+	if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
+		rbd_warn(rbd_dev, "failed to acquire exclusive lock");
+		return -EROFS;
+	}
+
+	return 0;
+}
+
 /*
  * An rbd format 2 image has a unique identifier, distinct from the
  * name given to it by the user.  Internally, that identifier is
@@ -6141,11 +6200,17 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 	if (rc)
 		goto err_out_image_probe;
 
+	if (rbd_dev->opts->exclusive) {
+		rc = rbd_add_acquire_lock(rbd_dev);
+		if (rc)
+			goto err_out_device_setup;
+	}
+
 	/* Everything's ready.  Announce the disk to the world. */
 
 	rc = device_add(&rbd_dev->dev);
 	if (rc)
-		goto err_out_device_setup;
+		goto err_out_image_lock;
 
 	add_disk(rbd_dev->disk);
 	/* see rbd_init_disk() */
@@ -6163,6 +6228,8 @@ out:
 	module_put(THIS_MODULE);
 	return rc;
 
+err_out_image_lock:
+	rbd_dev_image_unlock(rbd_dev);
 err_out_device_setup:
 	rbd_dev_device_release(rbd_dev);
 err_out_image_probe:
@@ -6286,11 +6353,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
 	spin_unlock(&rbd_dev_list_lock);
 	device_del(&rbd_dev->dev);
 
-	down_write(&rbd_dev->lock_rwsem);
-	if (__rbd_is_lock_owner(rbd_dev))
-		rbd_unlock(rbd_dev);
-	up_write(&rbd_dev->lock_rwsem);
-
+	rbd_dev_image_unlock(rbd_dev);
 	rbd_dev_device_release(rbd_dev);
 	rbd_dev_image_release(rbd_dev);
 	rbd_dev_destroy(rbd_dev);
-- 
cgit v1.2.3


From b50c2de51e611da90cf3cf04c058f7e9bbe79e93 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Mon, 24 Apr 2017 11:56:50 +0800
Subject: ceph: choose readdir frag based on previous readdir reply

The dirfragtree is lazily updated, it's not always accurate. Infinite
loops happens in following circumstance.

- client send request to read frag A
- frag A has been fragmented into frag B and C. So mds fills the reply
  with contents of frag B
- client wants to read next frag C. ceph_choose_frag(frag value of C)
  return frag A.

The fix is using previous readdir reply to calculate next readdir frag
when possible.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/dir.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ae61cdf7d489..e071d23f6148 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -294,7 +294,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	int i;
 	int err;
-	u32 ftype;
+	unsigned frag = -1;
 	struct ceph_mds_reply_info_parsed *rinfo;
 
 	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
@@ -341,7 +341,6 @@ more:
 	/* do we have the correct frag content buffered? */
 	if (need_send_readdir(fi, ctx->pos)) {
 		struct ceph_mds_request *req;
-		unsigned frag;
 		int op = ceph_snap(inode) == CEPH_SNAPDIR ?
 			CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
 
@@ -352,8 +351,11 @@ more:
 		}
 
 		if (is_hash_order(ctx->pos)) {
-			frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
-						NULL, NULL);
+			/* fragtree isn't always accurate. choose frag
+			 * based on previous reply when possible. */
+			if (frag == (unsigned)-1)
+				frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
+							NULL, NULL);
 		} else {
 			frag = fpos_frag(ctx->pos);
 		}
@@ -480,6 +482,7 @@ more:
 		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
 		struct ceph_vino vino;
 		ino_t ino;
+		u32 ftype;
 
 		BUG_ON(rde->offset < ctx->pos);
 
@@ -502,15 +505,17 @@ more:
 		ctx->pos++;
 	}
 
+	ceph_mdsc_put_request(fi->last_readdir);
+	fi->last_readdir = NULL;
+
 	if (fi->next_offset > 2) {
-		ceph_mdsc_put_request(fi->last_readdir);
-		fi->last_readdir = NULL;
+		frag = fi->frag;
 		goto more;
 	}
 
 	/* more frags? */
 	if (!ceph_frag_is_rightmost(fi->frag)) {
-		unsigned frag = ceph_frag_next(fi->frag);
+		frag = ceph_frag_next(fi->frag);
 		if (is_hash_order(ctx->pos)) {
 			loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
 							fi->next_offset, true);
-- 
cgit v1.2.3


From f775ff7d89f33fc9ba63f6f70df3bcc98c2d9828 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Thu, 27 Apr 2017 18:34:00 +0200
Subject: ceph: fix file open flags on ppc64

The file open flags (O_foo) are platform specific and should never go
out to an interface that is not local to the system.

Unfortunately these flags have leaked out onto the wire in the cephfs
implementation. That lead to bogus flags getting transmitted on ppc64.

This patch converts the kernel view of flags to the ceph view of file
open flags.

Fixes: 124e68e74 ("ceph: file operations")
Signed-off-by: Alexander Graf <agraf@suse.de>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/file.c               | 34 +++++++++++++++++++++++++++++++++-
 include/linux/ceph/ceph_fs.h | 12 ++++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 39866d6a34b6..9d2eeed9e323 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -13,6 +13,38 @@
 #include "mds_client.h"
 #include "cache.h"
 
+static __le32 ceph_flags_sys2wire(u32 flags)
+{
+	u32 wire_flags = 0;
+
+	switch (flags & O_ACCMODE) {
+	case O_RDONLY:
+		wire_flags |= CEPH_O_RDONLY;
+		break;
+	case O_WRONLY:
+		wire_flags |= CEPH_O_WRONLY;
+		break;
+	case O_RDWR:
+		wire_flags |= CEPH_O_RDWR;
+		break;
+	}
+
+#define ceph_sys2wire(a) if (flags & a) { wire_flags |= CEPH_##a; flags &= ~a; }
+
+	ceph_sys2wire(O_CREAT);
+	ceph_sys2wire(O_EXCL);
+	ceph_sys2wire(O_TRUNC);
+	ceph_sys2wire(O_DIRECTORY);
+	ceph_sys2wire(O_NOFOLLOW);
+
+#undef ceph_sys2wire
+
+	if (flags)
+		dout("unused open flags: %x", flags);
+
+	return cpu_to_le32(wire_flags);
+}
+
 /*
  * Ceph file operations
  *
@@ -123,7 +155,7 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode)
 	if (IS_ERR(req))
 		goto out;
 	req->r_fmode = ceph_flags_to_mode(flags);
-	req->r_args.open.flags = cpu_to_le32(flags);
+	req->r_args.open.flags = ceph_flags_sys2wire(flags);
 	req->r_args.open.mode = cpu_to_le32(create_mode);
 out:
 	return req;
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 1787e4a8e251..ad078ebe25d6 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -367,6 +367,18 @@ extern const char *ceph_mds_op_name(int op);
 #define CEPH_READDIR_HASH_ORDER		(1<<9)
 #define CEPH_READDIR_OFFSET_HASH	(1<<10)
 
+/*
+ * open request flags
+ */
+#define CEPH_O_RDONLY		00000000
+#define CEPH_O_WRONLY		00000001
+#define CEPH_O_RDWR		00000002
+#define CEPH_O_CREAT		00000100
+#define CEPH_O_EXCL		00000200
+#define CEPH_O_TRUNC		00001000
+#define CEPH_O_DIRECTORY	00200000
+#define CEPH_O_NOFOLLOW		00400000
+
 union ceph_mds_request_args {
 	struct {
 		__le32 mask;                 /* CEPH_CAP_* */
-- 
cgit v1.2.3


From eeca958dce0a9231d1969f86196653eb50fcc9b3 Mon Sep 17 00:00:00 2001
From: Luis Henriques <lhenriques@suse.com>
Date: Fri, 28 Apr 2017 11:14:04 +0100
Subject: ceph: fix memory leak in __ceph_setxattr()

The ceph_inode_xattr needs to be released when removing an xattr.  Easily
reproducible running the 'generic/020' test from xfstests or simply by
doing:

  attr -s attr0 -V 0 /mnt/test && attr -r attr0 /mnt/test

While there, also fix the error path.

Here's the kmemleak splat:

unreferenced object 0xffff88001f86fbc0 (size 64):
  comm "attr", pid 244, jiffies 4294904246 (age 98.464s)
  hex dump (first 32 bytes):
    40 fa 86 1f 00 88 ff ff 80 32 38 1f 00 88 ff ff  @........28.....
    00 01 00 00 00 00 ad de 00 02 00 00 00 00 ad de  ................
  backtrace:
    [<ffffffff81560199>] kmemleak_alloc+0x49/0xa0
    [<ffffffff810f3e5b>] kmem_cache_alloc+0x9b/0xf0
    [<ffffffff812b157e>] __ceph_setxattr+0x17e/0x820
    [<ffffffff812b1c57>] ceph_set_xattr_handler+0x37/0x40
    [<ffffffff8111fb4b>] __vfs_removexattr+0x4b/0x60
    [<ffffffff8111fd37>] vfs_removexattr+0x77/0xd0
    [<ffffffff8111fdd1>] removexattr+0x41/0x60
    [<ffffffff8111fe65>] path_removexattr+0x75/0xa0
    [<ffffffff81120aeb>] SyS_lremovexattr+0xb/0x10
    [<ffffffff81564b20>] entry_SYSCALL_64_fastpath+0x13/0x94
    [<ffffffffffffffff>] 0xffffffffffffffff

Cc: stable@vger.kernel.org
Signed-off-by: Luis Henriques <lhenriques@suse.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/xattr.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index febc28f9e2c2..75267cdd5dfd 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -392,6 +392,7 @@ static int __set_xattr(struct ceph_inode_info *ci,
 
 	if (update_xattr) {
 		int err = 0;
+
 		if (xattr && (flags & XATTR_CREATE))
 			err = -EEXIST;
 		else if (!xattr && (flags & XATTR_REPLACE))
@@ -399,12 +400,14 @@ static int __set_xattr(struct ceph_inode_info *ci,
 		if (err) {
 			kfree(name);
 			kfree(val);
+			kfree(*newxattr);
 			return err;
 		}
 		if (update_xattr < 0) {
 			if (xattr)
 				__remove_xattr(ci, xattr);
 			kfree(name);
+			kfree(*newxattr);
 			return 0;
 		}
 	}
-- 
cgit v1.2.3


From 44f0aeec203738bf34f4b7e16b745c8c71fe0f06 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 2 Apr 2017 16:50:33 +0200
Subject: dmaengine: pl080: Cut some unused defines

There is no in-kernel code using these indexed register
defines, and their offsets are clearly defined right below.
Cut them.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/amba/pl080.h | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h
index 91b84a7f0539..800429f4b997 100644
--- a/include/linux/amba/pl080.h
+++ b/include/linux/amba/pl080.h
@@ -46,16 +46,8 @@
 
 /* Per channel configuration registers */
 
-#define PL080_Cx_STRIDE				(0x20)
+/* Per channel configuration registers */
 #define PL080_Cx_BASE(x)			((0x100 + (x * 0x20)))
-#define PL080_Cx_SRC_ADDR(x)			((0x100 + (x * 0x20)))
-#define PL080_Cx_DST_ADDR(x)			((0x104 + (x * 0x20)))
-#define PL080_Cx_LLI(x)				((0x108 + (x * 0x20)))
-#define PL080_Cx_CONTROL(x)			((0x10C + (x * 0x20)))
-#define PL080_Cx_CONFIG(x)			((0x110 + (x * 0x20)))
-#define PL080S_Cx_CONTROL2(x)			((0x110 + (x * 0x20)))
-#define PL080S_Cx_CONFIG(x)			((0x114 + (x * 0x20)))
-
 #define PL080_CH_SRC_ADDR			(0x00)
 #define PL080_CH_DST_ADDR			(0x04)
 #define PL080_CH_LLI				(0x08)
-- 
cgit v1.2.3


From da7cbd2098a7c943ac919297d896682222a306dc Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 2 Apr 2017 16:50:44 +0200
Subject: dmaengine: pl080: Fix some missing kerneldoc

Two elements of the physical channel description was missing.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/amba-pl08x.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 0b7c6ce629a6..8624598530f3 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -106,6 +106,7 @@ struct pl08x_driver_data;
 
 /**
  * struct vendor_data - vendor-specific config parameters for PL08x derivatives
+ * @config_offset: offset to the configuration register
  * @channels: the number of channels available in this variant
  * @signals: the number of request signals available from the hardware
  * @dualmaster: whether this version supports dual AHB masters or not.
@@ -145,6 +146,8 @@ struct pl08x_bus_data {
 /**
  * struct pl08x_phy_chan - holder for the physical channels
  * @id: physical index to this channel
+ * @base: memory base address for this physical channel
+ * @reg_config: configuration address for this physical channel
  * @lock: a lock to use when altering an instance of this struct
  * @serving: the virtual channel currently being served by this physical
  * channel
@@ -226,7 +229,7 @@ enum pl08x_dma_chan_state {
  * @phychan: the physical channel utilized by this channel, if there is one
  * @name: name of channel
  * @cd: channel platform data
- * @runtime_addr: address for RX/TX according to the runtime config
+ * @cfg: slave configuration
  * @at: active transaction on this channel
  * @lock: a lock for this channel data
  * @host: a pointer to the host (internal use)
@@ -262,7 +265,7 @@ struct pl08x_dma_chan {
  * @lli_buses: bitmask to or in to LLI pointer selecting AHB port for LLI
  * fetches
  * @mem_buses: set to indicate memory transfers on AHB2.
- * @lock: a spinlock for this struct
+ * @lli_words: how many words are used in each LLI item for this variant
  */
 struct pl08x_driver_data {
 	struct dma_device slave;
-- 
cgit v1.2.3


From ded091fee6806b7120f475d89c151d611758a395 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 2 Apr 2017 16:50:53 +0200
Subject: dmaengine: pl08x: Use the BIT() macro consistently

This makes the driver shift bits with BIT() which is used on other
places in the driver.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/amba-pl08x.c   | 10 +++++-----
 include/linux/amba/pl080.h | 40 ++++++++++++++++++++--------------------
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 8624598530f3..286114ebbbb9 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -420,7 +420,7 @@ static void pl08x_start_next_txd(struct pl08x_dma_chan *plchan)
 
 	/* Enable the DMA channel */
 	/* Do not access config register until channel shows as disabled */
-	while (readl(pl08x->base + PL080_EN_CHAN) & (1 << phychan->id))
+	while (readl(pl08x->base + PL080_EN_CHAN) & BIT(phychan->id))
 		cpu_relax();
 
 	/* Do not access config register until channel shows as inactive */
@@ -487,8 +487,8 @@ static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x,
 
 	writel(val, ch->reg_config);
 
-	writel(1 << ch->id, pl08x->base + PL080_ERR_CLEAR);
-	writel(1 << ch->id, pl08x->base + PL080_TC_CLEAR);
+	writel(BIT(ch->id), pl08x->base + PL080_ERR_CLEAR);
+	writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR);
 }
 
 static inline u32 get_bytes_in_cctl(u32 cctl)
@@ -1837,7 +1837,7 @@ static irqreturn_t pl08x_irq(int irq, void *dev)
 		return IRQ_NONE;
 
 	for (i = 0; i < pl08x->vd->channels; i++) {
-		if (((1 << i) & err) || ((1 << i) & tc)) {
+		if ((BIT(i) & err) || (BIT(i) & tc)) {
 			/* Locate physical channel */
 			struct pl08x_phy_chan *phychan = &pl08x->phy_chans[i];
 			struct pl08x_dma_chan *plchan = phychan->serving;
@@ -1875,7 +1875,7 @@ static irqreturn_t pl08x_irq(int irq, void *dev)
 			}
 			spin_unlock(&plchan->vc.lock);
 
-			mask |= (1 << i);
+			mask |= BIT(i);
 		}
 	}
 
diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h
index 800429f4b997..580b5323a717 100644
--- a/include/linux/amba/pl080.h
+++ b/include/linux/amba/pl080.h
@@ -38,9 +38,9 @@
 #define PL080_SOFT_LSREQ			(0x2C)
 
 #define PL080_CONFIG				(0x30)
-#define PL080_CONFIG_M2_BE			(1 << 2)
-#define PL080_CONFIG_M1_BE			(1 << 1)
-#define PL080_CONFIG_ENABLE			(1 << 0)
+#define PL080_CONFIG_M2_BE			BIT(2)
+#define PL080_CONFIG_M1_BE			BIT(1)
+#define PL080_CONFIG_ENABLE			BIT(0)
 
 #define PL080_SYNC				(0x34)
 
@@ -58,18 +58,18 @@
 
 #define PL080_LLI_ADDR_MASK			(0x3fffffff << 2)
 #define PL080_LLI_ADDR_SHIFT			(2)
-#define PL080_LLI_LM_AHB2			(1 << 0)
+#define PL080_LLI_LM_AHB2			BIT(0)
 
-#define PL080_CONTROL_TC_IRQ_EN			(1 << 31)
+#define PL080_CONTROL_TC_IRQ_EN			BIT(31)
 #define PL080_CONTROL_PROT_MASK			(0x7 << 28)
 #define PL080_CONTROL_PROT_SHIFT		(28)
-#define PL080_CONTROL_PROT_CACHE		(1 << 30)
-#define PL080_CONTROL_PROT_BUFF			(1 << 29)
-#define PL080_CONTROL_PROT_SYS			(1 << 28)
-#define PL080_CONTROL_DST_INCR			(1 << 27)
-#define PL080_CONTROL_SRC_INCR			(1 << 26)
-#define PL080_CONTROL_DST_AHB2			(1 << 25)
-#define PL080_CONTROL_SRC_AHB2			(1 << 24)
+#define PL080_CONTROL_PROT_CACHE		BIT(30)
+#define PL080_CONTROL_PROT_BUFF			BIT(29)
+#define PL080_CONTROL_PROT_SYS			BIT(28)
+#define PL080_CONTROL_DST_INCR			BIT(27)
+#define PL080_CONTROL_SRC_INCR			BIT(26)
+#define PL080_CONTROL_DST_AHB2			BIT(25)
+#define PL080_CONTROL_SRC_AHB2			BIT(24)
 #define PL080_CONTROL_DWIDTH_MASK		(0x7 << 21)
 #define PL080_CONTROL_DWIDTH_SHIFT		(21)
 #define PL080_CONTROL_SWIDTH_MASK		(0x7 << 18)
@@ -95,20 +95,20 @@
 #define PL080_WIDTH_16BIT			(0x1)
 #define PL080_WIDTH_32BIT			(0x2)
 
-#define PL080N_CONFIG_ITPROT			(1 << 20)
-#define PL080N_CONFIG_SECPROT			(1 << 19)
-#define PL080_CONFIG_HALT			(1 << 18)
-#define PL080_CONFIG_ACTIVE			(1 << 17)  /* RO */
-#define PL080_CONFIG_LOCK			(1 << 16)
-#define PL080_CONFIG_TC_IRQ_MASK		(1 << 15)
-#define PL080_CONFIG_ERR_IRQ_MASK		(1 << 14)
+#define PL080N_CONFIG_ITPROT			BIT(20)
+#define PL080N_CONFIG_SECPROT			BIT(19)
+#define PL080_CONFIG_HALT			BIT(18)
+#define PL080_CONFIG_ACTIVE			BIT(17)  /* RO */
+#define PL080_CONFIG_LOCK			BIT(16)
+#define PL080_CONFIG_TC_IRQ_MASK		BIT(15)
+#define PL080_CONFIG_ERR_IRQ_MASK		BIT(14)
 #define PL080_CONFIG_FLOW_CONTROL_MASK		(0x7 << 11)
 #define PL080_CONFIG_FLOW_CONTROL_SHIFT		(11)
 #define PL080_CONFIG_DST_SEL_MASK		(0xf << 6)
 #define PL080_CONFIG_DST_SEL_SHIFT		(6)
 #define PL080_CONFIG_SRC_SEL_MASK		(0xf << 1)
 #define PL080_CONFIG_SRC_SEL_SHIFT		(1)
-#define PL080_CONFIG_ENABLE			(1 << 0)
+#define PL080_CONFIG_ENABLE			BIT(0)
 
 #define PL080_FLOW_MEM2MEM			(0x0)
 #define PL080_FLOW_MEM2PER			(0x1)
-- 
cgit v1.2.3


From 8ee1bdc5a46cf818e2192c11bc1294a7dbad36c8 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Mon, 3 Apr 2017 09:02:36 +0530
Subject: dmaengine: pl08x: fix pl08x_dma_chan_state documentation

Documentation for pl08x_dma_chan_state mentions it as struct whereas it is a
enum, so fix that

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/amba-pl08x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 286114ebbbb9..0e03f0fdacac 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -206,7 +206,7 @@ struct pl08x_txd {
 };
 
 /**
- * struct pl08x_dma_chan_state - holds the PL08x specific virtual channel
+ * enum pl08x_dma_chan_state - holds the PL08x specific virtual channel
  * states
  * @PL08X_CHAN_IDLE: the channel is idle
  * @PL08X_CHAN_RUNNING: the channel has allocated a physical transport
-- 
cgit v1.2.3


From 417cb972537af7994b9123c226a35aac26369f9d Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Mon, 3 Apr 2017 09:04:28 +0530
Subject: dmaengine: pl08x: remove lock documentation

lock variable in pl08x_dma_chan_state no longer exists so remove it

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/amba-pl08x.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 0e03f0fdacac..6bb8813ca275 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -231,7 +231,6 @@ enum pl08x_dma_chan_state {
  * @cd: channel platform data
  * @cfg: slave configuration
  * @at: active transaction on this channel
- * @lock: a lock for this channel data
  * @host: a pointer to the host (internal use)
  * @state: whether the channel is idle, paused, running etc
  * @slave: whether this channel is a device (slave) or for memcpy
-- 
cgit v1.2.3


From 35d2d5d490e2dc98ec07f899577b2a5451f413e8 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Thu, 4 May 2017 13:54:17 +0200
Subject: KVM: arm/arm64: Move shared files to virt/kvm/arm

For some time now we have been having a lot of shared functionality
between the arm and arm64 KVM support in arch/arm, which not only
required a horrible inter-arch reference from the Makefile in
arch/arm64/kvm, but also created confusion for newcomers to the code
base, as was recently seen on the mailing list.

Further, it causes confusion for things like cscope, which needs special
attention to index specific shared files for arm64 from the arm tree.

Move the shared files into virt/kvm/arm and move the trace points along
with it.  When moving the tracepoints we have to modify the way the vgic
creates definitions of the trace points, so we take the chance to
include the VGIC tracepoints in its very own special vgic trace.h file.

Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 arch/arm/kvm/Makefile     |    7 +-
 arch/arm/kvm/arm.c        | 1480 ----------------------------------
 arch/arm/kvm/mmio.c       |  217 -----
 arch/arm/kvm/mmu.c        | 1958 ---------------------------------------------
 arch/arm/kvm/perf.c       |   68 --
 arch/arm/kvm/psci.c       |  332 --------
 arch/arm/kvm/trace.h      |  247 ------
 arch/arm64/kvm/Makefile   |    5 +-
 virt/kvm/arm/arm.c        | 1480 ++++++++++++++++++++++++++++++++++
 virt/kvm/arm/mmio.c       |  217 +++++
 virt/kvm/arm/mmu.c        | 1958 +++++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/perf.c       |   68 ++
 virt/kvm/arm/psci.c       |  332 ++++++++
 virt/kvm/arm/trace.h      |  246 +++++-
 virt/kvm/arm/vgic/trace.h |   37 +
 virt/kvm/arm/vgic/vgic.c  |    2 +-
 16 files changed, 4335 insertions(+), 4319 deletions(-)
 delete mode 100644 arch/arm/kvm/arm.c
 delete mode 100644 arch/arm/kvm/mmio.c
 delete mode 100644 arch/arm/kvm/mmu.c
 delete mode 100644 arch/arm/kvm/perf.c
 delete mode 100644 arch/arm/kvm/psci.c
 create mode 100644 virt/kvm/arm/arm.c
 create mode 100644 virt/kvm/arm/mmio.c
 create mode 100644 virt/kvm/arm/mmu.c
 create mode 100644 virt/kvm/arm/perf.c
 create mode 100644 virt/kvm/arm/psci.c
 create mode 100644 virt/kvm/arm/vgic/trace.h

diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 7b3670c2ae7b..d9beee652d36 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -18,9 +18,12 @@ KVM := ../../../virt/kvm
 kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
 
 obj-$(CONFIG_KVM_ARM_HOST) += hyp/
+
 obj-y += kvm-arm.o init.o interrupts.o
-obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o vgic-v3-coproc.o
+obj-y += handle_exit.o guest.o emulate.o reset.o
+obj-y += coproc.o coproc_a15.o coproc_a7.o   vgic-v3-coproc.o
+obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
+obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
 obj-y += $(KVM)/arm/aarch32.o
 
 obj-y += $(KVM)/arm/vgic/vgic.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
deleted file mode 100644
index 7941699a766d..000000000000
--- a/arch/arm/kvm/arm.c
+++ /dev/null
@@ -1,1480 +0,0 @@
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#include <linux/cpu_pm.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/kvm_host.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/mman.h>
-#include <linux/sched.h>
-#include <linux/kvm.h>
-#include <trace/events/kvm.h>
-#include <kvm/arm_pmu.h>
-
-#define CREATE_TRACE_POINTS
-#include "trace.h"
-
-#include <linux/uaccess.h>
-#include <asm/ptrace.h>
-#include <asm/mman.h>
-#include <asm/tlbflush.h>
-#include <asm/cacheflush.h>
-#include <asm/virt.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_mmu.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_coproc.h>
-#include <asm/kvm_psci.h>
-#include <asm/sections.h>
-
-#ifdef REQUIRES_VIRT
-__asm__(".arch_extension	virt");
-#endif
-
-static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
-static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
-
-/* Per-CPU variable containing the currently running vcpu. */
-static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
-
-/* The VMID used in the VTTBR */
-static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
-static u32 kvm_next_vmid;
-static unsigned int kvm_vmid_bits __read_mostly;
-static DEFINE_SPINLOCK(kvm_vmid_lock);
-
-static bool vgic_present;
-
-static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
-
-static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
-{
-	BUG_ON(preemptible());
-	__this_cpu_write(kvm_arm_running_vcpu, vcpu);
-}
-
-/**
- * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
- * Must be called from non-preemptible context
- */
-struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
-{
-	BUG_ON(preemptible());
-	return __this_cpu_read(kvm_arm_running_vcpu);
-}
-
-/**
- * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
- */
-struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
-{
-	return &kvm_arm_running_vcpu;
-}
-
-int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
-{
-	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
-}
-
-int kvm_arch_hardware_setup(void)
-{
-	return 0;
-}
-
-void kvm_arch_check_processor_compat(void *rtn)
-{
-	*(int *)rtn = 0;
-}
-
-
-/**
- * kvm_arch_init_vm - initializes a VM data structure
- * @kvm:	pointer to the KVM struct
- */
-int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
-{
-	int ret, cpu;
-
-	if (type)
-		return -EINVAL;
-
-	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
-	if (!kvm->arch.last_vcpu_ran)
-		return -ENOMEM;
-
-	for_each_possible_cpu(cpu)
-		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
-
-	ret = kvm_alloc_stage2_pgd(kvm);
-	if (ret)
-		goto out_fail_alloc;
-
-	ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
-	if (ret)
-		goto out_free_stage2_pgd;
-
-	kvm_vgic_early_init(kvm);
-
-	/* Mark the initial VMID generation invalid */
-	kvm->arch.vmid_gen = 0;
-
-	/* The maximum number of VCPUs is limited by the host's GIC model */
-	kvm->arch.max_vcpus = vgic_present ?
-				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
-
-	return ret;
-out_free_stage2_pgd:
-	kvm_free_stage2_pgd(kvm);
-out_fail_alloc:
-	free_percpu(kvm->arch.last_vcpu_ran);
-	kvm->arch.last_vcpu_ran = NULL;
-	return ret;
-}
-
-bool kvm_arch_has_vcpu_debugfs(void)
-{
-	return false;
-}
-
-int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
-{
-	return VM_FAULT_SIGBUS;
-}
-
-
-/**
- * kvm_arch_destroy_vm - destroy the VM data structure
- * @kvm:	pointer to the KVM struct
- */
-void kvm_arch_destroy_vm(struct kvm *kvm)
-{
-	int i;
-
-	free_percpu(kvm->arch.last_vcpu_ran);
-	kvm->arch.last_vcpu_ran = NULL;
-
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (kvm->vcpus[i]) {
-			kvm_arch_vcpu_free(kvm->vcpus[i]);
-			kvm->vcpus[i] = NULL;
-		}
-	}
-
-	kvm_vgic_destroy(kvm);
-}
-
-int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
-{
-	int r;
-	switch (ext) {
-	case KVM_CAP_IRQCHIP:
-		r = vgic_present;
-		break;
-	case KVM_CAP_IOEVENTFD:
-	case KVM_CAP_DEVICE_CTRL:
-	case KVM_CAP_USER_MEMORY:
-	case KVM_CAP_SYNC_MMU:
-	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
-	case KVM_CAP_ONE_REG:
-	case KVM_CAP_ARM_PSCI:
-	case KVM_CAP_ARM_PSCI_0_2:
-	case KVM_CAP_READONLY_MEM:
-	case KVM_CAP_MP_STATE:
-	case KVM_CAP_IMMEDIATE_EXIT:
-		r = 1;
-		break;
-	case KVM_CAP_COALESCED_MMIO:
-		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
-		break;
-	case KVM_CAP_ARM_SET_DEVICE_ADDR:
-		r = 1;
-		break;
-	case KVM_CAP_NR_VCPUS:
-		r = num_online_cpus();
-		break;
-	case KVM_CAP_MAX_VCPUS:
-		r = KVM_MAX_VCPUS;
-		break;
-	case KVM_CAP_NR_MEMSLOTS:
-		r = KVM_USER_MEM_SLOTS;
-		break;
-	case KVM_CAP_MSI_DEVID:
-		if (!kvm)
-			r = -EINVAL;
-		else
-			r = kvm->arch.vgic.msis_require_devid;
-		break;
-	case KVM_CAP_ARM_USER_IRQ:
-		/*
-		 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
-		 * (bump this number if adding more devices)
-		 */
-		r = 1;
-		break;
-	default:
-		r = kvm_arch_dev_ioctl_check_extension(kvm, ext);
-		break;
-	}
-	return r;
-}
-
-long kvm_arch_dev_ioctl(struct file *filp,
-			unsigned int ioctl, unsigned long arg)
-{
-	return -EINVAL;
-}
-
-
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
-{
-	int err;
-	struct kvm_vcpu *vcpu;
-
-	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
-		err = -EBUSY;
-		goto out;
-	}
-
-	if (id >= kvm->arch.max_vcpus) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
-	if (!vcpu) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	err = kvm_vcpu_init(vcpu, kvm, id);
-	if (err)
-		goto free_vcpu;
-
-	err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
-	if (err)
-		goto vcpu_uninit;
-
-	return vcpu;
-vcpu_uninit:
-	kvm_vcpu_uninit(vcpu);
-free_vcpu:
-	kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
-	return ERR_PTR(err);
-}
-
-void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
-{
-	kvm_vgic_vcpu_early_init(vcpu);
-}
-
-void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
-{
-	kvm_mmu_free_memory_caches(vcpu);
-	kvm_timer_vcpu_terminate(vcpu);
-	kvm_vgic_vcpu_destroy(vcpu);
-	kvm_pmu_vcpu_destroy(vcpu);
-	kvm_vcpu_uninit(vcpu);
-	kmem_cache_free(kvm_vcpu_cache, vcpu);
-}
-
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
-	kvm_arch_vcpu_free(vcpu);
-}
-
-int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
-{
-	return kvm_timer_should_fire(vcpu_vtimer(vcpu)) ||
-	       kvm_timer_should_fire(vcpu_ptimer(vcpu));
-}
-
-void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
-{
-	kvm_timer_schedule(vcpu);
-}
-
-void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
-{
-	kvm_timer_unschedule(vcpu);
-}
-
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	/* Force users to call KVM_ARM_VCPU_INIT */
-	vcpu->arch.target = -1;
-	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-	/* Set up the timer */
-	kvm_timer_vcpu_init(vcpu);
-
-	kvm_arm_reset_debug_ptr(vcpu);
-
-	return 0;
-}
-
-void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-	int *last_ran;
-
-	last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
-
-	/*
-	 * We might get preempted before the vCPU actually runs, but
-	 * over-invalidation doesn't affect correctness.
-	 */
-	if (*last_ran != vcpu->vcpu_id) {
-		kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
-		*last_ran = vcpu->vcpu_id;
-	}
-
-	vcpu->cpu = cpu;
-	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
-
-	kvm_arm_set_running_vcpu(vcpu);
-
-	kvm_vgic_load(vcpu);
-}
-
-void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
-{
-	kvm_vgic_put(vcpu);
-
-	vcpu->cpu = -1;
-
-	kvm_arm_set_running_vcpu(NULL);
-	kvm_timer_vcpu_put(vcpu);
-}
-
-int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
-				    struct kvm_mp_state *mp_state)
-{
-	if (vcpu->arch.power_off)
-		mp_state->mp_state = KVM_MP_STATE_STOPPED;
-	else
-		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
-
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
-				    struct kvm_mp_state *mp_state)
-{
-	switch (mp_state->mp_state) {
-	case KVM_MP_STATE_RUNNABLE:
-		vcpu->arch.power_off = false;
-		break;
-	case KVM_MP_STATE_STOPPED:
-		vcpu->arch.power_off = true;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/**
- * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
- * @v:		The VCPU pointer
- *
- * If the guest CPU is not waiting for interrupts or an interrupt line is
- * asserted, the CPU is by definition runnable.
- */
-int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
-{
-	return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
-		&& !v->arch.power_off && !v->arch.pause);
-}
-
-/* Just ensure a guest exit from a particular CPU */
-static void exit_vm_noop(void *info)
-{
-}
-
-void force_vm_exit(const cpumask_t *mask)
-{
-	preempt_disable();
-	smp_call_function_many(mask, exit_vm_noop, NULL, true);
-	preempt_enable();
-}
-
-/**
- * need_new_vmid_gen - check that the VMID is still valid
- * @kvm: The VM's VMID to check
- *
- * return true if there is a new generation of VMIDs being used
- *
- * The hardware supports only 256 values with the value zero reserved for the
- * host, so we check if an assigned value belongs to a previous generation,
- * which which requires us to assign a new value. If we're the first to use a
- * VMID for the new generation, we must flush necessary caches and TLBs on all
- * CPUs.
- */
-static bool need_new_vmid_gen(struct kvm *kvm)
-{
-	return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
-}
-
-/**
- * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
- * @kvm	The guest that we are about to run
- *
- * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
- * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
- * caches and TLBs.
- */
-static void update_vttbr(struct kvm *kvm)
-{
-	phys_addr_t pgd_phys;
-	u64 vmid;
-
-	if (!need_new_vmid_gen(kvm))
-		return;
-
-	spin_lock(&kvm_vmid_lock);
-
-	/*
-	 * We need to re-check the vmid_gen here to ensure that if another vcpu
-	 * already allocated a valid vmid for this vm, then this vcpu should
-	 * use the same vmid.
-	 */
-	if (!need_new_vmid_gen(kvm)) {
-		spin_unlock(&kvm_vmid_lock);
-		return;
-	}
-
-	/* First user of a new VMID generation? */
-	if (unlikely(kvm_next_vmid == 0)) {
-		atomic64_inc(&kvm_vmid_gen);
-		kvm_next_vmid = 1;
-
-		/*
-		 * On SMP we know no other CPUs can use this CPU's or each
-		 * other's VMID after force_vm_exit returns since the
-		 * kvm_vmid_lock blocks them from reentry to the guest.
-		 */
-		force_vm_exit(cpu_all_mask);
-		/*
-		 * Now broadcast TLB + ICACHE invalidation over the inner
-		 * shareable domain to make sure all data structures are
-		 * clean.
-		 */
-		kvm_call_hyp(__kvm_flush_vm_context);
-	}
-
-	kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
-	kvm->arch.vmid = kvm_next_vmid;
-	kvm_next_vmid++;
-	kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
-
-	/* update vttbr to be used with the new vmid */
-	pgd_phys = virt_to_phys(kvm->arch.pgd);
-	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
-	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
-	kvm->arch.vttbr = pgd_phys | vmid;
-
-	spin_unlock(&kvm_vmid_lock);
-}
-
-static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
-{
-	struct kvm *kvm = vcpu->kvm;
-	int ret = 0;
-
-	if (likely(vcpu->arch.has_run_once))
-		return 0;
-
-	vcpu->arch.has_run_once = true;
-
-	/*
-	 * Map the VGIC hardware resources before running a vcpu the first
-	 * time on this VM.
-	 */
-	if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) {
-		ret = kvm_vgic_map_resources(kvm);
-		if (ret)
-			return ret;
-	}
-
-	ret = kvm_timer_enable(vcpu);
-
-	return ret;
-}
-
-bool kvm_arch_intc_initialized(struct kvm *kvm)
-{
-	return vgic_initialized(kvm);
-}
-
-void kvm_arm_halt_guest(struct kvm *kvm)
-{
-	int i;
-	struct kvm_vcpu *vcpu;
-
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		vcpu->arch.pause = true;
-	kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT);
-}
-
-void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.pause = true;
-	kvm_vcpu_kick(vcpu);
-}
-
-void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu)
-{
-	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
-
-	vcpu->arch.pause = false;
-	swake_up(wq);
-}
-
-void kvm_arm_resume_guest(struct kvm *kvm)
-{
-	int i;
-	struct kvm_vcpu *vcpu;
-
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		kvm_arm_resume_vcpu(vcpu);
-}
-
-static void vcpu_sleep(struct kvm_vcpu *vcpu)
-{
-	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
-
-	swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
-				       (!vcpu->arch.pause)));
-}
-
-static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.target >= 0;
-}
-
-/**
- * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
- * @vcpu:	The VCPU pointer
- * @run:	The kvm_run structure pointer used for userspace state exchange
- *
- * This function is called through the VCPU_RUN ioctl called from user space. It
- * will execute VM code in a loop until the time slice for the process is used
- * or some emulation is needed from user space in which case the function will
- * return with return value 0 and with the kvm_run structure filled in with the
- * required data for the requested emulation.
- */
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-	int ret;
-	sigset_t sigsaved;
-
-	if (unlikely(!kvm_vcpu_initialized(vcpu)))
-		return -ENOEXEC;
-
-	ret = kvm_vcpu_first_run_init(vcpu);
-	if (ret)
-		return ret;
-
-	if (run->exit_reason == KVM_EXIT_MMIO) {
-		ret = kvm_handle_mmio_return(vcpu, vcpu->run);
-		if (ret)
-			return ret;
-	}
-
-	if (run->immediate_exit)
-		return -EINTR;
-
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
-
-	ret = 1;
-	run->exit_reason = KVM_EXIT_UNKNOWN;
-	while (ret > 0) {
-		/*
-		 * Check conditions before entering the guest
-		 */
-		cond_resched();
-
-		update_vttbr(vcpu->kvm);
-
-		if (vcpu->arch.power_off || vcpu->arch.pause)
-			vcpu_sleep(vcpu);
-
-		/*
-		 * Preparing the interrupts to be injected also
-		 * involves poking the GIC, which must be done in a
-		 * non-preemptible context.
-		 */
-		preempt_disable();
-
-		kvm_pmu_flush_hwstate(vcpu);
-
-		kvm_timer_flush_hwstate(vcpu);
-		kvm_vgic_flush_hwstate(vcpu);
-
-		local_irq_disable();
-
-		/*
-		 * If we have a singal pending, or need to notify a userspace
-		 * irqchip about timer or PMU level changes, then we exit (and
-		 * update the timer level state in kvm_timer_update_run
-		 * below).
-		 */
-		if (signal_pending(current) ||
-		    kvm_timer_should_notify_user(vcpu) ||
-		    kvm_pmu_should_notify_user(vcpu)) {
-			ret = -EINTR;
-			run->exit_reason = KVM_EXIT_INTR;
-		}
-
-		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
-			vcpu->arch.power_off || vcpu->arch.pause) {
-			local_irq_enable();
-			kvm_pmu_sync_hwstate(vcpu);
-			kvm_timer_sync_hwstate(vcpu);
-			kvm_vgic_sync_hwstate(vcpu);
-			preempt_enable();
-			continue;
-		}
-
-		kvm_arm_setup_debug(vcpu);
-
-		/**************************************************************
-		 * Enter the guest
-		 */
-		trace_kvm_entry(*vcpu_pc(vcpu));
-		guest_enter_irqoff();
-		vcpu->mode = IN_GUEST_MODE;
-
-		ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
-
-		vcpu->mode = OUTSIDE_GUEST_MODE;
-		vcpu->stat.exits++;
-		/*
-		 * Back from guest
-		 *************************************************************/
-
-		kvm_arm_clear_debug(vcpu);
-
-		/*
-		 * We may have taken a host interrupt in HYP mode (ie
-		 * while executing the guest). This interrupt is still
-		 * pending, as we haven't serviced it yet!
-		 *
-		 * We're now back in SVC mode, with interrupts
-		 * disabled.  Enabling the interrupts now will have
-		 * the effect of taking the interrupt again, in SVC
-		 * mode this time.
-		 */
-		local_irq_enable();
-
-		/*
-		 * We do local_irq_enable() before calling guest_exit() so
-		 * that if a timer interrupt hits while running the guest we
-		 * account that tick as being spent in the guest.  We enable
-		 * preemption after calling guest_exit() so that if we get
-		 * preempted we make sure ticks after that is not counted as
-		 * guest time.
-		 */
-		guest_exit();
-		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
-
-		/*
-		 * We must sync the PMU and timer state before the vgic state so
-		 * that the vgic can properly sample the updated state of the
-		 * interrupt line.
-		 */
-		kvm_pmu_sync_hwstate(vcpu);
-		kvm_timer_sync_hwstate(vcpu);
-
-		kvm_vgic_sync_hwstate(vcpu);
-
-		preempt_enable();
-
-		ret = handle_exit(vcpu, run, ret);
-	}
-
-	/* Tell userspace about in-kernel device output levels */
-	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
-		kvm_timer_update_run(vcpu);
-		kvm_pmu_update_run(vcpu);
-	}
-
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-	return ret;
-}
-
-static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
-{
-	int bit_index;
-	bool set;
-	unsigned long *ptr;
-
-	if (number == KVM_ARM_IRQ_CPU_IRQ)
-		bit_index = __ffs(HCR_VI);
-	else /* KVM_ARM_IRQ_CPU_FIQ */
-		bit_index = __ffs(HCR_VF);
-
-	ptr = (unsigned long *)&vcpu->arch.irq_lines;
-	if (level)
-		set = test_and_set_bit(bit_index, ptr);
-	else
-		set = test_and_clear_bit(bit_index, ptr);
-
-	/*
-	 * If we didn't change anything, no need to wake up or kick other CPUs
-	 */
-	if (set == level)
-		return 0;
-
-	/*
-	 * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
-	 * trigger a world-switch round on the running physical CPU to set the
-	 * virtual IRQ/FIQ fields in the HCR appropriately.
-	 */
-	kvm_vcpu_kick(vcpu);
-
-	return 0;
-}
-
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
-			  bool line_status)
-{
-	u32 irq = irq_level->irq;
-	unsigned int irq_type, vcpu_idx, irq_num;
-	int nrcpus = atomic_read(&kvm->online_vcpus);
-	struct kvm_vcpu *vcpu = NULL;
-	bool level = irq_level->level;
-
-	irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
-	vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
-	irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
-
-	trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
-
-	switch (irq_type) {
-	case KVM_ARM_IRQ_TYPE_CPU:
-		if (irqchip_in_kernel(kvm))
-			return -ENXIO;
-
-		if (vcpu_idx >= nrcpus)
-			return -EINVAL;
-
-		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
-		if (!vcpu)
-			return -EINVAL;
-
-		if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
-			return -EINVAL;
-
-		return vcpu_interrupt_line(vcpu, irq_num, level);
-	case KVM_ARM_IRQ_TYPE_PPI:
-		if (!irqchip_in_kernel(kvm))
-			return -ENXIO;
-
-		if (vcpu_idx >= nrcpus)
-			return -EINVAL;
-
-		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
-		if (!vcpu)
-			return -EINVAL;
-
-		if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
-			return -EINVAL;
-
-		return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level);
-	case KVM_ARM_IRQ_TYPE_SPI:
-		if (!irqchip_in_kernel(kvm))
-			return -ENXIO;
-
-		if (irq_num < VGIC_NR_PRIVATE_IRQS)
-			return -EINVAL;
-
-		return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
-	}
-
-	return -EINVAL;
-}
-
-static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			       const struct kvm_vcpu_init *init)
-{
-	unsigned int i;
-	int phys_target = kvm_target_cpu();
-
-	if (init->target != phys_target)
-		return -EINVAL;
-
-	/*
-	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
-	 * use the same target.
-	 */
-	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
-		return -EINVAL;
-
-	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-	for (i = 0; i < sizeof(init->features) * 8; i++) {
-		bool set = (init->features[i / 32] & (1 << (i % 32)));
-
-		if (set && i >= KVM_VCPU_MAX_FEATURES)
-			return -ENOENT;
-
-		/*
-		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
-		 * use the same feature set.
-		 */
-		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
-		    test_bit(i, vcpu->arch.features) != set)
-			return -EINVAL;
-
-		if (set)
-			set_bit(i, vcpu->arch.features);
-	}
-
-	vcpu->arch.target = phys_target;
-
-	/* Now we know what it is, we can reset it. */
-	return kvm_reset_vcpu(vcpu);
-}
-
-
-static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
-					 struct kvm_vcpu_init *init)
-{
-	int ret;
-
-	ret = kvm_vcpu_set_target(vcpu, init);
-	if (ret)
-		return ret;
-
-	/*
-	 * Ensure a rebooted VM will fault in RAM pages and detect if the
-	 * guest MMU is turned off and flush the caches as needed.
-	 */
-	if (vcpu->arch.has_run_once)
-		stage2_unmap_vm(vcpu->kvm);
-
-	vcpu_reset_hcr(vcpu);
-
-	/*
-	 * Handle the "start in power-off" case.
-	 */
-	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
-		vcpu->arch.power_off = true;
-	else
-		vcpu->arch.power_off = false;
-
-	return 0;
-}
-
-static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
-				 struct kvm_device_attr *attr)
-{
-	int ret = -ENXIO;
-
-	switch (attr->group) {
-	default:
-		ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
-		break;
-	}
-
-	return ret;
-}
-
-static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
-				 struct kvm_device_attr *attr)
-{
-	int ret = -ENXIO;
-
-	switch (attr->group) {
-	default:
-		ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
-		break;
-	}
-
-	return ret;
-}
-
-static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
-				 struct kvm_device_attr *attr)
-{
-	int ret = -ENXIO;
-
-	switch (attr->group) {
-	default:
-		ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
-		break;
-	}
-
-	return ret;
-}
-
-long kvm_arch_vcpu_ioctl(struct file *filp,
-			 unsigned int ioctl, unsigned long arg)
-{
-	struct kvm_vcpu *vcpu = filp->private_data;
-	void __user *argp = (void __user *)arg;
-	struct kvm_device_attr attr;
-
-	switch (ioctl) {
-	case KVM_ARM_VCPU_INIT: {
-		struct kvm_vcpu_init init;
-
-		if (copy_from_user(&init, argp, sizeof(init)))
-			return -EFAULT;
-
-		return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
-	}
-	case KVM_SET_ONE_REG:
-	case KVM_GET_ONE_REG: {
-		struct kvm_one_reg reg;
-
-		if (unlikely(!kvm_vcpu_initialized(vcpu)))
-			return -ENOEXEC;
-
-		if (copy_from_user(&reg, argp, sizeof(reg)))
-			return -EFAULT;
-		if (ioctl == KVM_SET_ONE_REG)
-			return kvm_arm_set_reg(vcpu, &reg);
-		else
-			return kvm_arm_get_reg(vcpu, &reg);
-	}
-	case KVM_GET_REG_LIST: {
-		struct kvm_reg_list __user *user_list = argp;
-		struct kvm_reg_list reg_list;
-		unsigned n;
-
-		if (unlikely(!kvm_vcpu_initialized(vcpu)))
-			return -ENOEXEC;
-
-		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
-			return -EFAULT;
-		n = reg_list.n;
-		reg_list.n = kvm_arm_num_regs(vcpu);
-		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
-			return -EFAULT;
-		if (n < reg_list.n)
-			return -E2BIG;
-		return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
-	}
-	case KVM_SET_DEVICE_ATTR: {
-		if (copy_from_user(&attr, argp, sizeof(attr)))
-			return -EFAULT;
-		return kvm_arm_vcpu_set_attr(vcpu, &attr);
-	}
-	case KVM_GET_DEVICE_ATTR: {
-		if (copy_from_user(&attr, argp, sizeof(attr)))
-			return -EFAULT;
-		return kvm_arm_vcpu_get_attr(vcpu, &attr);
-	}
-	case KVM_HAS_DEVICE_ATTR: {
-		if (copy_from_user(&attr, argp, sizeof(attr)))
-			return -EFAULT;
-		return kvm_arm_vcpu_has_attr(vcpu, &attr);
-	}
-	default:
-		return -EINVAL;
-	}
-}
-
-/**
- * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
- * @kvm: kvm instance
- * @log: slot id and address to which we copy the log
- *
- * Steps 1-4 below provide general overview of dirty page logging. See
- * kvm_get_dirty_log_protect() function description for additional details.
- *
- * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
- * always flush the TLB (step 4) even if previous step failed  and the dirty
- * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
- * does not preclude user space subsequent dirty log read. Flushing TLB ensures
- * writes will be marked dirty for next log read.
- *
- *   1. Take a snapshot of the bit and clear it if needed.
- *   2. Write protect the corresponding page.
- *   3. Copy the snapshot to the userspace.
- *   4. Flush TLB's if needed.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
-{
-	bool is_dirty = false;
-	int r;
-
-	mutex_lock(&kvm->slots_lock);
-
-	r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
-
-	if (is_dirty)
-		kvm_flush_remote_tlbs(kvm);
-
-	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
-static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
-					struct kvm_arm_device_addr *dev_addr)
-{
-	unsigned long dev_id, type;
-
-	dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
-		KVM_ARM_DEVICE_ID_SHIFT;
-	type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
-		KVM_ARM_DEVICE_TYPE_SHIFT;
-
-	switch (dev_id) {
-	case KVM_ARM_DEVICE_VGIC_V2:
-		if (!vgic_present)
-			return -ENXIO;
-		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
-	default:
-		return -ENODEV;
-	}
-}
-
-long kvm_arch_vm_ioctl(struct file *filp,
-		       unsigned int ioctl, unsigned long arg)
-{
-	struct kvm *kvm = filp->private_data;
-	void __user *argp = (void __user *)arg;
-
-	switch (ioctl) {
-	case KVM_CREATE_IRQCHIP: {
-		int ret;
-		if (!vgic_present)
-			return -ENXIO;
-		mutex_lock(&kvm->lock);
-		ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
-		mutex_unlock(&kvm->lock);
-		return ret;
-	}
-	case KVM_ARM_SET_DEVICE_ADDR: {
-		struct kvm_arm_device_addr dev_addr;
-
-		if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
-			return -EFAULT;
-		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
-	}
-	case KVM_ARM_PREFERRED_TARGET: {
-		int err;
-		struct kvm_vcpu_init init;
-
-		err = kvm_vcpu_preferred_target(&init);
-		if (err)
-			return err;
-
-		if (copy_to_user(argp, &init, sizeof(init)))
-			return -EFAULT;
-
-		return 0;
-	}
-	default:
-		return -EINVAL;
-	}
-}
-
-static void cpu_init_hyp_mode(void *dummy)
-{
-	phys_addr_t pgd_ptr;
-	unsigned long hyp_stack_ptr;
-	unsigned long stack_page;
-	unsigned long vector_ptr;
-
-	/* Switch from the HYP stub to our own HYP init vector */
-	__hyp_set_vectors(kvm_get_idmap_vector());
-
-	pgd_ptr = kvm_mmu_get_httbr();
-	stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
-	hyp_stack_ptr = stack_page + PAGE_SIZE;
-	vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector);
-
-	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
-	__cpu_init_stage2();
-
-	if (is_kernel_in_hyp_mode())
-		kvm_timer_init_vhe();
-
-	kvm_arm_init_debug();
-}
-
-static void cpu_hyp_reset(void)
-{
-	if (!is_kernel_in_hyp_mode())
-		__hyp_reset_vectors();
-}
-
-static void cpu_hyp_reinit(void)
-{
-	cpu_hyp_reset();
-
-	if (is_kernel_in_hyp_mode()) {
-		/*
-		 * __cpu_init_stage2() is safe to call even if the PM
-		 * event was cancelled before the CPU was reset.
-		 */
-		__cpu_init_stage2();
-	} else {
-		cpu_init_hyp_mode(NULL);
-	}
-}
-
-static void _kvm_arch_hardware_enable(void *discard)
-{
-	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
-		cpu_hyp_reinit();
-		__this_cpu_write(kvm_arm_hardware_enabled, 1);
-	}
-}
-
-int kvm_arch_hardware_enable(void)
-{
-	_kvm_arch_hardware_enable(NULL);
-	return 0;
-}
-
-static void _kvm_arch_hardware_disable(void *discard)
-{
-	if (__this_cpu_read(kvm_arm_hardware_enabled)) {
-		cpu_hyp_reset();
-		__this_cpu_write(kvm_arm_hardware_enabled, 0);
-	}
-}
-
-void kvm_arch_hardware_disable(void)
-{
-	_kvm_arch_hardware_disable(NULL);
-}
-
-#ifdef CONFIG_CPU_PM
-static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
-				    unsigned long cmd,
-				    void *v)
-{
-	/*
-	 * kvm_arm_hardware_enabled is left with its old value over
-	 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
-	 * re-enable hyp.
-	 */
-	switch (cmd) {
-	case CPU_PM_ENTER:
-		if (__this_cpu_read(kvm_arm_hardware_enabled))
-			/*
-			 * don't update kvm_arm_hardware_enabled here
-			 * so that the hardware will be re-enabled
-			 * when we resume. See below.
-			 */
-			cpu_hyp_reset();
-
-		return NOTIFY_OK;
-	case CPU_PM_EXIT:
-		if (__this_cpu_read(kvm_arm_hardware_enabled))
-			/* The hardware was enabled before suspend. */
-			cpu_hyp_reinit();
-
-		return NOTIFY_OK;
-
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static struct notifier_block hyp_init_cpu_pm_nb = {
-	.notifier_call = hyp_init_cpu_pm_notifier,
-};
-
-static void __init hyp_cpu_pm_init(void)
-{
-	cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
-}
-static void __init hyp_cpu_pm_exit(void)
-{
-	cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
-}
-#else
-static inline void hyp_cpu_pm_init(void)
-{
-}
-static inline void hyp_cpu_pm_exit(void)
-{
-}
-#endif
-
-static void teardown_common_resources(void)
-{
-	free_percpu(kvm_host_cpu_state);
-}
-
-static int init_common_resources(void)
-{
-	kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
-	if (!kvm_host_cpu_state) {
-		kvm_err("Cannot allocate host CPU state\n");
-		return -ENOMEM;
-	}
-
-	/* set size of VMID supported by CPU */
-	kvm_vmid_bits = kvm_get_vmid_bits();
-	kvm_info("%d-bit VMID\n", kvm_vmid_bits);
-
-	return 0;
-}
-
-static int init_subsystems(void)
-{
-	int err = 0;
-
-	/*
-	 * Enable hardware so that subsystem initialisation can access EL2.
-	 */
-	on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
-
-	/*
-	 * Register CPU lower-power notifier
-	 */
-	hyp_cpu_pm_init();
-
-	/*
-	 * Init HYP view of VGIC
-	 */
-	err = kvm_vgic_hyp_init();
-	switch (err) {
-	case 0:
-		vgic_present = true;
-		break;
-	case -ENODEV:
-	case -ENXIO:
-		vgic_present = false;
-		err = 0;
-		break;
-	default:
-		goto out;
-	}
-
-	/*
-	 * Init HYP architected timer support
-	 */
-	err = kvm_timer_hyp_init();
-	if (err)
-		goto out;
-
-	kvm_perf_init();
-	kvm_coproc_table_init();
-
-out:
-	on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
-
-	return err;
-}
-
-static void teardown_hyp_mode(void)
-{
-	int cpu;
-
-	if (is_kernel_in_hyp_mode())
-		return;
-
-	free_hyp_pgds();
-	for_each_possible_cpu(cpu)
-		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
-	hyp_cpu_pm_exit();
-}
-
-static int init_vhe_mode(void)
-{
-	kvm_info("VHE mode initialized successfully\n");
-	return 0;
-}
-
-/**
- * Inits Hyp-mode on all online CPUs
- */
-static int init_hyp_mode(void)
-{
-	int cpu;
-	int err = 0;
-
-	/*
-	 * Allocate Hyp PGD and setup Hyp identity mapping
-	 */
-	err = kvm_mmu_init();
-	if (err)
-		goto out_err;
-
-	/*
-	 * Allocate stack pages for Hypervisor-mode
-	 */
-	for_each_possible_cpu(cpu) {
-		unsigned long stack_page;
-
-		stack_page = __get_free_page(GFP_KERNEL);
-		if (!stack_page) {
-			err = -ENOMEM;
-			goto out_err;
-		}
-
-		per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
-	}
-
-	/*
-	 * Map the Hyp-code called directly from the host
-	 */
-	err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
-				  kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
-	if (err) {
-		kvm_err("Cannot map world-switch code\n");
-		goto out_err;
-	}
-
-	err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
-				  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
-	if (err) {
-		kvm_err("Cannot map rodata section\n");
-		goto out_err;
-	}
-
-	err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
-				  kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
-	if (err) {
-		kvm_err("Cannot map bss section\n");
-		goto out_err;
-	}
-
-	/*
-	 * Map the Hyp stack pages
-	 */
-	for_each_possible_cpu(cpu) {
-		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
-		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
-					  PAGE_HYP);
-
-		if (err) {
-			kvm_err("Cannot map hyp stack\n");
-			goto out_err;
-		}
-	}
-
-	for_each_possible_cpu(cpu) {
-		kvm_cpu_context_t *cpu_ctxt;
-
-		cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
-		err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP);
-
-		if (err) {
-			kvm_err("Cannot map host CPU state: %d\n", err);
-			goto out_err;
-		}
-	}
-
-	kvm_info("Hyp mode initialized successfully\n");
-
-	return 0;
-
-out_err:
-	teardown_hyp_mode();
-	kvm_err("error initializing Hyp mode: %d\n", err);
-	return err;
-}
-
-static void check_kvm_target_cpu(void *ret)
-{
-	*(int *)ret = kvm_target_cpu();
-}
-
-struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
-{
-	struct kvm_vcpu *vcpu;
-	int i;
-
-	mpidr &= MPIDR_HWID_BITMASK;
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
-			return vcpu;
-	}
-	return NULL;
-}
-
-/**
- * Initialize Hyp-mode and memory mappings on all CPUs.
- */
-int kvm_arch_init(void *opaque)
-{
-	int err;
-	int ret, cpu;
-
-	if (!is_hyp_mode_available()) {
-		kvm_err("HYP mode not available\n");
-		return -ENODEV;
-	}
-
-	for_each_online_cpu(cpu) {
-		smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
-		if (ret < 0) {
-			kvm_err("Error, CPU %d not supported!\n", cpu);
-			return -ENODEV;
-		}
-	}
-
-	err = init_common_resources();
-	if (err)
-		return err;
-
-	if (is_kernel_in_hyp_mode())
-		err = init_vhe_mode();
-	else
-		err = init_hyp_mode();
-	if (err)
-		goto out_err;
-
-	err = init_subsystems();
-	if (err)
-		goto out_hyp;
-
-	return 0;
-
-out_hyp:
-	teardown_hyp_mode();
-out_err:
-	teardown_common_resources();
-	return err;
-}
-
-/* NOP: Compiling as a module not supported */
-void kvm_arch_exit(void)
-{
-	kvm_perf_teardown();
-}
-
-static int arm_init(void)
-{
-	int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
-	return rc;
-}
-
-module_init(arm_init);
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
deleted file mode 100644
index b6e715fd3c90..000000000000
--- a/arch/arm/kvm/mmio.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#include <linux/kvm_host.h>
-#include <asm/kvm_mmio.h>
-#include <asm/kvm_emulate.h>
-#include <trace/events/kvm.h>
-
-#include "trace.h"
-
-void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data)
-{
-	void *datap = NULL;
-	union {
-		u8	byte;
-		u16	hword;
-		u32	word;
-		u64	dword;
-	} tmp;
-
-	switch (len) {
-	case 1:
-		tmp.byte	= data;
-		datap		= &tmp.byte;
-		break;
-	case 2:
-		tmp.hword	= data;
-		datap		= &tmp.hword;
-		break;
-	case 4:
-		tmp.word	= data;
-		datap		= &tmp.word;
-		break;
-	case 8:
-		tmp.dword	= data;
-		datap		= &tmp.dword;
-		break;
-	}
-
-	memcpy(buf, datap, len);
-}
-
-unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len)
-{
-	unsigned long data = 0;
-	union {
-		u16	hword;
-		u32	word;
-		u64	dword;
-	} tmp;
-
-	switch (len) {
-	case 1:
-		data = *(u8 *)buf;
-		break;
-	case 2:
-		memcpy(&tmp.hword, buf, len);
-		data = tmp.hword;
-		break;
-	case 4:
-		memcpy(&tmp.word, buf, len);
-		data = tmp.word;
-		break;
-	case 8:
-		memcpy(&tmp.dword, buf, len);
-		data = tmp.dword;
-		break;
-	}
-
-	return data;
-}
-
-/**
- * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
- *			     or in-kernel IO emulation
- *
- * @vcpu: The VCPU pointer
- * @run:  The VCPU run struct containing the mmio data
- */
-int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-	unsigned long data;
-	unsigned int len;
-	int mask;
-
-	if (!run->mmio.is_write) {
-		len = run->mmio.len;
-		if (len > sizeof(unsigned long))
-			return -EINVAL;
-
-		data = kvm_mmio_read_buf(run->mmio.data, len);
-
-		if (vcpu->arch.mmio_decode.sign_extend &&
-		    len < sizeof(unsigned long)) {
-			mask = 1U << ((len * 8) - 1);
-			data = (data ^ mask) - mask;
-		}
-
-		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
-			       data);
-		data = vcpu_data_host_to_guest(vcpu, data, len);
-		vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
-	}
-
-	return 0;
-}
-
-static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
-{
-	unsigned long rt;
-	int access_size;
-	bool sign_extend;
-
-	if (kvm_vcpu_dabt_iss1tw(vcpu)) {
-		/* page table accesses IO mem: tell guest to fix its TTBR */
-		kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
-		return 1;
-	}
-
-	access_size = kvm_vcpu_dabt_get_as(vcpu);
-	if (unlikely(access_size < 0))
-		return access_size;
-
-	*is_write = kvm_vcpu_dabt_iswrite(vcpu);
-	sign_extend = kvm_vcpu_dabt_issext(vcpu);
-	rt = kvm_vcpu_dabt_get_rd(vcpu);
-
-	*len = access_size;
-	vcpu->arch.mmio_decode.sign_extend = sign_extend;
-	vcpu->arch.mmio_decode.rt = rt;
-
-	/*
-	 * The MMIO instruction is emulated and should not be re-executed
-	 * in the guest.
-	 */
-	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-	return 0;
-}
-
-int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
-		 phys_addr_t fault_ipa)
-{
-	unsigned long data;
-	unsigned long rt;
-	int ret;
-	bool is_write;
-	int len;
-	u8 data_buf[8];
-
-	/*
-	 * Prepare MMIO operation. First decode the syndrome data we get
-	 * from the CPU. Then try if some in-kernel emulation feels
-	 * responsible, otherwise let user space do its magic.
-	 */
-	if (kvm_vcpu_dabt_isvalid(vcpu)) {
-		ret = decode_hsr(vcpu, &is_write, &len);
-		if (ret)
-			return ret;
-	} else {
-		kvm_err("load/store instruction decoding not implemented\n");
-		return -ENOSYS;
-	}
-
-	rt = vcpu->arch.mmio_decode.rt;
-
-	if (is_write) {
-		data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
-					       len);
-
-		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
-		kvm_mmio_write_buf(data_buf, len, data);
-
-		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
-				       data_buf);
-	} else {
-		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
-			       fault_ipa, 0);
-
-		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
-				      data_buf);
-	}
-
-	/* Now prepare kvm_run for the potential return to userland. */
-	run->mmio.is_write	= is_write;
-	run->mmio.phys_addr	= fault_ipa;
-	run->mmio.len		= len;
-
-	if (!ret) {
-		/* We handled the access successfully in the kernel. */
-		if (!is_write)
-			memcpy(run->mmio.data, data_buf, len);
-		vcpu->stat.mmio_exit_kernel++;
-		kvm_handle_mmio_return(vcpu, run);
-		return 1;
-	}
-
-	if (is_write)
-		memcpy(run->mmio.data, data_buf, len);
-	vcpu->stat.mmio_exit_user++;
-	run->exit_reason	= KVM_EXIT_MMIO;
-	return 0;
-}
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
deleted file mode 100644
index efb4335aa5c4..000000000000
--- a/arch/arm/kvm/mmu.c
+++ /dev/null
@@ -1,1958 +0,0 @@
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#include <linux/mman.h>
-#include <linux/kvm_host.h>
-#include <linux/io.h>
-#include <linux/hugetlb.h>
-#include <trace/events/kvm.h>
-#include <asm/pgalloc.h>
-#include <asm/cacheflush.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_mmu.h>
-#include <asm/kvm_mmio.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_emulate.h>
-#include <asm/virt.h>
-
-#include "trace.h"
-
-static pgd_t *boot_hyp_pgd;
-static pgd_t *hyp_pgd;
-static pgd_t *merged_hyp_pgd;
-static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
-
-static unsigned long hyp_idmap_start;
-static unsigned long hyp_idmap_end;
-static phys_addr_t hyp_idmap_vector;
-
-#define S2_PGD_SIZE	(PTRS_PER_S2_PGD * sizeof(pgd_t))
-#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
-
-#define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
-#define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)
-
-static bool memslot_is_logging(struct kvm_memory_slot *memslot)
-{
-	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
-}
-
-/**
- * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
- * @kvm:	pointer to kvm structure.
- *
- * Interface to HYP function to flush all VM TLB entries
- */
-void kvm_flush_remote_tlbs(struct kvm *kvm)
-{
-	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
-}
-
-static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
-{
-	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
-}
-
-/*
- * D-Cache management functions. They take the page table entries by
- * value, as they are flushing the cache using the kernel mapping (or
- * kmap on 32bit).
- */
-static void kvm_flush_dcache_pte(pte_t pte)
-{
-	__kvm_flush_dcache_pte(pte);
-}
-
-static void kvm_flush_dcache_pmd(pmd_t pmd)
-{
-	__kvm_flush_dcache_pmd(pmd);
-}
-
-static void kvm_flush_dcache_pud(pud_t pud)
-{
-	__kvm_flush_dcache_pud(pud);
-}
-
-static bool kvm_is_device_pfn(unsigned long pfn)
-{
-	return !pfn_valid(pfn);
-}
-
-/**
- * stage2_dissolve_pmd() - clear and flush huge PMD entry
- * @kvm:	pointer to kvm structure.
- * @addr:	IPA
- * @pmd:	pmd pointer for IPA
- *
- * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all
- * pages in the range dirty.
- */
-static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
-{
-	if (!pmd_thp_or_huge(*pmd))
-		return;
-
-	pmd_clear(pmd);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	put_page(virt_to_page(pmd));
-}
-
-static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
-				  int min, int max)
-{
-	void *page;
-
-	BUG_ON(max > KVM_NR_MEM_OBJS);
-	if (cache->nobjs >= min)
-		return 0;
-	while (cache->nobjs < max) {
-		page = (void *)__get_free_page(PGALLOC_GFP);
-		if (!page)
-			return -ENOMEM;
-		cache->objects[cache->nobjs++] = page;
-	}
-	return 0;
-}
-
-static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
-{
-	while (mc->nobjs)
-		free_page((unsigned long)mc->objects[--mc->nobjs]);
-}
-
-static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
-{
-	void *p;
-
-	BUG_ON(!mc || !mc->nobjs);
-	p = mc->objects[--mc->nobjs];
-	return p;
-}
-
-static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
-{
-	pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
-	stage2_pgd_clear(pgd);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	stage2_pud_free(pud_table);
-	put_page(virt_to_page(pgd));
-}
-
-static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
-{
-	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
-	VM_BUG_ON(stage2_pud_huge(*pud));
-	stage2_pud_clear(pud);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	stage2_pmd_free(pmd_table);
-	put_page(virt_to_page(pud));
-}
-
-static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
-{
-	pte_t *pte_table = pte_offset_kernel(pmd, 0);
-	VM_BUG_ON(pmd_thp_or_huge(*pmd));
-	pmd_clear(pmd);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pte_free_kernel(NULL, pte_table);
-	put_page(virt_to_page(pmd));
-}
-
-/*
- * Unmapping vs dcache management:
- *
- * If a guest maps certain memory pages as uncached, all writes will
- * bypass the data cache and go directly to RAM.  However, the CPUs
- * can still speculate reads (not writes) and fill cache lines with
- * data.
- *
- * Those cache lines will be *clean* cache lines though, so a
- * clean+invalidate operation is equivalent to an invalidate
- * operation, because no cache lines are marked dirty.
- *
- * Those clean cache lines could be filled prior to an uncached write
- * by the guest, and the cache coherent IO subsystem would therefore
- * end up writing old data to disk.
- *
- * This is why right after unmapping a page/section and invalidating
- * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
- * the IO subsystem will never hit in the cache.
- */
-static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
-		       phys_addr_t addr, phys_addr_t end)
-{
-	phys_addr_t start_addr = addr;
-	pte_t *pte, *start_pte;
-
-	start_pte = pte = pte_offset_kernel(pmd, addr);
-	do {
-		if (!pte_none(*pte)) {
-			pte_t old_pte = *pte;
-
-			kvm_set_pte(pte, __pte(0));
-			kvm_tlb_flush_vmid_ipa(kvm, addr);
-
-			/* No need to invalidate the cache for device mappings */
-			if (!kvm_is_device_pfn(pte_pfn(old_pte)))
-				kvm_flush_dcache_pte(old_pte);
-
-			put_page(virt_to_page(pte));
-		}
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-
-	if (stage2_pte_table_empty(start_pte))
-		clear_stage2_pmd_entry(kvm, pmd, start_addr);
-}
-
-static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
-		       phys_addr_t addr, phys_addr_t end)
-{
-	phys_addr_t next, start_addr = addr;
-	pmd_t *pmd, *start_pmd;
-
-	start_pmd = pmd = stage2_pmd_offset(pud, addr);
-	do {
-		next = stage2_pmd_addr_end(addr, end);
-		if (!pmd_none(*pmd)) {
-			if (pmd_thp_or_huge(*pmd)) {
-				pmd_t old_pmd = *pmd;
-
-				pmd_clear(pmd);
-				kvm_tlb_flush_vmid_ipa(kvm, addr);
-
-				kvm_flush_dcache_pmd(old_pmd);
-
-				put_page(virt_to_page(pmd));
-			} else {
-				unmap_stage2_ptes(kvm, pmd, addr, next);
-			}
-		}
-	} while (pmd++, addr = next, addr != end);
-
-	if (stage2_pmd_table_empty(start_pmd))
-		clear_stage2_pud_entry(kvm, pud, start_addr);
-}
-
-static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
-		       phys_addr_t addr, phys_addr_t end)
-{
-	phys_addr_t next, start_addr = addr;
-	pud_t *pud, *start_pud;
-
-	start_pud = pud = stage2_pud_offset(pgd, addr);
-	do {
-		next = stage2_pud_addr_end(addr, end);
-		if (!stage2_pud_none(*pud)) {
-			if (stage2_pud_huge(*pud)) {
-				pud_t old_pud = *pud;
-
-				stage2_pud_clear(pud);
-				kvm_tlb_flush_vmid_ipa(kvm, addr);
-				kvm_flush_dcache_pud(old_pud);
-				put_page(virt_to_page(pud));
-			} else {
-				unmap_stage2_pmds(kvm, pud, addr, next);
-			}
-		}
-	} while (pud++, addr = next, addr != end);
-
-	if (stage2_pud_table_empty(start_pud))
-		clear_stage2_pgd_entry(kvm, pgd, start_addr);
-}
-
-/**
- * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
- * @kvm:   The VM pointer
- * @start: The intermediate physical base address of the range to unmap
- * @size:  The size of the area to unmap
- *
- * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
- * be called while holding mmu_lock (unless for freeing the stage2 pgd before
- * destroying the VM), otherwise another faulting VCPU may come in and mess
- * with things behind our backs.
- */
-static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
-{
-	pgd_t *pgd;
-	phys_addr_t addr = start, end = start + size;
-	phys_addr_t next;
-
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
-	do {
-		next = stage2_pgd_addr_end(addr, end);
-		if (!stage2_pgd_none(*pgd))
-			unmap_stage2_puds(kvm, pgd, addr, next);
-	} while (pgd++, addr = next, addr != end);
-}
-
-static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
-			      phys_addr_t addr, phys_addr_t end)
-{
-	pte_t *pte;
-
-	pte = pte_offset_kernel(pmd, addr);
-	do {
-		if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte)))
-			kvm_flush_dcache_pte(*pte);
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-}
-
-static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
-			      phys_addr_t addr, phys_addr_t end)
-{
-	pmd_t *pmd;
-	phys_addr_t next;
-
-	pmd = stage2_pmd_offset(pud, addr);
-	do {
-		next = stage2_pmd_addr_end(addr, end);
-		if (!pmd_none(*pmd)) {
-			if (pmd_thp_or_huge(*pmd))
-				kvm_flush_dcache_pmd(*pmd);
-			else
-				stage2_flush_ptes(kvm, pmd, addr, next);
-		}
-	} while (pmd++, addr = next, addr != end);
-}
-
-static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
-			      phys_addr_t addr, phys_addr_t end)
-{
-	pud_t *pud;
-	phys_addr_t next;
-
-	pud = stage2_pud_offset(pgd, addr);
-	do {
-		next = stage2_pud_addr_end(addr, end);
-		if (!stage2_pud_none(*pud)) {
-			if (stage2_pud_huge(*pud))
-				kvm_flush_dcache_pud(*pud);
-			else
-				stage2_flush_pmds(kvm, pud, addr, next);
-		}
-	} while (pud++, addr = next, addr != end);
-}
-
-static void stage2_flush_memslot(struct kvm *kvm,
-				 struct kvm_memory_slot *memslot)
-{
-	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
-	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
-	phys_addr_t next;
-	pgd_t *pgd;
-
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
-	do {
-		next = stage2_pgd_addr_end(addr, end);
-		stage2_flush_puds(kvm, pgd, addr, next);
-	} while (pgd++, addr = next, addr != end);
-}
-
-/**
- * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
- * @kvm: The struct kvm pointer
- *
- * Go through the stage 2 page tables and invalidate any cache lines
- * backing memory already mapped to the VM.
- */
-static void stage2_flush_vm(struct kvm *kvm)
-{
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *memslot;
-	int idx;
-
-	idx = srcu_read_lock(&kvm->srcu);
-	spin_lock(&kvm->mmu_lock);
-
-	slots = kvm_memslots(kvm);
-	kvm_for_each_memslot(memslot, slots)
-		stage2_flush_memslot(kvm, memslot);
-
-	spin_unlock(&kvm->mmu_lock);
-	srcu_read_unlock(&kvm->srcu, idx);
-}
-
-static void clear_hyp_pgd_entry(pgd_t *pgd)
-{
-	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL);
-	pgd_clear(pgd);
-	pud_free(NULL, pud_table);
-	put_page(virt_to_page(pgd));
-}
-
-static void clear_hyp_pud_entry(pud_t *pud)
-{
-	pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
-	VM_BUG_ON(pud_huge(*pud));
-	pud_clear(pud);
-	pmd_free(NULL, pmd_table);
-	put_page(virt_to_page(pud));
-}
-
-static void clear_hyp_pmd_entry(pmd_t *pmd)
-{
-	pte_t *pte_table = pte_offset_kernel(pmd, 0);
-	VM_BUG_ON(pmd_thp_or_huge(*pmd));
-	pmd_clear(pmd);
-	pte_free_kernel(NULL, pte_table);
-	put_page(virt_to_page(pmd));
-}
-
-static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
-{
-	pte_t *pte, *start_pte;
-
-	start_pte = pte = pte_offset_kernel(pmd, addr);
-	do {
-		if (!pte_none(*pte)) {
-			kvm_set_pte(pte, __pte(0));
-			put_page(virt_to_page(pte));
-		}
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-
-	if (hyp_pte_table_empty(start_pte))
-		clear_hyp_pmd_entry(pmd);
-}
-
-static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
-{
-	phys_addr_t next;
-	pmd_t *pmd, *start_pmd;
-
-	start_pmd = pmd = pmd_offset(pud, addr);
-	do {
-		next = pmd_addr_end(addr, end);
-		/* Hyp doesn't use huge pmds */
-		if (!pmd_none(*pmd))
-			unmap_hyp_ptes(pmd, addr, next);
-	} while (pmd++, addr = next, addr != end);
-
-	if (hyp_pmd_table_empty(start_pmd))
-		clear_hyp_pud_entry(pud);
-}
-
-static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
-{
-	phys_addr_t next;
-	pud_t *pud, *start_pud;
-
-	start_pud = pud = pud_offset(pgd, addr);
-	do {
-		next = pud_addr_end(addr, end);
-		/* Hyp doesn't use huge puds */
-		if (!pud_none(*pud))
-			unmap_hyp_pmds(pud, addr, next);
-	} while (pud++, addr = next, addr != end);
-
-	if (hyp_pud_table_empty(start_pud))
-		clear_hyp_pgd_entry(pgd);
-}
-
-static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
-{
-	pgd_t *pgd;
-	phys_addr_t addr = start, end = start + size;
-	phys_addr_t next;
-
-	/*
-	 * We don't unmap anything from HYP, except at the hyp tear down.
-	 * Hence, we don't have to invalidate the TLBs here.
-	 */
-	pgd = pgdp + pgd_index(addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (!pgd_none(*pgd))
-			unmap_hyp_puds(pgd, addr, next);
-	} while (pgd++, addr = next, addr != end);
-}
-
-/**
- * free_hyp_pgds - free Hyp-mode page tables
- *
- * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
- * therefore contains either mappings in the kernel memory area (above
- * PAGE_OFFSET), or device mappings in the vmalloc range (from
- * VMALLOC_START to VMALLOC_END).
- *
- * boot_hyp_pgd should only map two pages for the init code.
- */
-void free_hyp_pgds(void)
-{
-	unsigned long addr;
-
-	mutex_lock(&kvm_hyp_pgd_mutex);
-
-	if (boot_hyp_pgd) {
-		unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
-		boot_hyp_pgd = NULL;
-	}
-
-	if (hyp_pgd) {
-		unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
-		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
-
-		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
-		hyp_pgd = NULL;
-	}
-	if (merged_hyp_pgd) {
-		clear_page(merged_hyp_pgd);
-		free_page((unsigned long)merged_hyp_pgd);
-		merged_hyp_pgd = NULL;
-	}
-
-	mutex_unlock(&kvm_hyp_pgd_mutex);
-}
-
-static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
-				    unsigned long end, unsigned long pfn,
-				    pgprot_t prot)
-{
-	pte_t *pte;
-	unsigned long addr;
-
-	addr = start;
-	do {
-		pte = pte_offset_kernel(pmd, addr);
-		kvm_set_pte(pte, pfn_pte(pfn, prot));
-		get_page(virt_to_page(pte));
-		kvm_flush_dcache_to_poc(pte, sizeof(*pte));
-		pfn++;
-	} while (addr += PAGE_SIZE, addr != end);
-}
-
-static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
-				   unsigned long end, unsigned long pfn,
-				   pgprot_t prot)
-{
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long addr, next;
-
-	addr = start;
-	do {
-		pmd = pmd_offset(pud, addr);
-
-		BUG_ON(pmd_sect(*pmd));
-
-		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, addr);
-			if (!pte) {
-				kvm_err("Cannot allocate Hyp pte\n");
-				return -ENOMEM;
-			}
-			pmd_populate_kernel(NULL, pmd, pte);
-			get_page(virt_to_page(pmd));
-			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
-		}
-
-		next = pmd_addr_end(addr, end);
-
-		create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
-		pfn += (next - addr) >> PAGE_SHIFT;
-	} while (addr = next, addr != end);
-
-	return 0;
-}
-
-static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
-				   unsigned long end, unsigned long pfn,
-				   pgprot_t prot)
-{
-	pud_t *pud;
-	pmd_t *pmd;
-	unsigned long addr, next;
-	int ret;
-
-	addr = start;
-	do {
-		pud = pud_offset(pgd, addr);
-
-		if (pud_none_or_clear_bad(pud)) {
-			pmd = pmd_alloc_one(NULL, addr);
-			if (!pmd) {
-				kvm_err("Cannot allocate Hyp pmd\n");
-				return -ENOMEM;
-			}
-			pud_populate(NULL, pud, pmd);
-			get_page(virt_to_page(pud));
-			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
-		}
-
-		next = pud_addr_end(addr, end);
-		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
-		if (ret)
-			return ret;
-		pfn += (next - addr) >> PAGE_SHIFT;
-	} while (addr = next, addr != end);
-
-	return 0;
-}
-
-static int __create_hyp_mappings(pgd_t *pgdp,
-				 unsigned long start, unsigned long end,
-				 unsigned long pfn, pgprot_t prot)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	unsigned long addr, next;
-	int err = 0;
-
-	mutex_lock(&kvm_hyp_pgd_mutex);
-	addr = start & PAGE_MASK;
-	end = PAGE_ALIGN(end);
-	do {
-		pgd = pgdp + pgd_index(addr);
-
-		if (pgd_none(*pgd)) {
-			pud = pud_alloc_one(NULL, addr);
-			if (!pud) {
-				kvm_err("Cannot allocate Hyp pud\n");
-				err = -ENOMEM;
-				goto out;
-			}
-			pgd_populate(NULL, pgd, pud);
-			get_page(virt_to_page(pgd));
-			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
-		}
-
-		next = pgd_addr_end(addr, end);
-		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
-		if (err)
-			goto out;
-		pfn += (next - addr) >> PAGE_SHIFT;
-	} while (addr = next, addr != end);
-out:
-	mutex_unlock(&kvm_hyp_pgd_mutex);
-	return err;
-}
-
-static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
-{
-	if (!is_vmalloc_addr(kaddr)) {
-		BUG_ON(!virt_addr_valid(kaddr));
-		return __pa(kaddr);
-	} else {
-		return page_to_phys(vmalloc_to_page(kaddr)) +
-		       offset_in_page(kaddr);
-	}
-}
-
-/**
- * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
- * @from:	The virtual kernel start address of the range
- * @to:		The virtual kernel end address of the range (exclusive)
- * @prot:	The protection to be applied to this range
- *
- * The same virtual address as the kernel virtual address is also used
- * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
- * physical pages.
- */
-int create_hyp_mappings(void *from, void *to, pgprot_t prot)
-{
-	phys_addr_t phys_addr;
-	unsigned long virt_addr;
-	unsigned long start = kern_hyp_va((unsigned long)from);
-	unsigned long end = kern_hyp_va((unsigned long)to);
-
-	if (is_kernel_in_hyp_mode())
-		return 0;
-
-	start = start & PAGE_MASK;
-	end = PAGE_ALIGN(end);
-
-	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
-		int err;
-
-		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
-		err = __create_hyp_mappings(hyp_pgd, virt_addr,
-					    virt_addr + PAGE_SIZE,
-					    __phys_to_pfn(phys_addr),
-					    prot);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-/**
- * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
- * @from:	The kernel start VA of the range
- * @to:		The kernel end VA of the range (exclusive)
- * @phys_addr:	The physical start address which gets mapped
- *
- * The resulting HYP VA is the same as the kernel VA, modulo
- * HYP_PAGE_OFFSET.
- */
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
-{
-	unsigned long start = kern_hyp_va((unsigned long)from);
-	unsigned long end = kern_hyp_va((unsigned long)to);
-
-	if (is_kernel_in_hyp_mode())
-		return 0;
-
-	/* Check for a valid kernel IO mapping */
-	if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
-		return -EINVAL;
-
-	return __create_hyp_mappings(hyp_pgd, start, end,
-				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
-}
-
-/**
- * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
- * @kvm:	The KVM struct pointer for the VM.
- *
- * Allocates only the stage-2 HW PGD level table(s) (can support either full
- * 40-bit input addresses or limited to 32-bit input addresses). Clears the
- * allocated pages.
- *
- * Note we don't need locking here as this is only called when the VM is
- * created, which can only be done once.
- */
-int kvm_alloc_stage2_pgd(struct kvm *kvm)
-{
-	pgd_t *pgd;
-
-	if (kvm->arch.pgd != NULL) {
-		kvm_err("kvm_arch already initialized?\n");
-		return -EINVAL;
-	}
-
-	/* Allocate the HW PGD, making sure that each page gets its own refcount */
-	pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO);
-	if (!pgd)
-		return -ENOMEM;
-
-	kvm->arch.pgd = pgd;
-	return 0;
-}
-
-static void stage2_unmap_memslot(struct kvm *kvm,
-				 struct kvm_memory_slot *memslot)
-{
-	hva_t hva = memslot->userspace_addr;
-	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
-	phys_addr_t size = PAGE_SIZE * memslot->npages;
-	hva_t reg_end = hva + size;
-
-	/*
-	 * A memory region could potentially cover multiple VMAs, and any holes
-	 * between them, so iterate over all of them to find out if we should
-	 * unmap any of them.
-	 *
-	 *     +--------------------------------------------+
-	 * +---------------+----------------+   +----------------+
-	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
-	 * +---------------+----------------+   +----------------+
-	 *     |               memory region                |
-	 *     +--------------------------------------------+
-	 */
-	do {
-		struct vm_area_struct *vma = find_vma(current->mm, hva);
-		hva_t vm_start, vm_end;
-
-		if (!vma || vma->vm_start >= reg_end)
-			break;
-
-		/*
-		 * Take the intersection of this VMA with the memory region
-		 */
-		vm_start = max(hva, vma->vm_start);
-		vm_end = min(reg_end, vma->vm_end);
-
-		if (!(vma->vm_flags & VM_PFNMAP)) {
-			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
-			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
-		}
-		hva = vm_end;
-	} while (hva < reg_end);
-}
-
-/**
- * stage2_unmap_vm - Unmap Stage-2 RAM mappings
- * @kvm: The struct kvm pointer
- *
- * Go through the memregions and unmap any reguler RAM
- * backing memory already mapped to the VM.
- */
-void stage2_unmap_vm(struct kvm *kvm)
-{
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *memslot;
-	int idx;
-
-	idx = srcu_read_lock(&kvm->srcu);
-	spin_lock(&kvm->mmu_lock);
-
-	slots = kvm_memslots(kvm);
-	kvm_for_each_memslot(memslot, slots)
-		stage2_unmap_memslot(kvm, memslot);
-
-	spin_unlock(&kvm->mmu_lock);
-	srcu_read_unlock(&kvm->srcu, idx);
-}
-
-/**
- * kvm_free_stage2_pgd - free all stage-2 tables
- * @kvm:	The KVM struct pointer for the VM.
- *
- * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
- * underlying level-2 and level-3 tables before freeing the actual level-1 table
- * and setting the struct pointer to NULL.
- *
- * Note we don't need locking here as this is only called when the VM is
- * destroyed, which can only be done once.
- */
-void kvm_free_stage2_pgd(struct kvm *kvm)
-{
-	if (kvm->arch.pgd == NULL)
-		return;
-
-	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	/* Free the HW pgd, one page at a time */
-	free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
-	kvm->arch.pgd = NULL;
-}
-
-static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-			     phys_addr_t addr)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
-	if (WARN_ON(stage2_pgd_none(*pgd))) {
-		if (!cache)
-			return NULL;
-		pud = mmu_memory_cache_alloc(cache);
-		stage2_pgd_populate(pgd, pud);
-		get_page(virt_to_page(pgd));
-	}
-
-	return stage2_pud_offset(pgd, addr);
-}
-
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-			     phys_addr_t addr)
-{
-	pud_t *pud;
-	pmd_t *pmd;
-
-	pud = stage2_get_pud(kvm, cache, addr);
-	if (stage2_pud_none(*pud)) {
-		if (!cache)
-			return NULL;
-		pmd = mmu_memory_cache_alloc(cache);
-		stage2_pud_populate(pud, pmd);
-		get_page(virt_to_page(pud));
-	}
-
-	return stage2_pmd_offset(pud, addr);
-}
-
-static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
-			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
-{
-	pmd_t *pmd, old_pmd;
-
-	pmd = stage2_get_pmd(kvm, cache, addr);
-	VM_BUG_ON(!pmd);
-
-	/*
-	 * Mapping in huge pages should only happen through a fault.  If a
-	 * page is merged into a transparent huge page, the individual
-	 * subpages of that huge page should be unmapped through MMU
-	 * notifiers before we get here.
-	 *
-	 * Merging of CompoundPages is not supported; they should become
-	 * splitting first, unmapped, merged, and mapped back in on-demand.
-	 */
-	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
-
-	old_pmd = *pmd;
-	if (pmd_present(old_pmd)) {
-		pmd_clear(pmd);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	} else {
-		get_page(virt_to_page(pmd));
-	}
-
-	kvm_set_pmd(pmd, *new_pmd);
-	return 0;
-}
-
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-			  phys_addr_t addr, const pte_t *new_pte,
-			  unsigned long flags)
-{
-	pmd_t *pmd;
-	pte_t *pte, old_pte;
-	bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
-	bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;
-
-	VM_BUG_ON(logging_active && !cache);
-
-	/* Create stage-2 page table mapping - Levels 0 and 1 */
-	pmd = stage2_get_pmd(kvm, cache, addr);
-	if (!pmd) {
-		/*
-		 * Ignore calls from kvm_set_spte_hva for unallocated
-		 * address ranges.
-		 */
-		return 0;
-	}
-
-	/*
-	 * While dirty page logging - dissolve huge PMD, then continue on to
-	 * allocate page.
-	 */
-	if (logging_active)
-		stage2_dissolve_pmd(kvm, addr, pmd);
-
-	/* Create stage-2 page mappings - Level 2 */
-	if (pmd_none(*pmd)) {
-		if (!cache)
-			return 0; /* ignore calls from kvm_set_spte_hva */
-		pte = mmu_memory_cache_alloc(cache);
-		pmd_populate_kernel(NULL, pmd, pte);
-		get_page(virt_to_page(pmd));
-	}
-
-	pte = pte_offset_kernel(pmd, addr);
-
-	if (iomap && pte_present(*pte))
-		return -EFAULT;
-
-	/* Create 2nd stage page table mapping - Level 3 */
-	old_pte = *pte;
-	if (pte_present(old_pte)) {
-		kvm_set_pte(pte, __pte(0));
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	} else {
-		get_page(virt_to_page(pte));
-	}
-
-	kvm_set_pte(pte, *new_pte);
-	return 0;
-}
-
-#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static int stage2_ptep_test_and_clear_young(pte_t *pte)
-{
-	if (pte_young(*pte)) {
-		*pte = pte_mkold(*pte);
-		return 1;
-	}
-	return 0;
-}
-#else
-static int stage2_ptep_test_and_clear_young(pte_t *pte)
-{
-	return __ptep_test_and_clear_young(pte);
-}
-#endif
-
-static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
-{
-	return stage2_ptep_test_and_clear_young((pte_t *)pmd);
-}
-
-/**
- * kvm_phys_addr_ioremap - map a device range to guest IPA
- *
- * @kvm:	The KVM pointer
- * @guest_ipa:	The IPA at which to insert the mapping
- * @pa:		The physical address of the device
- * @size:	The size of the mapping
- */
-int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-			  phys_addr_t pa, unsigned long size, bool writable)
-{
-	phys_addr_t addr, end;
-	int ret = 0;
-	unsigned long pfn;
-	struct kvm_mmu_memory_cache cache = { 0, };
-
-	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
-	pfn = __phys_to_pfn(pa);
-
-	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
-		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
-
-		if (writable)
-			pte = kvm_s2pte_mkwrite(pte);
-
-		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
-						KVM_NR_MEM_OBJS);
-		if (ret)
-			goto out;
-		spin_lock(&kvm->mmu_lock);
-		ret = stage2_set_pte(kvm, &cache, addr, &pte,
-						KVM_S2PTE_FLAG_IS_IOMAP);
-		spin_unlock(&kvm->mmu_lock);
-		if (ret)
-			goto out;
-
-		pfn++;
-	}
-
-out:
-	mmu_free_memory_cache(&cache);
-	return ret;
-}
-
-static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
-{
-	kvm_pfn_t pfn = *pfnp;
-	gfn_t gfn = *ipap >> PAGE_SHIFT;
-
-	if (PageTransCompoundMap(pfn_to_page(pfn))) {
-		unsigned long mask;
-		/*
-		 * The address we faulted on is backed by a transparent huge
-		 * page.  However, because we map the compound huge page and
-		 * not the individual tail page, we need to transfer the
-		 * refcount to the head page.  We have to be careful that the
-		 * THP doesn't start to split while we are adjusting the
-		 * refcounts.
-		 *
-		 * We are sure this doesn't happen, because mmu_notifier_retry
-		 * was successful and we are holding the mmu_lock, so if this
-		 * THP is trying to split, it will be blocked in the mmu
-		 * notifier before touching any of the pages, specifically
-		 * before being able to call __split_huge_page_refcount().
-		 *
-		 * We can therefore safely transfer the refcount from PG_tail
-		 * to PG_head and switch the pfn from a tail page to the head
-		 * page accordingly.
-		 */
-		mask = PTRS_PER_PMD - 1;
-		VM_BUG_ON((gfn & mask) != (pfn & mask));
-		if (pfn & mask) {
-			*ipap &= PMD_MASK;
-			kvm_release_pfn_clean(pfn);
-			pfn &= ~mask;
-			kvm_get_pfn(pfn);
-			*pfnp = pfn;
-		}
-
-		return true;
-	}
-
-	return false;
-}
-
-static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
-{
-	if (kvm_vcpu_trap_is_iabt(vcpu))
-		return false;
-
-	return kvm_vcpu_dabt_iswrite(vcpu);
-}
-
-/**
- * stage2_wp_ptes - write protect PMD range
- * @pmd:	pointer to pmd entry
- * @addr:	range start address
- * @end:	range end address
- */
-static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
-{
-	pte_t *pte;
-
-	pte = pte_offset_kernel(pmd, addr);
-	do {
-		if (!pte_none(*pte)) {
-			if (!kvm_s2pte_readonly(pte))
-				kvm_set_s2pte_readonly(pte);
-		}
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-}
-
-/**
- * stage2_wp_pmds - write protect PUD range
- * @pud:	pointer to pud entry
- * @addr:	range start address
- * @end:	range end address
- */
-static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
-{
-	pmd_t *pmd;
-	phys_addr_t next;
-
-	pmd = stage2_pmd_offset(pud, addr);
-
-	do {
-		next = stage2_pmd_addr_end(addr, end);
-		if (!pmd_none(*pmd)) {
-			if (pmd_thp_or_huge(*pmd)) {
-				if (!kvm_s2pmd_readonly(pmd))
-					kvm_set_s2pmd_readonly(pmd);
-			} else {
-				stage2_wp_ptes(pmd, addr, next);
-			}
-		}
-	} while (pmd++, addr = next, addr != end);
-}
-
-/**
-  * stage2_wp_puds - write protect PGD range
-  * @pgd:	pointer to pgd entry
-  * @addr:	range start address
-  * @end:	range end address
-  *
-  * Process PUD entries, for a huge PUD we cause a panic.
-  */
-static void  stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
-{
-	pud_t *pud;
-	phys_addr_t next;
-
-	pud = stage2_pud_offset(pgd, addr);
-	do {
-		next = stage2_pud_addr_end(addr, end);
-		if (!stage2_pud_none(*pud)) {
-			/* TODO:PUD not supported, revisit later if supported */
-			BUG_ON(stage2_pud_huge(*pud));
-			stage2_wp_pmds(pud, addr, next);
-		}
-	} while (pud++, addr = next, addr != end);
-}
-
-/**
- * stage2_wp_range() - write protect stage2 memory region range
- * @kvm:	The KVM pointer
- * @addr:	Start address of range
- * @end:	End address of range
- */
-static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
-{
-	pgd_t *pgd;
-	phys_addr_t next;
-
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
-	do {
-		/*
-		 * Release kvm_mmu_lock periodically if the memory region is
-		 * large. Otherwise, we may see kernel panics with
-		 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
-		 * CONFIG_LOCKDEP. Additionally, holding the lock too long
-		 * will also starve other vCPUs.
-		 */
-		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
-			cond_resched_lock(&kvm->mmu_lock);
-
-		next = stage2_pgd_addr_end(addr, end);
-		if (stage2_pgd_present(*pgd))
-			stage2_wp_puds(pgd, addr, next);
-	} while (pgd++, addr = next, addr != end);
-}
-
-/**
- * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
- * @kvm:	The KVM pointer
- * @slot:	The memory slot to write protect
- *
- * Called to start logging dirty pages after memory region
- * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
- * all present PMD and PTEs are write protected in the memory region.
- * Afterwards read of dirty page log can be called.
- *
- * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
- * serializing operations for VM memory regions.
- */
-void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
-{
-	struct kvm_memslots *slots = kvm_memslots(kvm);
-	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
-	phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
-	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
-
-	spin_lock(&kvm->mmu_lock);
-	stage2_wp_range(kvm, start, end);
-	spin_unlock(&kvm->mmu_lock);
-	kvm_flush_remote_tlbs(kvm);
-}
-
-/**
- * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
- * @kvm:	The KVM pointer
- * @slot:	The memory slot associated with mask
- * @gfn_offset:	The gfn offset in memory slot
- * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
- *		slot to be write protected
- *
- * Walks bits set in mask write protects the associated pte's. Caller must
- * acquire kvm_mmu_lock.
- */
-static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
-		struct kvm_memory_slot *slot,
-		gfn_t gfn_offset, unsigned long mask)
-{
-	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
-	phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
-	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
-
-	stage2_wp_range(kvm, start, end);
-}
-
-/*
- * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
- * dirty pages.
- *
- * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
- * enable dirty logging for them.
- */
-void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
-		struct kvm_memory_slot *slot,
-		gfn_t gfn_offset, unsigned long mask)
-{
-	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
-}
-
-static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
-				      unsigned long size)
-{
-	__coherent_cache_guest_page(vcpu, pfn, size);
-}
-
-static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-			  struct kvm_memory_slot *memslot, unsigned long hva,
-			  unsigned long fault_status)
-{
-	int ret;
-	bool write_fault, writable, hugetlb = false, force_pte = false;
-	unsigned long mmu_seq;
-	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
-	struct vm_area_struct *vma;
-	kvm_pfn_t pfn;
-	pgprot_t mem_type = PAGE_S2;
-	bool logging_active = memslot_is_logging(memslot);
-	unsigned long flags = 0;
-
-	write_fault = kvm_is_write_fault(vcpu);
-	if (fault_status == FSC_PERM && !write_fault) {
-		kvm_err("Unexpected L2 read permission error\n");
-		return -EFAULT;
-	}
-
-	/* Let's check if we will get back a huge page backed by hugetlbfs */
-	down_read(&current->mm->mmap_sem);
-	vma = find_vma_intersection(current->mm, hva, hva + 1);
-	if (unlikely(!vma)) {
-		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
-		up_read(&current->mm->mmap_sem);
-		return -EFAULT;
-	}
-
-	if (is_vm_hugetlb_page(vma) && !logging_active) {
-		hugetlb = true;
-		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
-	} else {
-		/*
-		 * Pages belonging to memslots that don't have the same
-		 * alignment for userspace and IPA cannot be mapped using
-		 * block descriptors even if the pages belong to a THP for
-		 * the process, because the stage-2 block descriptor will
-		 * cover more than a single THP and we loose atomicity for
-		 * unmapping, updates, and splits of the THP or other pages
-		 * in the stage-2 block range.
-		 */
-		if ((memslot->userspace_addr & ~PMD_MASK) !=
-		    ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
-			force_pte = true;
-	}
-	up_read(&current->mm->mmap_sem);
-
-	/* We need minimum second+third level pages */
-	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
-				     KVM_NR_MEM_OBJS);
-	if (ret)
-		return ret;
-
-	mmu_seq = vcpu->kvm->mmu_notifier_seq;
-	/*
-	 * Ensure the read of mmu_notifier_seq happens before we call
-	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
-	 * the page we just got a reference to gets unmapped before we have a
-	 * chance to grab the mmu_lock, which ensure that if the page gets
-	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
-	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
-	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
-	 */
-	smp_rmb();
-
-	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
-	if (is_error_noslot_pfn(pfn))
-		return -EFAULT;
-
-	if (kvm_is_device_pfn(pfn)) {
-		mem_type = PAGE_S2_DEVICE;
-		flags |= KVM_S2PTE_FLAG_IS_IOMAP;
-	} else if (logging_active) {
-		/*
-		 * Faults on pages in a memslot with logging enabled
-		 * should not be mapped with huge pages (it introduces churn
-		 * and performance degradation), so force a pte mapping.
-		 */
-		force_pte = true;
-		flags |= KVM_S2_FLAG_LOGGING_ACTIVE;
-
-		/*
-		 * Only actually map the page as writable if this was a write
-		 * fault.
-		 */
-		if (!write_fault)
-			writable = false;
-	}
-
-	spin_lock(&kvm->mmu_lock);
-	if (mmu_notifier_retry(kvm, mmu_seq))
-		goto out_unlock;
-
-	if (!hugetlb && !force_pte)
-		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
-
-	if (hugetlb) {
-		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
-		new_pmd = pmd_mkhuge(new_pmd);
-		if (writable) {
-			new_pmd = kvm_s2pmd_mkwrite(new_pmd);
-			kvm_set_pfn_dirty(pfn);
-		}
-		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE);
-		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
-	} else {
-		pte_t new_pte = pfn_pte(pfn, mem_type);
-
-		if (writable) {
-			new_pte = kvm_s2pte_mkwrite(new_pte);
-			kvm_set_pfn_dirty(pfn);
-			mark_page_dirty(kvm, gfn);
-		}
-		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE);
-		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
-	}
-
-out_unlock:
-	spin_unlock(&kvm->mmu_lock);
-	kvm_set_pfn_accessed(pfn);
-	kvm_release_pfn_clean(pfn);
-	return ret;
-}
-
-/*
- * Resolve the access fault by making the page young again.
- * Note that because the faulting entry is guaranteed not to be
- * cached in the TLB, we don't need to invalidate anything.
- * Only the HW Access Flag updates are supported for Stage 2 (no DBM),
- * so there is no need for atomic (pte|pmd)_mkyoung operations.
- */
-static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
-{
-	pmd_t *pmd;
-	pte_t *pte;
-	kvm_pfn_t pfn;
-	bool pfn_valid = false;
-
-	trace_kvm_access_fault(fault_ipa);
-
-	spin_lock(&vcpu->kvm->mmu_lock);
-
-	pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
-	if (!pmd || pmd_none(*pmd))	/* Nothing there */
-		goto out;
-
-	if (pmd_thp_or_huge(*pmd)) {	/* THP, HugeTLB */
-		*pmd = pmd_mkyoung(*pmd);
-		pfn = pmd_pfn(*pmd);
-		pfn_valid = true;
-		goto out;
-	}
-
-	pte = pte_offset_kernel(pmd, fault_ipa);
-	if (pte_none(*pte))		/* Nothing there either */
-		goto out;
-
-	*pte = pte_mkyoung(*pte);	/* Just a page... */
-	pfn = pte_pfn(*pte);
-	pfn_valid = true;
-out:
-	spin_unlock(&vcpu->kvm->mmu_lock);
-	if (pfn_valid)
-		kvm_set_pfn_accessed(pfn);
-}
-
-/**
- * kvm_handle_guest_abort - handles all 2nd stage aborts
- * @vcpu:	the VCPU pointer
- * @run:	the kvm_run structure
- *
- * Any abort that gets to the host is almost guaranteed to be caused by a
- * missing second stage translation table entry, which can mean that either the
- * guest simply needs more memory and we must allocate an appropriate page or it
- * can mean that the guest tried to access I/O memory, which is emulated by user
- * space. The distinction is based on the IPA causing the fault and whether this
- * memory region has been registered as standard RAM by user space.
- */
-int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-	unsigned long fault_status;
-	phys_addr_t fault_ipa;
-	struct kvm_memory_slot *memslot;
-	unsigned long hva;
-	bool is_iabt, write_fault, writable;
-	gfn_t gfn;
-	int ret, idx;
-
-	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
-	if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) {
-		kvm_inject_vabt(vcpu);
-		return 1;
-	}
-
-	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
-
-	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
-			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
-
-	/* Check the stage-2 fault is trans. fault or write fault */
-	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
-	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
-	    fault_status != FSC_ACCESS) {
-		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
-			kvm_vcpu_trap_get_class(vcpu),
-			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
-			(unsigned long)kvm_vcpu_get_hsr(vcpu));
-		return -EFAULT;
-	}
-
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-
-	gfn = fault_ipa >> PAGE_SHIFT;
-	memslot = gfn_to_memslot(vcpu->kvm, gfn);
-	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
-	write_fault = kvm_is_write_fault(vcpu);
-	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
-		if (is_iabt) {
-			/* Prefetch Abort on I/O address */
-			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
-			ret = 1;
-			goto out_unlock;
-		}
-
-		/*
-		 * Check for a cache maintenance operation. Since we
-		 * ended-up here, we know it is outside of any memory
-		 * slot. But we can't find out if that is for a device,
-		 * or if the guest is just being stupid. The only thing
-		 * we know for sure is that this range cannot be cached.
-		 *
-		 * So let's assume that the guest is just being
-		 * cautious, and skip the instruction.
-		 */
-		if (kvm_vcpu_dabt_is_cm(vcpu)) {
-			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-			ret = 1;
-			goto out_unlock;
-		}
-
-		/*
-		 * The IPA is reported as [MAX:12], so we need to
-		 * complement it with the bottom 12 bits from the
-		 * faulting VA. This is always 12 bits, irrespective
-		 * of the page size.
-		 */
-		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
-		ret = io_mem_abort(vcpu, run, fault_ipa);
-		goto out_unlock;
-	}
-
-	/* Userspace should not be able to register out-of-bounds IPAs */
-	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
-
-	if (fault_status == FSC_ACCESS) {
-		handle_access_fault(vcpu, fault_ipa);
-		ret = 1;
-		goto out_unlock;
-	}
-
-	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
-	if (ret == 0)
-		ret = 1;
-out_unlock:
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-	return ret;
-}
-
-static int handle_hva_to_gpa(struct kvm *kvm,
-			     unsigned long start,
-			     unsigned long end,
-			     int (*handler)(struct kvm *kvm,
-					    gpa_t gpa, u64 size,
-					    void *data),
-			     void *data)
-{
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *memslot;
-	int ret = 0;
-
-	slots = kvm_memslots(kvm);
-
-	/* we only care about the pages that the guest sees */
-	kvm_for_each_memslot(memslot, slots) {
-		unsigned long hva_start, hva_end;
-		gfn_t gpa;
-
-		hva_start = max(start, memslot->userspace_addr);
-		hva_end = min(end, memslot->userspace_addr +
-					(memslot->npages << PAGE_SHIFT));
-		if (hva_start >= hva_end)
-			continue;
-
-		gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT;
-		ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data);
-	}
-
-	return ret;
-}
-
-static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
-{
-	unmap_stage2_range(kvm, gpa, size);
-	return 0;
-}
-
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-	unsigned long end = hva + PAGE_SIZE;
-
-	if (!kvm->arch.pgd)
-		return 0;
-
-	trace_kvm_unmap_hva(hva);
-	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
-	return 0;
-}
-
-int kvm_unmap_hva_range(struct kvm *kvm,
-			unsigned long start, unsigned long end)
-{
-	if (!kvm->arch.pgd)
-		return 0;
-
-	trace_kvm_unmap_hva_range(start, end);
-	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
-	return 0;
-}
-
-static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
-{
-	pte_t *pte = (pte_t *)data;
-
-	WARN_ON(size != PAGE_SIZE);
-	/*
-	 * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
-	 * flag clear because MMU notifiers will have unmapped a huge PMD before
-	 * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and
-	 * therefore stage2_set_pte() never needs to clear out a huge PMD
-	 * through this calling path.
-	 */
-	stage2_set_pte(kvm, NULL, gpa, pte, 0);
-	return 0;
-}
-
-
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
-{
-	unsigned long end = hva + PAGE_SIZE;
-	pte_t stage2_pte;
-
-	if (!kvm->arch.pgd)
-		return;
-
-	trace_kvm_set_spte_hva(hva);
-	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
-	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
-}
-
-static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
-{
-	pmd_t *pmd;
-	pte_t *pte;
-
-	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
-	pmd = stage2_get_pmd(kvm, NULL, gpa);
-	if (!pmd || pmd_none(*pmd))	/* Nothing there */
-		return 0;
-
-	if (pmd_thp_or_huge(*pmd))	/* THP, HugeTLB */
-		return stage2_pmdp_test_and_clear_young(pmd);
-
-	pte = pte_offset_kernel(pmd, gpa);
-	if (pte_none(*pte))
-		return 0;
-
-	return stage2_ptep_test_and_clear_young(pte);
-}
-
-static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
-{
-	pmd_t *pmd;
-	pte_t *pte;
-
-	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
-	pmd = stage2_get_pmd(kvm, NULL, gpa);
-	if (!pmd || pmd_none(*pmd))	/* Nothing there */
-		return 0;
-
-	if (pmd_thp_or_huge(*pmd))		/* THP, HugeTLB */
-		return pmd_young(*pmd);
-
-	pte = pte_offset_kernel(pmd, gpa);
-	if (!pte_none(*pte))		/* Just a page... */
-		return pte_young(*pte);
-
-	return 0;
-}
-
-int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
-{
-	trace_kvm_age_hva(start, end);
-	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
-}
-
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-{
-	trace_kvm_test_age_hva(hva);
-	return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
-}
-
-void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
-{
-	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
-}
-
-phys_addr_t kvm_mmu_get_httbr(void)
-{
-	if (__kvm_cpu_uses_extended_idmap())
-		return virt_to_phys(merged_hyp_pgd);
-	else
-		return virt_to_phys(hyp_pgd);
-}
-
-phys_addr_t kvm_get_idmap_vector(void)
-{
-	return hyp_idmap_vector;
-}
-
-static int kvm_map_idmap_text(pgd_t *pgd)
-{
-	int err;
-
-	/* Create the idmap in the boot page tables */
-	err = 	__create_hyp_mappings(pgd,
-				      hyp_idmap_start, hyp_idmap_end,
-				      __phys_to_pfn(hyp_idmap_start),
-				      PAGE_HYP_EXEC);
-	if (err)
-		kvm_err("Failed to idmap %lx-%lx\n",
-			hyp_idmap_start, hyp_idmap_end);
-
-	return err;
-}
-
-int kvm_mmu_init(void)
-{
-	int err;
-
-	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
-	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
-	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
-
-	/*
-	 * We rely on the linker script to ensure at build time that the HYP
-	 * init code does not cross a page boundary.
-	 */
-	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
-
-	kvm_info("IDMAP page: %lx\n", hyp_idmap_start);
-	kvm_info("HYP VA range: %lx:%lx\n",
-		 kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
-
-	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
-	    hyp_idmap_start <  kern_hyp_va(~0UL) &&
-	    hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
-		/*
-		 * The idmap page is intersecting with the VA space,
-		 * it is not safe to continue further.
-		 */
-		kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
-		err = -EINVAL;
-		goto out;
-	}
-
-	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
-	if (!hyp_pgd) {
-		kvm_err("Hyp mode PGD not allocated\n");
-		err = -ENOMEM;
-		goto out;
-	}
-
-	if (__kvm_cpu_uses_extended_idmap()) {
-		boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-							 hyp_pgd_order);
-		if (!boot_hyp_pgd) {
-			kvm_err("Hyp boot PGD not allocated\n");
-			err = -ENOMEM;
-			goto out;
-		}
-
-		err = kvm_map_idmap_text(boot_hyp_pgd);
-		if (err)
-			goto out;
-
-		merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-		if (!merged_hyp_pgd) {
-			kvm_err("Failed to allocate extra HYP pgd\n");
-			goto out;
-		}
-		__kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
-				    hyp_idmap_start);
-	} else {
-		err = kvm_map_idmap_text(hyp_pgd);
-		if (err)
-			goto out;
-	}
-
-	return 0;
-out:
-	free_hyp_pgds();
-	return err;
-}
-
-void kvm_arch_commit_memory_region(struct kvm *kvm,
-				   const struct kvm_userspace_memory_region *mem,
-				   const struct kvm_memory_slot *old,
-				   const struct kvm_memory_slot *new,
-				   enum kvm_mr_change change)
-{
-	/*
-	 * At this point memslot has been committed and there is an
-	 * allocated dirty_bitmap[], dirty pages will be be tracked while the
-	 * memory slot is write protected.
-	 */
-	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
-		kvm_mmu_wp_memory_region(kvm, mem->slot);
-}
-
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
-				   struct kvm_memory_slot *memslot,
-				   const struct kvm_userspace_memory_region *mem,
-				   enum kvm_mr_change change)
-{
-	hva_t hva = mem->userspace_addr;
-	hva_t reg_end = hva + mem->memory_size;
-	bool writable = !(mem->flags & KVM_MEM_READONLY);
-	int ret = 0;
-
-	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
-			change != KVM_MR_FLAGS_ONLY)
-		return 0;
-
-	/*
-	 * Prevent userspace from creating a memory region outside of the IPA
-	 * space addressable by the KVM guest IPA space.
-	 */
-	if (memslot->base_gfn + memslot->npages >=
-	    (KVM_PHYS_SIZE >> PAGE_SHIFT))
-		return -EFAULT;
-
-	/*
-	 * A memory region could potentially cover multiple VMAs, and any holes
-	 * between them, so iterate over all of them to find out if we can map
-	 * any of them right now.
-	 *
-	 *     +--------------------------------------------+
-	 * +---------------+----------------+   +----------------+
-	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
-	 * +---------------+----------------+   +----------------+
-	 *     |               memory region                |
-	 *     +--------------------------------------------+
-	 */
-	do {
-		struct vm_area_struct *vma = find_vma(current->mm, hva);
-		hva_t vm_start, vm_end;
-
-		if (!vma || vma->vm_start >= reg_end)
-			break;
-
-		/*
-		 * Mapping a read-only VMA is only allowed if the
-		 * memory region is configured as read-only.
-		 */
-		if (writable && !(vma->vm_flags & VM_WRITE)) {
-			ret = -EPERM;
-			break;
-		}
-
-		/*
-		 * Take the intersection of this VMA with the memory region
-		 */
-		vm_start = max(hva, vma->vm_start);
-		vm_end = min(reg_end, vma->vm_end);
-
-		if (vma->vm_flags & VM_PFNMAP) {
-			gpa_t gpa = mem->guest_phys_addr +
-				    (vm_start - mem->userspace_addr);
-			phys_addr_t pa;
-
-			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
-			pa += vm_start - vma->vm_start;
-
-			/* IO region dirty page logging not allowed */
-			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
-				return -EINVAL;
-
-			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
-						    vm_end - vm_start,
-						    writable);
-			if (ret)
-				break;
-		}
-		hva = vm_end;
-	} while (hva < reg_end);
-
-	if (change == KVM_MR_FLAGS_ONLY)
-		return ret;
-
-	spin_lock(&kvm->mmu_lock);
-	if (ret)
-		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
-	else
-		stage2_flush_memslot(kvm, memslot);
-	spin_unlock(&kvm->mmu_lock);
-	return ret;
-}
-
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
-			   struct kvm_memory_slot *dont)
-{
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
-			    unsigned long npages)
-{
-	return 0;
-}
-
-void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
-{
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-	kvm_free_stage2_pgd(kvm);
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-				   struct kvm_memory_slot *slot)
-{
-	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
-	phys_addr_t size = slot->npages << PAGE_SHIFT;
-
-	spin_lock(&kvm->mmu_lock);
-	unmap_stage2_range(kvm, gpa, size);
-	spin_unlock(&kvm->mmu_lock);
-}
-
-/*
- * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
- *
- * Main problems:
- * - S/W ops are local to a CPU (not broadcast)
- * - We have line migration behind our back (speculation)
- * - System caches don't support S/W at all (damn!)
- *
- * In the face of the above, the best we can do is to try and convert
- * S/W ops to VA ops. Because the guest is not allowed to infer the
- * S/W to PA mapping, it can only use S/W to nuke the whole cache,
- * which is a rather good thing for us.
- *
- * Also, it is only used when turning caches on/off ("The expected
- * usage of the cache maintenance instructions that operate by set/way
- * is associated with the cache maintenance instructions associated
- * with the powerdown and powerup of caches, if this is required by
- * the implementation.").
- *
- * We use the following policy:
- *
- * - If we trap a S/W operation, we enable VM trapping to detect
- *   caches being turned on/off, and do a full clean.
- *
- * - We flush the caches on both caches being turned on and off.
- *
- * - Once the caches are enabled, we stop trapping VM ops.
- */
-void kvm_set_way_flush(struct kvm_vcpu *vcpu)
-{
-	unsigned long hcr = vcpu_get_hcr(vcpu);
-
-	/*
-	 * If this is the first time we do a S/W operation
-	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
-	 * VM trapping.
-	 *
-	 * Otherwise, rely on the VM trapping to wait for the MMU +
-	 * Caches to be turned off. At that point, we'll be able to
-	 * clean the caches again.
-	 */
-	if (!(hcr & HCR_TVM)) {
-		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
-					vcpu_has_cache_enabled(vcpu));
-		stage2_flush_vm(vcpu->kvm);
-		vcpu_set_hcr(vcpu, hcr | HCR_TVM);
-	}
-}
-
-void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
-{
-	bool now_enabled = vcpu_has_cache_enabled(vcpu);
-
-	/*
-	 * If switching the MMU+caches on, need to invalidate the caches.
-	 * If switching it off, need to clean the caches.
-	 * Clean + invalidate does the trick always.
-	 */
-	if (now_enabled != was_enabled)
-		stage2_flush_vm(vcpu->kvm);
-
-	/* Caches are now on, stop trapping VM ops (until a S/W op) */
-	if (now_enabled)
-		vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
-
-	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
-}
diff --git a/arch/arm/kvm/perf.c b/arch/arm/kvm/perf.c
deleted file mode 100644
index 1a3849da0b4b..000000000000
--- a/arch/arm/kvm/perf.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Based on the x86 implementation.
- *
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/perf_event.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_emulate.h>
-
-static int kvm_is_in_guest(void)
-{
-        return kvm_arm_get_running_vcpu() != NULL;
-}
-
-static int kvm_is_user_mode(void)
-{
-	struct kvm_vcpu *vcpu;
-
-	vcpu = kvm_arm_get_running_vcpu();
-
-	if (vcpu)
-		return !vcpu_mode_priv(vcpu);
-
-	return 0;
-}
-
-static unsigned long kvm_get_guest_ip(void)
-{
-	struct kvm_vcpu *vcpu;
-
-	vcpu = kvm_arm_get_running_vcpu();
-
-	if (vcpu)
-		return *vcpu_pc(vcpu);
-
-	return 0;
-}
-
-static struct perf_guest_info_callbacks kvm_guest_cbs = {
-	.is_in_guest	= kvm_is_in_guest,
-	.is_user_mode	= kvm_is_user_mode,
-	.get_guest_ip	= kvm_get_guest_ip,
-};
-
-int kvm_perf_init(void)
-{
-	return perf_register_guest_info_callbacks(&kvm_guest_cbs);
-}
-
-int kvm_perf_teardown(void)
-{
-	return perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
-}
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
deleted file mode 100644
index a08d7a93aebb..000000000000
--- a/arch/arm/kvm/psci.c
+++ /dev/null
@@ -1,332 +0,0 @@
-/*
- * Copyright (C) 2012 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/preempt.h>
-#include <linux/kvm_host.h>
-#include <linux/wait.h>
-
-#include <asm/cputype.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_psci.h>
-#include <asm/kvm_host.h>
-
-#include <uapi/linux/psci.h>
-
-/*
- * This is an implementation of the Power State Coordination Interface
- * as described in ARM document number ARM DEN 0022A.
- */
-
-#define AFFINITY_MASK(level)	~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
-
-static unsigned long psci_affinity_mask(unsigned long affinity_level)
-{
-	if (affinity_level <= 3)
-		return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
-
-	return 0;
-}
-
-static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
-{
-	/*
-	 * NOTE: For simplicity, we make VCPU suspend emulation to be
-	 * same-as WFI (Wait-for-interrupt) emulation.
-	 *
-	 * This means for KVM the wakeup events are interrupts and
-	 * this is consistent with intended use of StateID as described
-	 * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A).
-	 *
-	 * Further, we also treat power-down request to be same as
-	 * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2
-	 * specification (ARM DEN 0022A). This means all suspend states
-	 * for KVM will preserve the register state.
-	 */
-	kvm_vcpu_block(vcpu);
-
-	return PSCI_RET_SUCCESS;
-}
-
-static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.power_off = true;
-}
-
-static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
-{
-	struct kvm *kvm = source_vcpu->kvm;
-	struct kvm_vcpu *vcpu = NULL;
-	struct swait_queue_head *wq;
-	unsigned long cpu_id;
-	unsigned long context_id;
-	phys_addr_t target_pc;
-
-	cpu_id = vcpu_get_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
-	if (vcpu_mode_is_32bit(source_vcpu))
-		cpu_id &= ~((u32) 0);
-
-	vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id);
-
-	/*
-	 * Make sure the caller requested a valid CPU and that the CPU is
-	 * turned off.
-	 */
-	if (!vcpu)
-		return PSCI_RET_INVALID_PARAMS;
-	if (!vcpu->arch.power_off) {
-		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
-			return PSCI_RET_ALREADY_ON;
-		else
-			return PSCI_RET_INVALID_PARAMS;
-	}
-
-	target_pc = vcpu_get_reg(source_vcpu, 2);
-	context_id = vcpu_get_reg(source_vcpu, 3);
-
-	kvm_reset_vcpu(vcpu);
-
-	/* Gracefully handle Thumb2 entry point */
-	if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
-		target_pc &= ~((phys_addr_t) 1);
-		vcpu_set_thumb(vcpu);
-	}
-
-	/* Propagate caller endianness */
-	if (kvm_vcpu_is_be(source_vcpu))
-		kvm_vcpu_set_be(vcpu);
-
-	*vcpu_pc(vcpu) = target_pc;
-	/*
-	 * NOTE: We always update r0 (or x0) because for PSCI v0.1
-	 * the general puspose registers are undefined upon CPU_ON.
-	 */
-	vcpu_set_reg(vcpu, 0, context_id);
-	vcpu->arch.power_off = false;
-	smp_mb();		/* Make sure the above is visible */
-
-	wq = kvm_arch_vcpu_wq(vcpu);
-	swake_up(wq);
-
-	return PSCI_RET_SUCCESS;
-}
-
-static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
-{
-	int i, matching_cpus = 0;
-	unsigned long mpidr;
-	unsigned long target_affinity;
-	unsigned long target_affinity_mask;
-	unsigned long lowest_affinity_level;
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_vcpu *tmp;
-
-	target_affinity = vcpu_get_reg(vcpu, 1);
-	lowest_affinity_level = vcpu_get_reg(vcpu, 2);
-
-	/* Determine target affinity mask */
-	target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
-	if (!target_affinity_mask)
-		return PSCI_RET_INVALID_PARAMS;
-
-	/* Ignore other bits of target affinity */
-	target_affinity &= target_affinity_mask;
-
-	/*
-	 * If one or more VCPU matching target affinity are running
-	 * then ON else OFF
-	 */
-	kvm_for_each_vcpu(i, tmp, kvm) {
-		mpidr = kvm_vcpu_get_mpidr_aff(tmp);
-		if ((mpidr & target_affinity_mask) == target_affinity) {
-			matching_cpus++;
-			if (!tmp->arch.power_off)
-				return PSCI_0_2_AFFINITY_LEVEL_ON;
-		}
-	}
-
-	if (!matching_cpus)
-		return PSCI_RET_INVALID_PARAMS;
-
-	return PSCI_0_2_AFFINITY_LEVEL_OFF;
-}
-
-static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
-{
-	int i;
-	struct kvm_vcpu *tmp;
-
-	/*
-	 * The KVM ABI specifies that a system event exit may call KVM_RUN
-	 * again and may perform shutdown/reboot at a later time that when the
-	 * actual request is made.  Since we are implementing PSCI and a
-	 * caller of PSCI reboot and shutdown expects that the system shuts
-	 * down or reboots immediately, let's make sure that VCPUs are not run
-	 * after this call is handled and before the VCPUs have been
-	 * re-initialized.
-	 */
-	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
-		tmp->arch.power_off = true;
-		kvm_vcpu_kick(tmp);
-	}
-
-	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
-	vcpu->run->system_event.type = type;
-	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
-}
-
-static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
-{
-	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
-}
-
-static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
-{
-	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
-}
-
-int kvm_psci_version(struct kvm_vcpu *vcpu)
-{
-	if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
-		return KVM_ARM_PSCI_0_2;
-
-	return KVM_ARM_PSCI_0_1;
-}
-
-static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
-{
-	struct kvm *kvm = vcpu->kvm;
-	unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
-	unsigned long val;
-	int ret = 1;
-
-	switch (psci_fn) {
-	case PSCI_0_2_FN_PSCI_VERSION:
-		/*
-		 * Bits[31:16] = Major Version = 0
-		 * Bits[15:0] = Minor Version = 2
-		 */
-		val = 2;
-		break;
-	case PSCI_0_2_FN_CPU_SUSPEND:
-	case PSCI_0_2_FN64_CPU_SUSPEND:
-		val = kvm_psci_vcpu_suspend(vcpu);
-		break;
-	case PSCI_0_2_FN_CPU_OFF:
-		kvm_psci_vcpu_off(vcpu);
-		val = PSCI_RET_SUCCESS;
-		break;
-	case PSCI_0_2_FN_CPU_ON:
-	case PSCI_0_2_FN64_CPU_ON:
-		mutex_lock(&kvm->lock);
-		val = kvm_psci_vcpu_on(vcpu);
-		mutex_unlock(&kvm->lock);
-		break;
-	case PSCI_0_2_FN_AFFINITY_INFO:
-	case PSCI_0_2_FN64_AFFINITY_INFO:
-		val = kvm_psci_vcpu_affinity_info(vcpu);
-		break;
-	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
-		/*
-		 * Trusted OS is MP hence does not require migration
-	         * or
-		 * Trusted OS is not present
-		 */
-		val = PSCI_0_2_TOS_MP;
-		break;
-	case PSCI_0_2_FN_SYSTEM_OFF:
-		kvm_psci_system_off(vcpu);
-		/*
-		 * We should'nt be going back to guest VCPU after
-		 * receiving SYSTEM_OFF request.
-		 *
-		 * If user space accidently/deliberately resumes
-		 * guest VCPU after SYSTEM_OFF request then guest
-		 * VCPU should see internal failure from PSCI return
-		 * value. To achieve this, we preload r0 (or x0) with
-		 * PSCI return value INTERNAL_FAILURE.
-		 */
-		val = PSCI_RET_INTERNAL_FAILURE;
-		ret = 0;
-		break;
-	case PSCI_0_2_FN_SYSTEM_RESET:
-		kvm_psci_system_reset(vcpu);
-		/*
-		 * Same reason as SYSTEM_OFF for preloading r0 (or x0)
-		 * with PSCI return value INTERNAL_FAILURE.
-		 */
-		val = PSCI_RET_INTERNAL_FAILURE;
-		ret = 0;
-		break;
-	default:
-		val = PSCI_RET_NOT_SUPPORTED;
-		break;
-	}
-
-	vcpu_set_reg(vcpu, 0, val);
-	return ret;
-}
-
-static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
-{
-	struct kvm *kvm = vcpu->kvm;
-	unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
-	unsigned long val;
-
-	switch (psci_fn) {
-	case KVM_PSCI_FN_CPU_OFF:
-		kvm_psci_vcpu_off(vcpu);
-		val = PSCI_RET_SUCCESS;
-		break;
-	case KVM_PSCI_FN_CPU_ON:
-		mutex_lock(&kvm->lock);
-		val = kvm_psci_vcpu_on(vcpu);
-		mutex_unlock(&kvm->lock);
-		break;
-	default:
-		val = PSCI_RET_NOT_SUPPORTED;
-		break;
-	}
-
-	vcpu_set_reg(vcpu, 0, val);
-	return 1;
-}
-
-/**
- * kvm_psci_call - handle PSCI call if r0 value is in range
- * @vcpu: Pointer to the VCPU struct
- *
- * Handle PSCI calls from guests through traps from HVC instructions.
- * The calling convention is similar to SMC calls to the secure world
- * where the function number is placed in r0.
- *
- * This function returns: > 0 (success), 0 (success but exit to user
- * space), and < 0 (errors)
- *
- * Errors:
- * -EINVAL: Unrecognized PSCI function
- */
-int kvm_psci_call(struct kvm_vcpu *vcpu)
-{
-	switch (kvm_psci_version(vcpu)) {
-	case KVM_ARM_PSCI_0_2:
-		return kvm_psci_0_2_call(vcpu);
-	case KVM_ARM_PSCI_0_1:
-		return kvm_psci_0_1_call(vcpu);
-	default:
-		return -EINVAL;
-	};
-}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index c25a88598eb0..fc0943776db2 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -6,133 +6,6 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm
 
-/*
- * Tracepoints for entry/exit to guest
- */
-TRACE_EVENT(kvm_entry,
-	TP_PROTO(unsigned long vcpu_pc),
-	TP_ARGS(vcpu_pc),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	vcpu_pc		)
-	),
-
-	TP_fast_assign(
-		__entry->vcpu_pc		= vcpu_pc;
-	),
-
-	TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
-);
-
-TRACE_EVENT(kvm_exit,
-	TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
-	TP_ARGS(idx, exit_reason, vcpu_pc),
-
-	TP_STRUCT__entry(
-		__field(	int,		idx		)
-		__field(	unsigned int,	exit_reason	)
-		__field(	unsigned long,	vcpu_pc		)
-	),
-
-	TP_fast_assign(
-		__entry->idx			= idx;
-		__entry->exit_reason		= exit_reason;
-		__entry->vcpu_pc		= vcpu_pc;
-	),
-
-	TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
-		  __print_symbolic(__entry->idx, kvm_arm_exception_type),
-		  __entry->exit_reason,
-		  __print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
-		  __entry->vcpu_pc)
-);
-
-TRACE_EVENT(kvm_guest_fault,
-	TP_PROTO(unsigned long vcpu_pc, unsigned long hsr,
-		 unsigned long hxfar,
-		 unsigned long long ipa),
-	TP_ARGS(vcpu_pc, hsr, hxfar, ipa),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	vcpu_pc		)
-		__field(	unsigned long,	hsr		)
-		__field(	unsigned long,	hxfar		)
-		__field(   unsigned long long,	ipa		)
-	),
-
-	TP_fast_assign(
-		__entry->vcpu_pc		= vcpu_pc;
-		__entry->hsr			= hsr;
-		__entry->hxfar			= hxfar;
-		__entry->ipa			= ipa;
-	),
-
-	TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
-		  __entry->ipa, __entry->hsr,
-		  __entry->hxfar, __entry->vcpu_pc)
-);
-
-TRACE_EVENT(kvm_access_fault,
-	TP_PROTO(unsigned long ipa),
-	TP_ARGS(ipa),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	ipa		)
-	),
-
-	TP_fast_assign(
-		__entry->ipa		= ipa;
-	),
-
-	TP_printk("IPA: %lx", __entry->ipa)
-);
-
-TRACE_EVENT(kvm_irq_line,
-	TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
-	TP_ARGS(type, vcpu_idx, irq_num, level),
-
-	TP_STRUCT__entry(
-		__field(	unsigned int,	type		)
-		__field(	int,		vcpu_idx	)
-		__field(	int,		irq_num		)
-		__field(	int,		level		)
-	),
-
-	TP_fast_assign(
-		__entry->type		= type;
-		__entry->vcpu_idx	= vcpu_idx;
-		__entry->irq_num	= irq_num;
-		__entry->level		= level;
-	),
-
-	TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d",
-		  (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" :
-		  (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" :
-		  (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN",
-		  __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level)
-);
-
-TRACE_EVENT(kvm_mmio_emulate,
-	TP_PROTO(unsigned long vcpu_pc, unsigned long instr,
-		 unsigned long cpsr),
-	TP_ARGS(vcpu_pc, instr, cpsr),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	vcpu_pc		)
-		__field(	unsigned long,	instr		)
-		__field(	unsigned long,	cpsr		)
-	),
-
-	TP_fast_assign(
-		__entry->vcpu_pc		= vcpu_pc;
-		__entry->instr			= instr;
-		__entry->cpsr			= cpsr;
-	),
-
-	TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
-		  __entry->vcpu_pc, __entry->instr, __entry->cpsr)
-);
-
 /* Architecturally implementation defined CP15 register access */
 TRACE_EVENT(kvm_emulate_cp15_imp,
 	TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
@@ -181,87 +54,6 @@ TRACE_EVENT(kvm_wfx,
 		__entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
 );
 
-TRACE_EVENT(kvm_unmap_hva,
-	TP_PROTO(unsigned long hva),
-	TP_ARGS(hva),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	hva		)
-	),
-
-	TP_fast_assign(
-		__entry->hva		= hva;
-	),
-
-	TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
-);
-
-TRACE_EVENT(kvm_unmap_hva_range,
-	TP_PROTO(unsigned long start, unsigned long end),
-	TP_ARGS(start, end),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	start		)
-		__field(	unsigned long,	end		)
-	),
-
-	TP_fast_assign(
-		__entry->start		= start;
-		__entry->end		= end;
-	),
-
-	TP_printk("mmu notifier unmap range: %#08lx -- %#08lx",
-		  __entry->start, __entry->end)
-);
-
-TRACE_EVENT(kvm_set_spte_hva,
-	TP_PROTO(unsigned long hva),
-	TP_ARGS(hva),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	hva		)
-	),
-
-	TP_fast_assign(
-		__entry->hva		= hva;
-	),
-
-	TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
-);
-
-TRACE_EVENT(kvm_age_hva,
-	TP_PROTO(unsigned long start, unsigned long end),
-	TP_ARGS(start, end),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	start		)
-		__field(	unsigned long,	end		)
-	),
-
-	TP_fast_assign(
-		__entry->start		= start;
-		__entry->end		= end;
-	),
-
-	TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
-		  __entry->start, __entry->end)
-);
-
-TRACE_EVENT(kvm_test_age_hva,
-	TP_PROTO(unsigned long hva),
-	TP_ARGS(hva),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	hva		)
-	),
-
-	TP_fast_assign(
-		__entry->hva		= hva;
-	),
-
-	TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
-);
-
 TRACE_EVENT(kvm_hvc,
 	TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
 	TP_ARGS(vcpu_pc, r0, imm),
@@ -282,45 +74,6 @@ TRACE_EVENT(kvm_hvc,
 		  __entry->vcpu_pc, __entry->r0, __entry->imm)
 );
 
-TRACE_EVENT(kvm_set_way_flush,
-	    TP_PROTO(unsigned long vcpu_pc, bool cache),
-	    TP_ARGS(vcpu_pc, cache),
-
-	    TP_STRUCT__entry(
-		    __field(	unsigned long,	vcpu_pc		)
-		    __field(	bool,		cache		)
-	    ),
-
-	    TP_fast_assign(
-		    __entry->vcpu_pc		= vcpu_pc;
-		    __entry->cache		= cache;
-	    ),
-
-	    TP_printk("S/W flush at 0x%016lx (cache %s)",
-		      __entry->vcpu_pc, __entry->cache ? "on" : "off")
-);
-
-TRACE_EVENT(kvm_toggle_cache,
-	    TP_PROTO(unsigned long vcpu_pc, bool was, bool now),
-	    TP_ARGS(vcpu_pc, was, now),
-
-	    TP_STRUCT__entry(
-		    __field(	unsigned long,	vcpu_pc		)
-		    __field(	bool,		was		)
-		    __field(	bool,		now		)
-	    ),
-
-	    TP_fast_assign(
-		    __entry->vcpu_pc		= vcpu_pc;
-		    __entry->was		= was;
-		    __entry->now		= now;
-	    ),
-
-	    TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
-		      __entry->vcpu_pc, __entry->was ? "on" : "off",
-		      __entry->now ? "on" : "off")
-);
-
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index afd51bebb9c5..5d9810086c25 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -7,14 +7,13 @@ CFLAGS_arm.o := -I.
 CFLAGS_mmu.o := -I.
 
 KVM=../../../virt/kvm
-ARM=../../../arch/arm/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
 obj-$(CONFIG_KVM_ARM_HOST) += hyp/
 
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
 
 kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
new file mode 100644
index 000000000000..7941699a766d
--- /dev/null
+++ b/virt/kvm/arm/arm.c
@@ -0,0 +1,1480 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/cpu_pm.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/mman.h>
+#include <linux/sched.h>
+#include <linux/kvm.h>
+#include <trace/events/kvm.h>
+#include <kvm/arm_pmu.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/mman.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/virt.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_psci.h>
+#include <asm/sections.h>
+
+#ifdef REQUIRES_VIRT
+__asm__(".arch_extension	virt");
+#endif
+
+static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
+
+/* Per-CPU variable containing the currently running vcpu. */
+static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
+
+/* The VMID used in the VTTBR */
+static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
+static u32 kvm_next_vmid;
+static unsigned int kvm_vmid_bits __read_mostly;
+static DEFINE_SPINLOCK(kvm_vmid_lock);
+
+static bool vgic_present;
+
+static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
+
+static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(preemptible());
+	__this_cpu_write(kvm_arm_running_vcpu, vcpu);
+}
+
+/**
+ * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
+ * Must be called from non-preemptible context
+ */
+struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
+{
+	BUG_ON(preemptible());
+	return __this_cpu_read(kvm_arm_running_vcpu);
+}
+
+/**
+ * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
+ */
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
+{
+	return &kvm_arm_running_vcpu;
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
+}
+
+int kvm_arch_hardware_setup(void)
+{
+	return 0;
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+	*(int *)rtn = 0;
+}
+
+
+/**
+ * kvm_arch_init_vm - initializes a VM data structure
+ * @kvm:	pointer to the KVM struct
+ */
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+	int ret, cpu;
+
+	if (type)
+		return -EINVAL;
+
+	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
+	if (!kvm->arch.last_vcpu_ran)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
+
+	ret = kvm_alloc_stage2_pgd(kvm);
+	if (ret)
+		goto out_fail_alloc;
+
+	ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
+	if (ret)
+		goto out_free_stage2_pgd;
+
+	kvm_vgic_early_init(kvm);
+
+	/* Mark the initial VMID generation invalid */
+	kvm->arch.vmid_gen = 0;
+
+	/* The maximum number of VCPUs is limited by the host's GIC model */
+	kvm->arch.max_vcpus = vgic_present ?
+				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
+
+	return ret;
+out_free_stage2_pgd:
+	kvm_free_stage2_pgd(kvm);
+out_fail_alloc:
+	free_percpu(kvm->arch.last_vcpu_ran);
+	kvm->arch.last_vcpu_ran = NULL;
+	return ret;
+}
+
+bool kvm_arch_has_vcpu_debugfs(void)
+{
+	return false;
+}
+
+int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+	return VM_FAULT_SIGBUS;
+}
+
+
+/**
+ * kvm_arch_destroy_vm - destroy the VM data structure
+ * @kvm:	pointer to the KVM struct
+ */
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+	int i;
+
+	free_percpu(kvm->arch.last_vcpu_ran);
+	kvm->arch.last_vcpu_ran = NULL;
+
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		if (kvm->vcpus[i]) {
+			kvm_arch_vcpu_free(kvm->vcpus[i]);
+			kvm->vcpus[i] = NULL;
+		}
+	}
+
+	kvm_vgic_destroy(kvm);
+}
+
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+{
+	int r;
+	switch (ext) {
+	case KVM_CAP_IRQCHIP:
+		r = vgic_present;
+		break;
+	case KVM_CAP_IOEVENTFD:
+	case KVM_CAP_DEVICE_CTRL:
+	case KVM_CAP_USER_MEMORY:
+	case KVM_CAP_SYNC_MMU:
+	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+	case KVM_CAP_ONE_REG:
+	case KVM_CAP_ARM_PSCI:
+	case KVM_CAP_ARM_PSCI_0_2:
+	case KVM_CAP_READONLY_MEM:
+	case KVM_CAP_MP_STATE:
+	case KVM_CAP_IMMEDIATE_EXIT:
+		r = 1;
+		break;
+	case KVM_CAP_COALESCED_MMIO:
+		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
+		break;
+	case KVM_CAP_ARM_SET_DEVICE_ADDR:
+		r = 1;
+		break;
+	case KVM_CAP_NR_VCPUS:
+		r = num_online_cpus();
+		break;
+	case KVM_CAP_MAX_VCPUS:
+		r = KVM_MAX_VCPUS;
+		break;
+	case KVM_CAP_NR_MEMSLOTS:
+		r = KVM_USER_MEM_SLOTS;
+		break;
+	case KVM_CAP_MSI_DEVID:
+		if (!kvm)
+			r = -EINVAL;
+		else
+			r = kvm->arch.vgic.msis_require_devid;
+		break;
+	case KVM_CAP_ARM_USER_IRQ:
+		/*
+		 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
+		 * (bump this number if adding more devices)
+		 */
+		r = 1;
+		break;
+	default:
+		r = kvm_arch_dev_ioctl_check_extension(kvm, ext);
+		break;
+	}
+	return r;
+}
+
+long kvm_arch_dev_ioctl(struct file *filp,
+			unsigned int ioctl, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	int err;
+	struct kvm_vcpu *vcpu;
+
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	if (id >= kvm->arch.max_vcpus) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	if (!vcpu) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = kvm_vcpu_init(vcpu, kvm, id);
+	if (err)
+		goto free_vcpu;
+
+	err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
+	if (err)
+		goto vcpu_uninit;
+
+	return vcpu;
+vcpu_uninit:
+	kvm_vcpu_uninit(vcpu);
+free_vcpu:
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+	return ERR_PTR(err);
+}
+
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	kvm_vgic_vcpu_early_init(vcpu);
+}
+
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	kvm_mmu_free_memory_caches(vcpu);
+	kvm_timer_vcpu_terminate(vcpu);
+	kvm_vgic_vcpu_destroy(vcpu);
+	kvm_pmu_vcpu_destroy(vcpu);
+	kvm_vcpu_uninit(vcpu);
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	kvm_arch_vcpu_free(vcpu);
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	return kvm_timer_should_fire(vcpu_vtimer(vcpu)) ||
+	       kvm_timer_should_fire(vcpu_ptimer(vcpu));
+}
+
+void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
+{
+	kvm_timer_schedule(vcpu);
+}
+
+void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
+{
+	kvm_timer_unschedule(vcpu);
+}
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	/* Force users to call KVM_ARM_VCPU_INIT */
+	vcpu->arch.target = -1;
+	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
+
+	/* Set up the timer */
+	kvm_timer_vcpu_init(vcpu);
+
+	kvm_arm_reset_debug_ptr(vcpu);
+
+	return 0;
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	int *last_ran;
+
+	last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
+
+	/*
+	 * We might get preempted before the vCPU actually runs, but
+	 * over-invalidation doesn't affect correctness.
+	 */
+	if (*last_ran != vcpu->vcpu_id) {
+		kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
+		*last_ran = vcpu->vcpu_id;
+	}
+
+	vcpu->cpu = cpu;
+	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
+
+	kvm_arm_set_running_vcpu(vcpu);
+
+	kvm_vgic_load(vcpu);
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	kvm_vgic_put(vcpu);
+
+	vcpu->cpu = -1;
+
+	kvm_arm_set_running_vcpu(NULL);
+	kvm_timer_vcpu_put(vcpu);
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	if (vcpu->arch.power_off)
+		mp_state->mp_state = KVM_MP_STATE_STOPPED;
+	else
+		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	switch (mp_state->mp_state) {
+	case KVM_MP_STATE_RUNNABLE:
+		vcpu->arch.power_off = false;
+		break;
+	case KVM_MP_STATE_STOPPED:
+		vcpu->arch.power_off = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
+ * @v:		The VCPU pointer
+ *
+ * If the guest CPU is not waiting for interrupts or an interrupt line is
+ * asserted, the CPU is by definition runnable.
+ */
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
+{
+	return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
+		&& !v->arch.power_off && !v->arch.pause);
+}
+
+/* Just ensure a guest exit from a particular CPU */
+static void exit_vm_noop(void *info)
+{
+}
+
+void force_vm_exit(const cpumask_t *mask)
+{
+	preempt_disable();
+	smp_call_function_many(mask, exit_vm_noop, NULL, true);
+	preempt_enable();
+}
+
+/**
+ * need_new_vmid_gen - check that the VMID is still valid
+ * @kvm: The VM's VMID to check
+ *
+ * return true if there is a new generation of VMIDs being used
+ *
+ * The hardware supports only 256 values with the value zero reserved for the
+ * host, so we check if an assigned value belongs to a previous generation,
+ * which which requires us to assign a new value. If we're the first to use a
+ * VMID for the new generation, we must flush necessary caches and TLBs on all
+ * CPUs.
+ */
+static bool need_new_vmid_gen(struct kvm *kvm)
+{
+	return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
+}
+
+/**
+ * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
+ * @kvm	The guest that we are about to run
+ *
+ * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
+ * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
+ * caches and TLBs.
+ */
+static void update_vttbr(struct kvm *kvm)
+{
+	phys_addr_t pgd_phys;
+	u64 vmid;
+
+	if (!need_new_vmid_gen(kvm))
+		return;
+
+	spin_lock(&kvm_vmid_lock);
+
+	/*
+	 * We need to re-check the vmid_gen here to ensure that if another vcpu
+	 * already allocated a valid vmid for this vm, then this vcpu should
+	 * use the same vmid.
+	 */
+	if (!need_new_vmid_gen(kvm)) {
+		spin_unlock(&kvm_vmid_lock);
+		return;
+	}
+
+	/* First user of a new VMID generation? */
+	if (unlikely(kvm_next_vmid == 0)) {
+		atomic64_inc(&kvm_vmid_gen);
+		kvm_next_vmid = 1;
+
+		/*
+		 * On SMP we know no other CPUs can use this CPU's or each
+		 * other's VMID after force_vm_exit returns since the
+		 * kvm_vmid_lock blocks them from reentry to the guest.
+		 */
+		force_vm_exit(cpu_all_mask);
+		/*
+		 * Now broadcast TLB + ICACHE invalidation over the inner
+		 * shareable domain to make sure all data structures are
+		 * clean.
+		 */
+		kvm_call_hyp(__kvm_flush_vm_context);
+	}
+
+	kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
+	kvm->arch.vmid = kvm_next_vmid;
+	kvm_next_vmid++;
+	kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
+
+	/* update vttbr to be used with the new vmid */
+	pgd_phys = virt_to_phys(kvm->arch.pgd);
+	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
+	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
+	kvm->arch.vttbr = pgd_phys | vmid;
+
+	spin_unlock(&kvm_vmid_lock);
+}
+
+static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	int ret = 0;
+
+	if (likely(vcpu->arch.has_run_once))
+		return 0;
+
+	vcpu->arch.has_run_once = true;
+
+	/*
+	 * Map the VGIC hardware resources before running a vcpu the first
+	 * time on this VM.
+	 */
+	if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) {
+		ret = kvm_vgic_map_resources(kvm);
+		if (ret)
+			return ret;
+	}
+
+	ret = kvm_timer_enable(vcpu);
+
+	return ret;
+}
+
+bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+	return vgic_initialized(kvm);
+}
+
+void kvm_arm_halt_guest(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		vcpu->arch.pause = true;
+	kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT);
+}
+
+void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.pause = true;
+	kvm_vcpu_kick(vcpu);
+}
+
+void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu)
+{
+	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
+
+	vcpu->arch.pause = false;
+	swake_up(wq);
+}
+
+void kvm_arm_resume_guest(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_arm_resume_vcpu(vcpu);
+}
+
+static void vcpu_sleep(struct kvm_vcpu *vcpu)
+{
+	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
+
+	swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
+				       (!vcpu->arch.pause)));
+}
+
+static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.target >= 0;
+}
+
+/**
+ * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
+ * @vcpu:	The VCPU pointer
+ * @run:	The kvm_run structure pointer used for userspace state exchange
+ *
+ * This function is called through the VCPU_RUN ioctl called from user space. It
+ * will execute VM code in a loop until the time slice for the process is used
+ * or some emulation is needed from user space in which case the function will
+ * return with return value 0 and with the kvm_run structure filled in with the
+ * required data for the requested emulation.
+ */
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	int ret;
+	sigset_t sigsaved;
+
+	if (unlikely(!kvm_vcpu_initialized(vcpu)))
+		return -ENOEXEC;
+
+	ret = kvm_vcpu_first_run_init(vcpu);
+	if (ret)
+		return ret;
+
+	if (run->exit_reason == KVM_EXIT_MMIO) {
+		ret = kvm_handle_mmio_return(vcpu, vcpu->run);
+		if (ret)
+			return ret;
+	}
+
+	if (run->immediate_exit)
+		return -EINTR;
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+	ret = 1;
+	run->exit_reason = KVM_EXIT_UNKNOWN;
+	while (ret > 0) {
+		/*
+		 * Check conditions before entering the guest
+		 */
+		cond_resched();
+
+		update_vttbr(vcpu->kvm);
+
+		if (vcpu->arch.power_off || vcpu->arch.pause)
+			vcpu_sleep(vcpu);
+
+		/*
+		 * Preparing the interrupts to be injected also
+		 * involves poking the GIC, which must be done in a
+		 * non-preemptible context.
+		 */
+		preempt_disable();
+
+		kvm_pmu_flush_hwstate(vcpu);
+
+		kvm_timer_flush_hwstate(vcpu);
+		kvm_vgic_flush_hwstate(vcpu);
+
+		local_irq_disable();
+
+		/*
+		 * If we have a singal pending, or need to notify a userspace
+		 * irqchip about timer or PMU level changes, then we exit (and
+		 * update the timer level state in kvm_timer_update_run
+		 * below).
+		 */
+		if (signal_pending(current) ||
+		    kvm_timer_should_notify_user(vcpu) ||
+		    kvm_pmu_should_notify_user(vcpu)) {
+			ret = -EINTR;
+			run->exit_reason = KVM_EXIT_INTR;
+		}
+
+		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
+			vcpu->arch.power_off || vcpu->arch.pause) {
+			local_irq_enable();
+			kvm_pmu_sync_hwstate(vcpu);
+			kvm_timer_sync_hwstate(vcpu);
+			kvm_vgic_sync_hwstate(vcpu);
+			preempt_enable();
+			continue;
+		}
+
+		kvm_arm_setup_debug(vcpu);
+
+		/**************************************************************
+		 * Enter the guest
+		 */
+		trace_kvm_entry(*vcpu_pc(vcpu));
+		guest_enter_irqoff();
+		vcpu->mode = IN_GUEST_MODE;
+
+		ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
+
+		vcpu->mode = OUTSIDE_GUEST_MODE;
+		vcpu->stat.exits++;
+		/*
+		 * Back from guest
+		 *************************************************************/
+
+		kvm_arm_clear_debug(vcpu);
+
+		/*
+		 * We may have taken a host interrupt in HYP mode (ie
+		 * while executing the guest). This interrupt is still
+		 * pending, as we haven't serviced it yet!
+		 *
+		 * We're now back in SVC mode, with interrupts
+		 * disabled.  Enabling the interrupts now will have
+		 * the effect of taking the interrupt again, in SVC
+		 * mode this time.
+		 */
+		local_irq_enable();
+
+		/*
+		 * We do local_irq_enable() before calling guest_exit() so
+		 * that if a timer interrupt hits while running the guest we
+		 * account that tick as being spent in the guest.  We enable
+		 * preemption after calling guest_exit() so that if we get
+		 * preempted we make sure ticks after that is not counted as
+		 * guest time.
+		 */
+		guest_exit();
+		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+
+		/*
+		 * We must sync the PMU and timer state before the vgic state so
+		 * that the vgic can properly sample the updated state of the
+		 * interrupt line.
+		 */
+		kvm_pmu_sync_hwstate(vcpu);
+		kvm_timer_sync_hwstate(vcpu);
+
+		kvm_vgic_sync_hwstate(vcpu);
+
+		preempt_enable();
+
+		ret = handle_exit(vcpu, run, ret);
+	}
+
+	/* Tell userspace about in-kernel device output levels */
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
+		kvm_timer_update_run(vcpu);
+		kvm_pmu_update_run(vcpu);
+	}
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	return ret;
+}
+
+static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
+{
+	int bit_index;
+	bool set;
+	unsigned long *ptr;
+
+	if (number == KVM_ARM_IRQ_CPU_IRQ)
+		bit_index = __ffs(HCR_VI);
+	else /* KVM_ARM_IRQ_CPU_FIQ */
+		bit_index = __ffs(HCR_VF);
+
+	ptr = (unsigned long *)&vcpu->arch.irq_lines;
+	if (level)
+		set = test_and_set_bit(bit_index, ptr);
+	else
+		set = test_and_clear_bit(bit_index, ptr);
+
+	/*
+	 * If we didn't change anything, no need to wake up or kick other CPUs
+	 */
+	if (set == level)
+		return 0;
+
+	/*
+	 * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
+	 * trigger a world-switch round on the running physical CPU to set the
+	 * virtual IRQ/FIQ fields in the HCR appropriately.
+	 */
+	kvm_vcpu_kick(vcpu);
+
+	return 0;
+}
+
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
+			  bool line_status)
+{
+	u32 irq = irq_level->irq;
+	unsigned int irq_type, vcpu_idx, irq_num;
+	int nrcpus = atomic_read(&kvm->online_vcpus);
+	struct kvm_vcpu *vcpu = NULL;
+	bool level = irq_level->level;
+
+	irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
+	vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
+	irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
+
+	trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
+
+	switch (irq_type) {
+	case KVM_ARM_IRQ_TYPE_CPU:
+		if (irqchip_in_kernel(kvm))
+			return -ENXIO;
+
+		if (vcpu_idx >= nrcpus)
+			return -EINVAL;
+
+		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+		if (!vcpu)
+			return -EINVAL;
+
+		if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
+			return -EINVAL;
+
+		return vcpu_interrupt_line(vcpu, irq_num, level);
+	case KVM_ARM_IRQ_TYPE_PPI:
+		if (!irqchip_in_kernel(kvm))
+			return -ENXIO;
+
+		if (vcpu_idx >= nrcpus)
+			return -EINVAL;
+
+		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+		if (!vcpu)
+			return -EINVAL;
+
+		if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
+			return -EINVAL;
+
+		return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level);
+	case KVM_ARM_IRQ_TYPE_SPI:
+		if (!irqchip_in_kernel(kvm))
+			return -ENXIO;
+
+		if (irq_num < VGIC_NR_PRIVATE_IRQS)
+			return -EINVAL;
+
+		return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
+	}
+
+	return -EINVAL;
+}
+
+static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+			       const struct kvm_vcpu_init *init)
+{
+	unsigned int i;
+	int phys_target = kvm_target_cpu();
+
+	if (init->target != phys_target)
+		return -EINVAL;
+
+	/*
+	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+	 * use the same target.
+	 */
+	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
+		return -EINVAL;
+
+	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+	for (i = 0; i < sizeof(init->features) * 8; i++) {
+		bool set = (init->features[i / 32] & (1 << (i % 32)));
+
+		if (set && i >= KVM_VCPU_MAX_FEATURES)
+			return -ENOENT;
+
+		/*
+		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+		 * use the same feature set.
+		 */
+		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
+		    test_bit(i, vcpu->arch.features) != set)
+			return -EINVAL;
+
+		if (set)
+			set_bit(i, vcpu->arch.features);
+	}
+
+	vcpu->arch.target = phys_target;
+
+	/* Now we know what it is, we can reset it. */
+	return kvm_reset_vcpu(vcpu);
+}
+
+
+static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
+					 struct kvm_vcpu_init *init)
+{
+	int ret;
+
+	ret = kvm_vcpu_set_target(vcpu, init);
+	if (ret)
+		return ret;
+
+	/*
+	 * Ensure a rebooted VM will fault in RAM pages and detect if the
+	 * guest MMU is turned off and flush the caches as needed.
+	 */
+	if (vcpu->arch.has_run_once)
+		stage2_unmap_vm(vcpu->kvm);
+
+	vcpu_reset_hcr(vcpu);
+
+	/*
+	 * Handle the "start in power-off" case.
+	 */
+	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+		vcpu->arch.power_off = true;
+	else
+		vcpu->arch.power_off = false;
+
+	return 0;
+}
+
+static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
+				 struct kvm_device_attr *attr)
+{
+	int ret = -ENXIO;
+
+	switch (attr->group) {
+	default:
+		ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
+		break;
+	}
+
+	return ret;
+}
+
+static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
+				 struct kvm_device_attr *attr)
+{
+	int ret = -ENXIO;
+
+	switch (attr->group) {
+	default:
+		ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
+		break;
+	}
+
+	return ret;
+}
+
+static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
+				 struct kvm_device_attr *attr)
+{
+	int ret = -ENXIO;
+
+	switch (attr->group) {
+	default:
+		ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
+		break;
+	}
+
+	return ret;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+			 unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	struct kvm_device_attr attr;
+
+	switch (ioctl) {
+	case KVM_ARM_VCPU_INIT: {
+		struct kvm_vcpu_init init;
+
+		if (copy_from_user(&init, argp, sizeof(init)))
+			return -EFAULT;
+
+		return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
+	}
+	case KVM_SET_ONE_REG:
+	case KVM_GET_ONE_REG: {
+		struct kvm_one_reg reg;
+
+		if (unlikely(!kvm_vcpu_initialized(vcpu)))
+			return -ENOEXEC;
+
+		if (copy_from_user(&reg, argp, sizeof(reg)))
+			return -EFAULT;
+		if (ioctl == KVM_SET_ONE_REG)
+			return kvm_arm_set_reg(vcpu, &reg);
+		else
+			return kvm_arm_get_reg(vcpu, &reg);
+	}
+	case KVM_GET_REG_LIST: {
+		struct kvm_reg_list __user *user_list = argp;
+		struct kvm_reg_list reg_list;
+		unsigned n;
+
+		if (unlikely(!kvm_vcpu_initialized(vcpu)))
+			return -ENOEXEC;
+
+		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
+			return -EFAULT;
+		n = reg_list.n;
+		reg_list.n = kvm_arm_num_regs(vcpu);
+		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
+			return -EFAULT;
+		if (n < reg_list.n)
+			return -E2BIG;
+		return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
+	}
+	case KVM_SET_DEVICE_ATTR: {
+		if (copy_from_user(&attr, argp, sizeof(attr)))
+			return -EFAULT;
+		return kvm_arm_vcpu_set_attr(vcpu, &attr);
+	}
+	case KVM_GET_DEVICE_ATTR: {
+		if (copy_from_user(&attr, argp, sizeof(attr)))
+			return -EFAULT;
+		return kvm_arm_vcpu_get_attr(vcpu, &attr);
+	}
+	case KVM_HAS_DEVICE_ATTR: {
+		if (copy_from_user(&attr, argp, sizeof(attr)))
+			return -EFAULT;
+		return kvm_arm_vcpu_has_attr(vcpu, &attr);
+	}
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
+ *
+ * Steps 1-4 below provide general overview of dirty page logging. See
+ * kvm_get_dirty_log_protect() function description for additional details.
+ *
+ * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
+ * always flush the TLB (step 4) even if previous step failed  and the dirty
+ * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
+ * does not preclude user space subsequent dirty log read. Flushing TLB ensures
+ * writes will be marked dirty for next log read.
+ *
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Copy the snapshot to the userspace.
+ *   4. Flush TLB's if needed.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+	bool is_dirty = false;
+	int r;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
+
+	if (is_dirty)
+		kvm_flush_remote_tlbs(kvm);
+
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
+					struct kvm_arm_device_addr *dev_addr)
+{
+	unsigned long dev_id, type;
+
+	dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
+		KVM_ARM_DEVICE_ID_SHIFT;
+	type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
+		KVM_ARM_DEVICE_TYPE_SHIFT;
+
+	switch (dev_id) {
+	case KVM_ARM_DEVICE_VGIC_V2:
+		if (!vgic_present)
+			return -ENXIO;
+		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
+	default:
+		return -ENODEV;
+	}
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+		       unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+
+	switch (ioctl) {
+	case KVM_CREATE_IRQCHIP: {
+		int ret;
+		if (!vgic_present)
+			return -ENXIO;
+		mutex_lock(&kvm->lock);
+		ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
+		mutex_unlock(&kvm->lock);
+		return ret;
+	}
+	case KVM_ARM_SET_DEVICE_ADDR: {
+		struct kvm_arm_device_addr dev_addr;
+
+		if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
+			return -EFAULT;
+		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
+	}
+	case KVM_ARM_PREFERRED_TARGET: {
+		int err;
+		struct kvm_vcpu_init init;
+
+		err = kvm_vcpu_preferred_target(&init);
+		if (err)
+			return err;
+
+		if (copy_to_user(argp, &init, sizeof(init)))
+			return -EFAULT;
+
+		return 0;
+	}
+	default:
+		return -EINVAL;
+	}
+}
+
+static void cpu_init_hyp_mode(void *dummy)
+{
+	phys_addr_t pgd_ptr;
+	unsigned long hyp_stack_ptr;
+	unsigned long stack_page;
+	unsigned long vector_ptr;
+
+	/* Switch from the HYP stub to our own HYP init vector */
+	__hyp_set_vectors(kvm_get_idmap_vector());
+
+	pgd_ptr = kvm_mmu_get_httbr();
+	stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
+	hyp_stack_ptr = stack_page + PAGE_SIZE;
+	vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector);
+
+	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
+	__cpu_init_stage2();
+
+	if (is_kernel_in_hyp_mode())
+		kvm_timer_init_vhe();
+
+	kvm_arm_init_debug();
+}
+
+static void cpu_hyp_reset(void)
+{
+	if (!is_kernel_in_hyp_mode())
+		__hyp_reset_vectors();
+}
+
+static void cpu_hyp_reinit(void)
+{
+	cpu_hyp_reset();
+
+	if (is_kernel_in_hyp_mode()) {
+		/*
+		 * __cpu_init_stage2() is safe to call even if the PM
+		 * event was cancelled before the CPU was reset.
+		 */
+		__cpu_init_stage2();
+	} else {
+		cpu_init_hyp_mode(NULL);
+	}
+}
+
+static void _kvm_arch_hardware_enable(void *discard)
+{
+	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
+		cpu_hyp_reinit();
+		__this_cpu_write(kvm_arm_hardware_enabled, 1);
+	}
+}
+
+int kvm_arch_hardware_enable(void)
+{
+	_kvm_arch_hardware_enable(NULL);
+	return 0;
+}
+
+static void _kvm_arch_hardware_disable(void *discard)
+{
+	if (__this_cpu_read(kvm_arm_hardware_enabled)) {
+		cpu_hyp_reset();
+		__this_cpu_write(kvm_arm_hardware_enabled, 0);
+	}
+}
+
+void kvm_arch_hardware_disable(void)
+{
+	_kvm_arch_hardware_disable(NULL);
+}
+
+#ifdef CONFIG_CPU_PM
+static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
+				    unsigned long cmd,
+				    void *v)
+{
+	/*
+	 * kvm_arm_hardware_enabled is left with its old value over
+	 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
+	 * re-enable hyp.
+	 */
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		if (__this_cpu_read(kvm_arm_hardware_enabled))
+			/*
+			 * don't update kvm_arm_hardware_enabled here
+			 * so that the hardware will be re-enabled
+			 * when we resume. See below.
+			 */
+			cpu_hyp_reset();
+
+		return NOTIFY_OK;
+	case CPU_PM_EXIT:
+		if (__this_cpu_read(kvm_arm_hardware_enabled))
+			/* The hardware was enabled before suspend. */
+			cpu_hyp_reinit();
+
+		return NOTIFY_OK;
+
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
+static struct notifier_block hyp_init_cpu_pm_nb = {
+	.notifier_call = hyp_init_cpu_pm_notifier,
+};
+
+static void __init hyp_cpu_pm_init(void)
+{
+	cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
+}
+static void __init hyp_cpu_pm_exit(void)
+{
+	cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
+}
+#else
+static inline void hyp_cpu_pm_init(void)
+{
+}
+static inline void hyp_cpu_pm_exit(void)
+{
+}
+#endif
+
+static void teardown_common_resources(void)
+{
+	free_percpu(kvm_host_cpu_state);
+}
+
+static int init_common_resources(void)
+{
+	kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
+	if (!kvm_host_cpu_state) {
+		kvm_err("Cannot allocate host CPU state\n");
+		return -ENOMEM;
+	}
+
+	/* set size of VMID supported by CPU */
+	kvm_vmid_bits = kvm_get_vmid_bits();
+	kvm_info("%d-bit VMID\n", kvm_vmid_bits);
+
+	return 0;
+}
+
+static int init_subsystems(void)
+{
+	int err = 0;
+
+	/*
+	 * Enable hardware so that subsystem initialisation can access EL2.
+	 */
+	on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
+
+	/*
+	 * Register CPU lower-power notifier
+	 */
+	hyp_cpu_pm_init();
+
+	/*
+	 * Init HYP view of VGIC
+	 */
+	err = kvm_vgic_hyp_init();
+	switch (err) {
+	case 0:
+		vgic_present = true;
+		break;
+	case -ENODEV:
+	case -ENXIO:
+		vgic_present = false;
+		err = 0;
+		break;
+	default:
+		goto out;
+	}
+
+	/*
+	 * Init HYP architected timer support
+	 */
+	err = kvm_timer_hyp_init();
+	if (err)
+		goto out;
+
+	kvm_perf_init();
+	kvm_coproc_table_init();
+
+out:
+	on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
+
+	return err;
+}
+
+static void teardown_hyp_mode(void)
+{
+	int cpu;
+
+	if (is_kernel_in_hyp_mode())
+		return;
+
+	free_hyp_pgds();
+	for_each_possible_cpu(cpu)
+		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+	hyp_cpu_pm_exit();
+}
+
+static int init_vhe_mode(void)
+{
+	kvm_info("VHE mode initialized successfully\n");
+	return 0;
+}
+
+/**
+ * Inits Hyp-mode on all online CPUs
+ */
+static int init_hyp_mode(void)
+{
+	int cpu;
+	int err = 0;
+
+	/*
+	 * Allocate Hyp PGD and setup Hyp identity mapping
+	 */
+	err = kvm_mmu_init();
+	if (err)
+		goto out_err;
+
+	/*
+	 * Allocate stack pages for Hypervisor-mode
+	 */
+	for_each_possible_cpu(cpu) {
+		unsigned long stack_page;
+
+		stack_page = __get_free_page(GFP_KERNEL);
+		if (!stack_page) {
+			err = -ENOMEM;
+			goto out_err;
+		}
+
+		per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
+	}
+
+	/*
+	 * Map the Hyp-code called directly from the host
+	 */
+	err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
+				  kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
+	if (err) {
+		kvm_err("Cannot map world-switch code\n");
+		goto out_err;
+	}
+
+	err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
+				  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
+	if (err) {
+		kvm_err("Cannot map rodata section\n");
+		goto out_err;
+	}
+
+	err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
+				  kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
+	if (err) {
+		kvm_err("Cannot map bss section\n");
+		goto out_err;
+	}
+
+	/*
+	 * Map the Hyp stack pages
+	 */
+	for_each_possible_cpu(cpu) {
+		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
+		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
+					  PAGE_HYP);
+
+		if (err) {
+			kvm_err("Cannot map hyp stack\n");
+			goto out_err;
+		}
+	}
+
+	for_each_possible_cpu(cpu) {
+		kvm_cpu_context_t *cpu_ctxt;
+
+		cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
+		err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP);
+
+		if (err) {
+			kvm_err("Cannot map host CPU state: %d\n", err);
+			goto out_err;
+		}
+	}
+
+	kvm_info("Hyp mode initialized successfully\n");
+
+	return 0;
+
+out_err:
+	teardown_hyp_mode();
+	kvm_err("error initializing Hyp mode: %d\n", err);
+	return err;
+}
+
+static void check_kvm_target_cpu(void *ret)
+{
+	*(int *)ret = kvm_target_cpu();
+}
+
+struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	mpidr &= MPIDR_HWID_BITMASK;
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
+			return vcpu;
+	}
+	return NULL;
+}
+
+/**
+ * Initialize Hyp-mode and memory mappings on all CPUs.
+ */
+int kvm_arch_init(void *opaque)
+{
+	int err;
+	int ret, cpu;
+
+	if (!is_hyp_mode_available()) {
+		kvm_err("HYP mode not available\n");
+		return -ENODEV;
+	}
+
+	for_each_online_cpu(cpu) {
+		smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
+		if (ret < 0) {
+			kvm_err("Error, CPU %d not supported!\n", cpu);
+			return -ENODEV;
+		}
+	}
+
+	err = init_common_resources();
+	if (err)
+		return err;
+
+	if (is_kernel_in_hyp_mode())
+		err = init_vhe_mode();
+	else
+		err = init_hyp_mode();
+	if (err)
+		goto out_err;
+
+	err = init_subsystems();
+	if (err)
+		goto out_hyp;
+
+	return 0;
+
+out_hyp:
+	teardown_hyp_mode();
+out_err:
+	teardown_common_resources();
+	return err;
+}
+
+/* NOP: Compiling as a module not supported */
+void kvm_arch_exit(void)
+{
+	kvm_perf_teardown();
+}
+
+static int arm_init(void)
+{
+	int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+	return rc;
+}
+
+module_init(arm_init);
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
new file mode 100644
index 000000000000..b6e715fd3c90
--- /dev/null
+++ b/virt/kvm/arm/mmio.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_mmio.h>
+#include <asm/kvm_emulate.h>
+#include <trace/events/kvm.h>
+
+#include "trace.h"
+
+void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data)
+{
+	void *datap = NULL;
+	union {
+		u8	byte;
+		u16	hword;
+		u32	word;
+		u64	dword;
+	} tmp;
+
+	switch (len) {
+	case 1:
+		tmp.byte	= data;
+		datap		= &tmp.byte;
+		break;
+	case 2:
+		tmp.hword	= data;
+		datap		= &tmp.hword;
+		break;
+	case 4:
+		tmp.word	= data;
+		datap		= &tmp.word;
+		break;
+	case 8:
+		tmp.dword	= data;
+		datap		= &tmp.dword;
+		break;
+	}
+
+	memcpy(buf, datap, len);
+}
+
+unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len)
+{
+	unsigned long data = 0;
+	union {
+		u16	hword;
+		u32	word;
+		u64	dword;
+	} tmp;
+
+	switch (len) {
+	case 1:
+		data = *(u8 *)buf;
+		break;
+	case 2:
+		memcpy(&tmp.hword, buf, len);
+		data = tmp.hword;
+		break;
+	case 4:
+		memcpy(&tmp.word, buf, len);
+		data = tmp.word;
+		break;
+	case 8:
+		memcpy(&tmp.dword, buf, len);
+		data = tmp.dword;
+		break;
+	}
+
+	return data;
+}
+
+/**
+ * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
+ *			     or in-kernel IO emulation
+ *
+ * @vcpu: The VCPU pointer
+ * @run:  The VCPU run struct containing the mmio data
+ */
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	unsigned long data;
+	unsigned int len;
+	int mask;
+
+	if (!run->mmio.is_write) {
+		len = run->mmio.len;
+		if (len > sizeof(unsigned long))
+			return -EINVAL;
+
+		data = kvm_mmio_read_buf(run->mmio.data, len);
+
+		if (vcpu->arch.mmio_decode.sign_extend &&
+		    len < sizeof(unsigned long)) {
+			mask = 1U << ((len * 8) - 1);
+			data = (data ^ mask) - mask;
+		}
+
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+			       data);
+		data = vcpu_data_host_to_guest(vcpu, data, len);
+		vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
+	}
+
+	return 0;
+}
+
+static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
+{
+	unsigned long rt;
+	int access_size;
+	bool sign_extend;
+
+	if (kvm_vcpu_dabt_iss1tw(vcpu)) {
+		/* page table accesses IO mem: tell guest to fix its TTBR */
+		kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+		return 1;
+	}
+
+	access_size = kvm_vcpu_dabt_get_as(vcpu);
+	if (unlikely(access_size < 0))
+		return access_size;
+
+	*is_write = kvm_vcpu_dabt_iswrite(vcpu);
+	sign_extend = kvm_vcpu_dabt_issext(vcpu);
+	rt = kvm_vcpu_dabt_get_rd(vcpu);
+
+	*len = access_size;
+	vcpu->arch.mmio_decode.sign_extend = sign_extend;
+	vcpu->arch.mmio_decode.rt = rt;
+
+	/*
+	 * The MMIO instruction is emulated and should not be re-executed
+	 * in the guest.
+	 */
+	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+	return 0;
+}
+
+int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		 phys_addr_t fault_ipa)
+{
+	unsigned long data;
+	unsigned long rt;
+	int ret;
+	bool is_write;
+	int len;
+	u8 data_buf[8];
+
+	/*
+	 * Prepare MMIO operation. First decode the syndrome data we get
+	 * from the CPU. Then try if some in-kernel emulation feels
+	 * responsible, otherwise let user space do its magic.
+	 */
+	if (kvm_vcpu_dabt_isvalid(vcpu)) {
+		ret = decode_hsr(vcpu, &is_write, &len);
+		if (ret)
+			return ret;
+	} else {
+		kvm_err("load/store instruction decoding not implemented\n");
+		return -ENOSYS;
+	}
+
+	rt = vcpu->arch.mmio_decode.rt;
+
+	if (is_write) {
+		data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
+					       len);
+
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+		kvm_mmio_write_buf(data_buf, len, data);
+
+		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+				       data_buf);
+	} else {
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
+			       fault_ipa, 0);
+
+		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+				      data_buf);
+	}
+
+	/* Now prepare kvm_run for the potential return to userland. */
+	run->mmio.is_write	= is_write;
+	run->mmio.phys_addr	= fault_ipa;
+	run->mmio.len		= len;
+
+	if (!ret) {
+		/* We handled the access successfully in the kernel. */
+		if (!is_write)
+			memcpy(run->mmio.data, data_buf, len);
+		vcpu->stat.mmio_exit_kernel++;
+		kvm_handle_mmio_return(vcpu, run);
+		return 1;
+	}
+
+	if (is_write)
+		memcpy(run->mmio.data, data_buf, len);
+	vcpu->stat.mmio_exit_user++;
+	run->exit_reason	= KVM_EXIT_MMIO;
+	return 0;
+}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
new file mode 100644
index 000000000000..efb4335aa5c4
--- /dev/null
+++ b/virt/kvm/arm/mmu.c
@@ -0,0 +1,1958 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/mman.h>
+#include <linux/kvm_host.h>
+#include <linux/io.h>
+#include <linux/hugetlb.h>
+#include <trace/events/kvm.h>
+#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_mmio.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/virt.h>
+
+#include "trace.h"
+
+static pgd_t *boot_hyp_pgd;
+static pgd_t *hyp_pgd;
+static pgd_t *merged_hyp_pgd;
+static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
+
+static unsigned long hyp_idmap_start;
+static unsigned long hyp_idmap_end;
+static phys_addr_t hyp_idmap_vector;
+
+#define S2_PGD_SIZE	(PTRS_PER_S2_PGD * sizeof(pgd_t))
+#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+
+#define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
+#define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)
+
+static bool memslot_is_logging(struct kvm_memory_slot *memslot)
+{
+	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
+}
+
+/**
+ * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * @kvm:	pointer to kvm structure.
+ *
+ * Interface to HYP function to flush all VM TLB entries
+ */
+void kvm_flush_remote_tlbs(struct kvm *kvm)
+{
+	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
+}
+
+static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+{
+	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
+}
+
+/*
+ * D-Cache management functions. They take the page table entries by
+ * value, as they are flushing the cache using the kernel mapping (or
+ * kmap on 32bit).
+ */
+static void kvm_flush_dcache_pte(pte_t pte)
+{
+	__kvm_flush_dcache_pte(pte);
+}
+
+static void kvm_flush_dcache_pmd(pmd_t pmd)
+{
+	__kvm_flush_dcache_pmd(pmd);
+}
+
+static void kvm_flush_dcache_pud(pud_t pud)
+{
+	__kvm_flush_dcache_pud(pud);
+}
+
+static bool kvm_is_device_pfn(unsigned long pfn)
+{
+	return !pfn_valid(pfn);
+}
+
+/**
+ * stage2_dissolve_pmd() - clear and flush huge PMD entry
+ * @kvm:	pointer to kvm structure.
+ * @addr:	IPA
+ * @pmd:	pmd pointer for IPA
+ *
+ * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all
+ * pages in the range dirty.
+ */
+static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
+{
+	if (!pmd_thp_or_huge(*pmd))
+		return;
+
+	pmd_clear(pmd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	put_page(virt_to_page(pmd));
+}
+
+static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
+				  int min, int max)
+{
+	void *page;
+
+	BUG_ON(max > KVM_NR_MEM_OBJS);
+	if (cache->nobjs >= min)
+		return 0;
+	while (cache->nobjs < max) {
+		page = (void *)__get_free_page(PGALLOC_GFP);
+		if (!page)
+			return -ENOMEM;
+		cache->objects[cache->nobjs++] = page;
+	}
+	return 0;
+}
+
+static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
+{
+	while (mc->nobjs)
+		free_page((unsigned long)mc->objects[--mc->nobjs]);
+}
+
+static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
+{
+	void *p;
+
+	BUG_ON(!mc || !mc->nobjs);
+	p = mc->objects[--mc->nobjs];
+	return p;
+}
+
+static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+{
+	pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
+	stage2_pgd_clear(pgd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	stage2_pud_free(pud_table);
+	put_page(virt_to_page(pgd));
+}
+
+static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
+{
+	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
+	VM_BUG_ON(stage2_pud_huge(*pud));
+	stage2_pud_clear(pud);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	stage2_pmd_free(pmd_table);
+	put_page(virt_to_page(pud));
+}
+
+static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
+{
+	pte_t *pte_table = pte_offset_kernel(pmd, 0);
+	VM_BUG_ON(pmd_thp_or_huge(*pmd));
+	pmd_clear(pmd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	pte_free_kernel(NULL, pte_table);
+	put_page(virt_to_page(pmd));
+}
+
+/*
+ * Unmapping vs dcache management:
+ *
+ * If a guest maps certain memory pages as uncached, all writes will
+ * bypass the data cache and go directly to RAM.  However, the CPUs
+ * can still speculate reads (not writes) and fill cache lines with
+ * data.
+ *
+ * Those cache lines will be *clean* cache lines though, so a
+ * clean+invalidate operation is equivalent to an invalidate
+ * operation, because no cache lines are marked dirty.
+ *
+ * Those clean cache lines could be filled prior to an uncached write
+ * by the guest, and the cache coherent IO subsystem would therefore
+ * end up writing old data to disk.
+ *
+ * This is why right after unmapping a page/section and invalidating
+ * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
+ * the IO subsystem will never hit in the cache.
+ */
+static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
+		       phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t start_addr = addr;
+	pte_t *pte, *start_pte;
+
+	start_pte = pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			pte_t old_pte = *pte;
+
+			kvm_set_pte(pte, __pte(0));
+			kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+			/* No need to invalidate the cache for device mappings */
+			if (!kvm_is_device_pfn(pte_pfn(old_pte)))
+				kvm_flush_dcache_pte(old_pte);
+
+			put_page(virt_to_page(pte));
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	if (stage2_pte_table_empty(start_pte))
+		clear_stage2_pmd_entry(kvm, pmd, start_addr);
+}
+
+static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
+		       phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next, start_addr = addr;
+	pmd_t *pmd, *start_pmd;
+
+	start_pmd = pmd = stage2_pmd_offset(pud, addr);
+	do {
+		next = stage2_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (pmd_thp_or_huge(*pmd)) {
+				pmd_t old_pmd = *pmd;
+
+				pmd_clear(pmd);
+				kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+				kvm_flush_dcache_pmd(old_pmd);
+
+				put_page(virt_to_page(pmd));
+			} else {
+				unmap_stage2_ptes(kvm, pmd, addr, next);
+			}
+		}
+	} while (pmd++, addr = next, addr != end);
+
+	if (stage2_pmd_table_empty(start_pmd))
+		clear_stage2_pud_entry(kvm, pud, start_addr);
+}
+
+static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
+		       phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next, start_addr = addr;
+	pud_t *pud, *start_pud;
+
+	start_pud = pud = stage2_pud_offset(pgd, addr);
+	do {
+		next = stage2_pud_addr_end(addr, end);
+		if (!stage2_pud_none(*pud)) {
+			if (stage2_pud_huge(*pud)) {
+				pud_t old_pud = *pud;
+
+				stage2_pud_clear(pud);
+				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				kvm_flush_dcache_pud(old_pud);
+				put_page(virt_to_page(pud));
+			} else {
+				unmap_stage2_pmds(kvm, pud, addr, next);
+			}
+		}
+	} while (pud++, addr = next, addr != end);
+
+	if (stage2_pud_table_empty(start_pud))
+		clear_stage2_pgd_entry(kvm, pgd, start_addr);
+}
+
+/**
+ * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
+ * @kvm:   The VM pointer
+ * @start: The intermediate physical base address of the range to unmap
+ * @size:  The size of the area to unmap
+ *
+ * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
+ * be called while holding mmu_lock (unless for freeing the stage2 pgd before
+ * destroying the VM), otherwise another faulting VCPU may come in and mess
+ * with things behind our backs.
+ */
+static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
+{
+	pgd_t *pgd;
+	phys_addr_t addr = start, end = start + size;
+	phys_addr_t next;
+
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	do {
+		next = stage2_pgd_addr_end(addr, end);
+		if (!stage2_pgd_none(*pgd))
+			unmap_stage2_puds(kvm, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pte_t *pte;
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte)))
+			kvm_flush_dcache_pte(*pte);
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pmd_t *pmd;
+	phys_addr_t next;
+
+	pmd = stage2_pmd_offset(pud, addr);
+	do {
+		next = stage2_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (pmd_thp_or_huge(*pmd))
+				kvm_flush_dcache_pmd(*pmd);
+			else
+				stage2_flush_ptes(kvm, pmd, addr, next);
+		}
+	} while (pmd++, addr = next, addr != end);
+}
+
+static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pud_t *pud;
+	phys_addr_t next;
+
+	pud = stage2_pud_offset(pgd, addr);
+	do {
+		next = stage2_pud_addr_end(addr, end);
+		if (!stage2_pud_none(*pud)) {
+			if (stage2_pud_huge(*pud))
+				kvm_flush_dcache_pud(*pud);
+			else
+				stage2_flush_pmds(kvm, pud, addr, next);
+		}
+	} while (pud++, addr = next, addr != end);
+}
+
+static void stage2_flush_memslot(struct kvm *kvm,
+				 struct kvm_memory_slot *memslot)
+{
+	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
+	phys_addr_t next;
+	pgd_t *pgd;
+
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	do {
+		next = stage2_pgd_addr_end(addr, end);
+		stage2_flush_puds(kvm, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the stage 2 page tables and invalidate any cache lines
+ * backing memory already mapped to the VM.
+ */
+static void stage2_flush_vm(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		stage2_flush_memslot(kvm, memslot);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
+static void clear_hyp_pgd_entry(pgd_t *pgd)
+{
+	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL);
+	pgd_clear(pgd);
+	pud_free(NULL, pud_table);
+	put_page(virt_to_page(pgd));
+}
+
+static void clear_hyp_pud_entry(pud_t *pud)
+{
+	pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
+	VM_BUG_ON(pud_huge(*pud));
+	pud_clear(pud);
+	pmd_free(NULL, pmd_table);
+	put_page(virt_to_page(pud));
+}
+
+static void clear_hyp_pmd_entry(pmd_t *pmd)
+{
+	pte_t *pte_table = pte_offset_kernel(pmd, 0);
+	VM_BUG_ON(pmd_thp_or_huge(*pmd));
+	pmd_clear(pmd);
+	pte_free_kernel(NULL, pte_table);
+	put_page(virt_to_page(pmd));
+}
+
+static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
+{
+	pte_t *pte, *start_pte;
+
+	start_pte = pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			kvm_set_pte(pte, __pte(0));
+			put_page(virt_to_page(pte));
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	if (hyp_pte_table_empty(start_pte))
+		clear_hyp_pmd_entry(pmd);
+}
+
+static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next;
+	pmd_t *pmd, *start_pmd;
+
+	start_pmd = pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		/* Hyp doesn't use huge pmds */
+		if (!pmd_none(*pmd))
+			unmap_hyp_ptes(pmd, addr, next);
+	} while (pmd++, addr = next, addr != end);
+
+	if (hyp_pmd_table_empty(start_pmd))
+		clear_hyp_pud_entry(pud);
+}
+
+static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next;
+	pud_t *pud, *start_pud;
+
+	start_pud = pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		/* Hyp doesn't use huge puds */
+		if (!pud_none(*pud))
+			unmap_hyp_pmds(pud, addr, next);
+	} while (pud++, addr = next, addr != end);
+
+	if (hyp_pud_table_empty(start_pud))
+		clear_hyp_pgd_entry(pgd);
+}
+
+static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+{
+	pgd_t *pgd;
+	phys_addr_t addr = start, end = start + size;
+	phys_addr_t next;
+
+	/*
+	 * We don't unmap anything from HYP, except at the hyp tear down.
+	 * Hence, we don't have to invalidate the TLBs here.
+	 */
+	pgd = pgdp + pgd_index(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (!pgd_none(*pgd))
+			unmap_hyp_puds(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * free_hyp_pgds - free Hyp-mode page tables
+ *
+ * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
+ * therefore contains either mappings in the kernel memory area (above
+ * PAGE_OFFSET), or device mappings in the vmalloc range (from
+ * VMALLOC_START to VMALLOC_END).
+ *
+ * boot_hyp_pgd should only map two pages for the init code.
+ */
+void free_hyp_pgds(void)
+{
+	unsigned long addr;
+
+	mutex_lock(&kvm_hyp_pgd_mutex);
+
+	if (boot_hyp_pgd) {
+		unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
+		boot_hyp_pgd = NULL;
+	}
+
+	if (hyp_pgd) {
+		unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
+			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
+		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
+			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
+
+		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
+		hyp_pgd = NULL;
+	}
+	if (merged_hyp_pgd) {
+		clear_page(merged_hyp_pgd);
+		free_page((unsigned long)merged_hyp_pgd);
+		merged_hyp_pgd = NULL;
+	}
+
+	mutex_unlock(&kvm_hyp_pgd_mutex);
+}
+
+static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
+				    unsigned long end, unsigned long pfn,
+				    pgprot_t prot)
+{
+	pte_t *pte;
+	unsigned long addr;
+
+	addr = start;
+	do {
+		pte = pte_offset_kernel(pmd, addr);
+		kvm_set_pte(pte, pfn_pte(pfn, prot));
+		get_page(virt_to_page(pte));
+		kvm_flush_dcache_to_poc(pte, sizeof(*pte));
+		pfn++;
+	} while (addr += PAGE_SIZE, addr != end);
+}
+
+static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
+				   unsigned long end, unsigned long pfn,
+				   pgprot_t prot)
+{
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long addr, next;
+
+	addr = start;
+	do {
+		pmd = pmd_offset(pud, addr);
+
+		BUG_ON(pmd_sect(*pmd));
+
+		if (pmd_none(*pmd)) {
+			pte = pte_alloc_one_kernel(NULL, addr);
+			if (!pte) {
+				kvm_err("Cannot allocate Hyp pte\n");
+				return -ENOMEM;
+			}
+			pmd_populate_kernel(NULL, pmd, pte);
+			get_page(virt_to_page(pmd));
+			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
+		}
+
+		next = pmd_addr_end(addr, end);
+
+		create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+
+static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
+				   unsigned long end, unsigned long pfn,
+				   pgprot_t prot)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr, next;
+	int ret;
+
+	addr = start;
+	do {
+		pud = pud_offset(pgd, addr);
+
+		if (pud_none_or_clear_bad(pud)) {
+			pmd = pmd_alloc_one(NULL, addr);
+			if (!pmd) {
+				kvm_err("Cannot allocate Hyp pmd\n");
+				return -ENOMEM;
+			}
+			pud_populate(NULL, pud, pmd);
+			get_page(virt_to_page(pud));
+			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+		}
+
+		next = pud_addr_end(addr, end);
+		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+		if (ret)
+			return ret;
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+
+static int __create_hyp_mappings(pgd_t *pgdp,
+				 unsigned long start, unsigned long end,
+				 unsigned long pfn, pgprot_t prot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	unsigned long addr, next;
+	int err = 0;
+
+	mutex_lock(&kvm_hyp_pgd_mutex);
+	addr = start & PAGE_MASK;
+	end = PAGE_ALIGN(end);
+	do {
+		pgd = pgdp + pgd_index(addr);
+
+		if (pgd_none(*pgd)) {
+			pud = pud_alloc_one(NULL, addr);
+			if (!pud) {
+				kvm_err("Cannot allocate Hyp pud\n");
+				err = -ENOMEM;
+				goto out;
+			}
+			pgd_populate(NULL, pgd, pud);
+			get_page(virt_to_page(pgd));
+			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
+		}
+
+		next = pgd_addr_end(addr, end);
+		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
+		if (err)
+			goto out;
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
+out:
+	mutex_unlock(&kvm_hyp_pgd_mutex);
+	return err;
+}
+
+static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
+{
+	if (!is_vmalloc_addr(kaddr)) {
+		BUG_ON(!virt_addr_valid(kaddr));
+		return __pa(kaddr);
+	} else {
+		return page_to_phys(vmalloc_to_page(kaddr)) +
+		       offset_in_page(kaddr);
+	}
+}
+
+/**
+ * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
+ * @from:	The virtual kernel start address of the range
+ * @to:		The virtual kernel end address of the range (exclusive)
+ * @prot:	The protection to be applied to this range
+ *
+ * The same virtual address as the kernel virtual address is also used
+ * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
+ * physical pages.
+ */
+int create_hyp_mappings(void *from, void *to, pgprot_t prot)
+{
+	phys_addr_t phys_addr;
+	unsigned long virt_addr;
+	unsigned long start = kern_hyp_va((unsigned long)from);
+	unsigned long end = kern_hyp_va((unsigned long)to);
+
+	if (is_kernel_in_hyp_mode())
+		return 0;
+
+	start = start & PAGE_MASK;
+	end = PAGE_ALIGN(end);
+
+	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
+		int err;
+
+		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
+		err = __create_hyp_mappings(hyp_pgd, virt_addr,
+					    virt_addr + PAGE_SIZE,
+					    __phys_to_pfn(phys_addr),
+					    prot);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
+ * @from:	The kernel start VA of the range
+ * @to:		The kernel end VA of the range (exclusive)
+ * @phys_addr:	The physical start address which gets mapped
+ *
+ * The resulting HYP VA is the same as the kernel VA, modulo
+ * HYP_PAGE_OFFSET.
+ */
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
+{
+	unsigned long start = kern_hyp_va((unsigned long)from);
+	unsigned long end = kern_hyp_va((unsigned long)to);
+
+	if (is_kernel_in_hyp_mode())
+		return 0;
+
+	/* Check for a valid kernel IO mapping */
+	if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
+		return -EINVAL;
+
+	return __create_hyp_mappings(hyp_pgd, start, end,
+				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
+}
+
+/**
+ * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
+ * @kvm:	The KVM struct pointer for the VM.
+ *
+ * Allocates only the stage-2 HW PGD level table(s) (can support either full
+ * 40-bit input addresses or limited to 32-bit input addresses). Clears the
+ * allocated pages.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * created, which can only be done once.
+ */
+int kvm_alloc_stage2_pgd(struct kvm *kvm)
+{
+	pgd_t *pgd;
+
+	if (kvm->arch.pgd != NULL) {
+		kvm_err("kvm_arch already initialized?\n");
+		return -EINVAL;
+	}
+
+	/* Allocate the HW PGD, making sure that each page gets its own refcount */
+	pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO);
+	if (!pgd)
+		return -ENOMEM;
+
+	kvm->arch.pgd = pgd;
+	return 0;
+}
+
+static void stage2_unmap_memslot(struct kvm *kvm,
+				 struct kvm_memory_slot *memslot)
+{
+	hva_t hva = memslot->userspace_addr;
+	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = PAGE_SIZE * memslot->npages;
+	hva_t reg_end = hva + size;
+
+	/*
+	 * A memory region could potentially cover multiple VMAs, and any holes
+	 * between them, so iterate over all of them to find out if we should
+	 * unmap any of them.
+	 *
+	 *     +--------------------------------------------+
+	 * +---------------+----------------+   +----------------+
+	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+	 * +---------------+----------------+   +----------------+
+	 *     |               memory region                |
+	 *     +--------------------------------------------+
+	 */
+	do {
+		struct vm_area_struct *vma = find_vma(current->mm, hva);
+		hva_t vm_start, vm_end;
+
+		if (!vma || vma->vm_start >= reg_end)
+			break;
+
+		/*
+		 * Take the intersection of this VMA with the memory region
+		 */
+		vm_start = max(hva, vma->vm_start);
+		vm_end = min(reg_end, vma->vm_end);
+
+		if (!(vma->vm_flags & VM_PFNMAP)) {
+			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+		}
+		hva = vm_end;
+	} while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memregions and unmap any reguler RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		stage2_unmap_memslot(kvm, memslot);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
+/**
+ * kvm_free_stage2_pgd - free all stage-2 tables
+ * @kvm:	The KVM struct pointer for the VM.
+ *
+ * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
+ * underlying level-2 and level-3 tables before freeing the actual level-1 table
+ * and setting the struct pointer to NULL.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * destroyed, which can only be done once.
+ */
+void kvm_free_stage2_pgd(struct kvm *kvm)
+{
+	if (kvm->arch.pgd == NULL)
+		return;
+
+	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
+	/* Free the HW pgd, one page at a time */
+	free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
+	kvm->arch.pgd = NULL;
+}
+
+static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			     phys_addr_t addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	if (WARN_ON(stage2_pgd_none(*pgd))) {
+		if (!cache)
+			return NULL;
+		pud = mmu_memory_cache_alloc(cache);
+		stage2_pgd_populate(pgd, pud);
+		get_page(virt_to_page(pgd));
+	}
+
+	return stage2_pud_offset(pgd, addr);
+}
+
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			     phys_addr_t addr)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pud = stage2_get_pud(kvm, cache, addr);
+	if (stage2_pud_none(*pud)) {
+		if (!cache)
+			return NULL;
+		pmd = mmu_memory_cache_alloc(cache);
+		stage2_pud_populate(pud, pmd);
+		get_page(virt_to_page(pud));
+	}
+
+	return stage2_pmd_offset(pud, addr);
+}
+
+static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
+			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
+{
+	pmd_t *pmd, old_pmd;
+
+	pmd = stage2_get_pmd(kvm, cache, addr);
+	VM_BUG_ON(!pmd);
+
+	/*
+	 * Mapping in huge pages should only happen through a fault.  If a
+	 * page is merged into a transparent huge page, the individual
+	 * subpages of that huge page should be unmapped through MMU
+	 * notifiers before we get here.
+	 *
+	 * Merging of CompoundPages is not supported; they should become
+	 * splitting first, unmapped, merged, and mapped back in on-demand.
+	 */
+	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
+
+	old_pmd = *pmd;
+	if (pmd_present(old_pmd)) {
+		pmd_clear(pmd);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+	} else {
+		get_page(virt_to_page(pmd));
+	}
+
+	kvm_set_pmd(pmd, *new_pmd);
+	return 0;
+}
+
+static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			  phys_addr_t addr, const pte_t *new_pte,
+			  unsigned long flags)
+{
+	pmd_t *pmd;
+	pte_t *pte, old_pte;
+	bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
+	bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;
+
+	VM_BUG_ON(logging_active && !cache);
+
+	/* Create stage-2 page table mapping - Levels 0 and 1 */
+	pmd = stage2_get_pmd(kvm, cache, addr);
+	if (!pmd) {
+		/*
+		 * Ignore calls from kvm_set_spte_hva for unallocated
+		 * address ranges.
+		 */
+		return 0;
+	}
+
+	/*
+	 * While dirty page logging - dissolve huge PMD, then continue on to
+	 * allocate page.
+	 */
+	if (logging_active)
+		stage2_dissolve_pmd(kvm, addr, pmd);
+
+	/* Create stage-2 page mappings - Level 2 */
+	if (pmd_none(*pmd)) {
+		if (!cache)
+			return 0; /* ignore calls from kvm_set_spte_hva */
+		pte = mmu_memory_cache_alloc(cache);
+		pmd_populate_kernel(NULL, pmd, pte);
+		get_page(virt_to_page(pmd));
+	}
+
+	pte = pte_offset_kernel(pmd, addr);
+
+	if (iomap && pte_present(*pte))
+		return -EFAULT;
+
+	/* Create 2nd stage page table mapping - Level 3 */
+	old_pte = *pte;
+	if (pte_present(old_pte)) {
+		kvm_set_pte(pte, __pte(0));
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+	} else {
+		get_page(virt_to_page(pte));
+	}
+
+	kvm_set_pte(pte, *new_pte);
+	return 0;
+}
+
+#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+	if (pte_young(*pte)) {
+		*pte = pte_mkold(*pte);
+		return 1;
+	}
+	return 0;
+}
+#else
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+	return __ptep_test_and_clear_young(pte);
+}
+#endif
+
+static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
+{
+	return stage2_ptep_test_and_clear_young((pte_t *)pmd);
+}
+
+/**
+ * kvm_phys_addr_ioremap - map a device range to guest IPA
+ *
+ * @kvm:	The KVM pointer
+ * @guest_ipa:	The IPA at which to insert the mapping
+ * @pa:		The physical address of the device
+ * @size:	The size of the mapping
+ */
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+			  phys_addr_t pa, unsigned long size, bool writable)
+{
+	phys_addr_t addr, end;
+	int ret = 0;
+	unsigned long pfn;
+	struct kvm_mmu_memory_cache cache = { 0, };
+
+	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
+	pfn = __phys_to_pfn(pa);
+
+	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
+		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
+
+		if (writable)
+			pte = kvm_s2pte_mkwrite(pte);
+
+		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
+						KVM_NR_MEM_OBJS);
+		if (ret)
+			goto out;
+		spin_lock(&kvm->mmu_lock);
+		ret = stage2_set_pte(kvm, &cache, addr, &pte,
+						KVM_S2PTE_FLAG_IS_IOMAP);
+		spin_unlock(&kvm->mmu_lock);
+		if (ret)
+			goto out;
+
+		pfn++;
+	}
+
+out:
+	mmu_free_memory_cache(&cache);
+	return ret;
+}
+
+static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
+{
+	kvm_pfn_t pfn = *pfnp;
+	gfn_t gfn = *ipap >> PAGE_SHIFT;
+
+	if (PageTransCompoundMap(pfn_to_page(pfn))) {
+		unsigned long mask;
+		/*
+		 * The address we faulted on is backed by a transparent huge
+		 * page.  However, because we map the compound huge page and
+		 * not the individual tail page, we need to transfer the
+		 * refcount to the head page.  We have to be careful that the
+		 * THP doesn't start to split while we are adjusting the
+		 * refcounts.
+		 *
+		 * We are sure this doesn't happen, because mmu_notifier_retry
+		 * was successful and we are holding the mmu_lock, so if this
+		 * THP is trying to split, it will be blocked in the mmu
+		 * notifier before touching any of the pages, specifically
+		 * before being able to call __split_huge_page_refcount().
+		 *
+		 * We can therefore safely transfer the refcount from PG_tail
+		 * to PG_head and switch the pfn from a tail page to the head
+		 * page accordingly.
+		 */
+		mask = PTRS_PER_PMD - 1;
+		VM_BUG_ON((gfn & mask) != (pfn & mask));
+		if (pfn & mask) {
+			*ipap &= PMD_MASK;
+			kvm_release_pfn_clean(pfn);
+			pfn &= ~mask;
+			kvm_get_pfn(pfn);
+			*pfnp = pfn;
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+	if (kvm_vcpu_trap_is_iabt(vcpu))
+		return false;
+
+	return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
+/**
+ * stage2_wp_ptes - write protect PMD range
+ * @pmd:	pointer to pmd entry
+ * @addr:	range start address
+ * @end:	range end address
+ */
+static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
+{
+	pte_t *pte;
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			if (!kvm_s2pte_readonly(pte))
+				kvm_set_s2pte_readonly(pte);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+/**
+ * stage2_wp_pmds - write protect PUD range
+ * @pud:	pointer to pud entry
+ * @addr:	range start address
+ * @end:	range end address
+ */
+static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
+{
+	pmd_t *pmd;
+	phys_addr_t next;
+
+	pmd = stage2_pmd_offset(pud, addr);
+
+	do {
+		next = stage2_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (pmd_thp_or_huge(*pmd)) {
+				if (!kvm_s2pmd_readonly(pmd))
+					kvm_set_s2pmd_readonly(pmd);
+			} else {
+				stage2_wp_ptes(pmd, addr, next);
+			}
+		}
+	} while (pmd++, addr = next, addr != end);
+}
+
+/**
+  * stage2_wp_puds - write protect PGD range
+  * @pgd:	pointer to pgd entry
+  * @addr:	range start address
+  * @end:	range end address
+  *
+  * Process PUD entries, for a huge PUD we cause a panic.
+  */
+static void  stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
+{
+	pud_t *pud;
+	phys_addr_t next;
+
+	pud = stage2_pud_offset(pgd, addr);
+	do {
+		next = stage2_pud_addr_end(addr, end);
+		if (!stage2_pud_none(*pud)) {
+			/* TODO:PUD not supported, revisit later if supported */
+			BUG_ON(stage2_pud_huge(*pud));
+			stage2_wp_pmds(pud, addr, next);
+		}
+	} while (pud++, addr = next, addr != end);
+}
+
+/**
+ * stage2_wp_range() - write protect stage2 memory region range
+ * @kvm:	The KVM pointer
+ * @addr:	Start address of range
+ * @end:	End address of range
+ */
+static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+{
+	pgd_t *pgd;
+	phys_addr_t next;
+
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	do {
+		/*
+		 * Release kvm_mmu_lock periodically if the memory region is
+		 * large. Otherwise, we may see kernel panics with
+		 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
+		 * CONFIG_LOCKDEP. Additionally, holding the lock too long
+		 * will also starve other vCPUs.
+		 */
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
+			cond_resched_lock(&kvm->mmu_lock);
+
+		next = stage2_pgd_addr_end(addr, end);
+		if (stage2_pgd_present(*pgd))
+			stage2_wp_puds(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
+ * @kvm:	The KVM pointer
+ * @slot:	The memory slot to write protect
+ *
+ * Called to start logging dirty pages after memory region
+ * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
+ * all present PMD and PTEs are write protected in the memory region.
+ * Afterwards read of dirty page log can be called.
+ *
+ * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
+ * serializing operations for VM memory regions.
+ */
+void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
+{
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
+	phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+
+	spin_lock(&kvm->mmu_lock);
+	stage2_wp_range(kvm, start, end);
+	spin_unlock(&kvm->mmu_lock);
+	kvm_flush_remote_tlbs(kvm);
+}
+
+/**
+ * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
+ * @kvm:	The KVM pointer
+ * @slot:	The memory slot associated with mask
+ * @gfn_offset:	The gfn offset in memory slot
+ * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
+ *		slot to be write protected
+ *
+ * Walks bits set in mask write protects the associated pte's. Caller must
+ * acquire kvm_mmu_lock.
+ */
+static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask)
+{
+	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
+	phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
+	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
+
+	stage2_wp_range(kvm, start, end);
+}
+
+/*
+ * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
+ * dirty pages.
+ *
+ * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
+ * enable dirty logging for them.
+ */
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask)
+{
+	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
+}
+
+static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
+				      unsigned long size)
+{
+	__coherent_cache_guest_page(vcpu, pfn, size);
+}
+
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			  struct kvm_memory_slot *memslot, unsigned long hva,
+			  unsigned long fault_status)
+{
+	int ret;
+	bool write_fault, writable, hugetlb = false, force_pte = false;
+	unsigned long mmu_seq;
+	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+	struct vm_area_struct *vma;
+	kvm_pfn_t pfn;
+	pgprot_t mem_type = PAGE_S2;
+	bool logging_active = memslot_is_logging(memslot);
+	unsigned long flags = 0;
+
+	write_fault = kvm_is_write_fault(vcpu);
+	if (fault_status == FSC_PERM && !write_fault) {
+		kvm_err("Unexpected L2 read permission error\n");
+		return -EFAULT;
+	}
+
+	/* Let's check if we will get back a huge page backed by hugetlbfs */
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma_intersection(current->mm, hva, hva + 1);
+	if (unlikely(!vma)) {
+		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+		up_read(&current->mm->mmap_sem);
+		return -EFAULT;
+	}
+
+	if (is_vm_hugetlb_page(vma) && !logging_active) {
+		hugetlb = true;
+		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+	} else {
+		/*
+		 * Pages belonging to memslots that don't have the same
+		 * alignment for userspace and IPA cannot be mapped using
+		 * block descriptors even if the pages belong to a THP for
+		 * the process, because the stage-2 block descriptor will
+		 * cover more than a single THP and we loose atomicity for
+		 * unmapping, updates, and splits of the THP or other pages
+		 * in the stage-2 block range.
+		 */
+		if ((memslot->userspace_addr & ~PMD_MASK) !=
+		    ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
+			force_pte = true;
+	}
+	up_read(&current->mm->mmap_sem);
+
+	/* We need minimum second+third level pages */
+	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
+				     KVM_NR_MEM_OBJS);
+	if (ret)
+		return ret;
+
+	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/*
+	 * Ensure the read of mmu_notifier_seq happens before we call
+	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
+	 * the page we just got a reference to gets unmapped before we have a
+	 * chance to grab the mmu_lock, which ensure that if the page gets
+	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
+	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
+	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
+	 */
+	smp_rmb();
+
+	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
+	if (is_error_noslot_pfn(pfn))
+		return -EFAULT;
+
+	if (kvm_is_device_pfn(pfn)) {
+		mem_type = PAGE_S2_DEVICE;
+		flags |= KVM_S2PTE_FLAG_IS_IOMAP;
+	} else if (logging_active) {
+		/*
+		 * Faults on pages in a memslot with logging enabled
+		 * should not be mapped with huge pages (it introduces churn
+		 * and performance degradation), so force a pte mapping.
+		 */
+		force_pte = true;
+		flags |= KVM_S2_FLAG_LOGGING_ACTIVE;
+
+		/*
+		 * Only actually map the page as writable if this was a write
+		 * fault.
+		 */
+		if (!write_fault)
+			writable = false;
+	}
+
+	spin_lock(&kvm->mmu_lock);
+	if (mmu_notifier_retry(kvm, mmu_seq))
+		goto out_unlock;
+
+	if (!hugetlb && !force_pte)
+		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
+
+	if (hugetlb) {
+		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
+		new_pmd = pmd_mkhuge(new_pmd);
+		if (writable) {
+			new_pmd = kvm_s2pmd_mkwrite(new_pmd);
+			kvm_set_pfn_dirty(pfn);
+		}
+		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE);
+		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+	} else {
+		pte_t new_pte = pfn_pte(pfn, mem_type);
+
+		if (writable) {
+			new_pte = kvm_s2pte_mkwrite(new_pte);
+			kvm_set_pfn_dirty(pfn);
+			mark_page_dirty(kvm, gfn);
+		}
+		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE);
+		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
+	}
+
+out_unlock:
+	spin_unlock(&kvm->mmu_lock);
+	kvm_set_pfn_accessed(pfn);
+	kvm_release_pfn_clean(pfn);
+	return ret;
+}
+
+/*
+ * Resolve the access fault by making the page young again.
+ * Note that because the faulting entry is guaranteed not to be
+ * cached in the TLB, we don't need to invalidate anything.
+ * Only the HW Access Flag updates are supported for Stage 2 (no DBM),
+ * so there is no need for atomic (pte|pmd)_mkyoung operations.
+ */
+static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
+{
+	pmd_t *pmd;
+	pte_t *pte;
+	kvm_pfn_t pfn;
+	bool pfn_valid = false;
+
+	trace_kvm_access_fault(fault_ipa);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+
+	pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
+	if (!pmd || pmd_none(*pmd))	/* Nothing there */
+		goto out;
+
+	if (pmd_thp_or_huge(*pmd)) {	/* THP, HugeTLB */
+		*pmd = pmd_mkyoung(*pmd);
+		pfn = pmd_pfn(*pmd);
+		pfn_valid = true;
+		goto out;
+	}
+
+	pte = pte_offset_kernel(pmd, fault_ipa);
+	if (pte_none(*pte))		/* Nothing there either */
+		goto out;
+
+	*pte = pte_mkyoung(*pte);	/* Just a page... */
+	pfn = pte_pfn(*pte);
+	pfn_valid = true;
+out:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	if (pfn_valid)
+		kvm_set_pfn_accessed(pfn);
+}
+
+/**
+ * kvm_handle_guest_abort - handles all 2nd stage aborts
+ * @vcpu:	the VCPU pointer
+ * @run:	the kvm_run structure
+ *
+ * Any abort that gets to the host is almost guaranteed to be caused by a
+ * missing second stage translation table entry, which can mean that either the
+ * guest simply needs more memory and we must allocate an appropriate page or it
+ * can mean that the guest tried to access I/O memory, which is emulated by user
+ * space. The distinction is based on the IPA causing the fault and whether this
+ * memory region has been registered as standard RAM by user space.
+ */
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	unsigned long fault_status;
+	phys_addr_t fault_ipa;
+	struct kvm_memory_slot *memslot;
+	unsigned long hva;
+	bool is_iabt, write_fault, writable;
+	gfn_t gfn;
+	int ret, idx;
+
+	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
+	if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) {
+		kvm_inject_vabt(vcpu);
+		return 1;
+	}
+
+	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+
+	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
+			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
+
+	/* Check the stage-2 fault is trans. fault or write fault */
+	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
+	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
+	    fault_status != FSC_ACCESS) {
+		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+			kvm_vcpu_trap_get_class(vcpu),
+			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+			(unsigned long)kvm_vcpu_get_hsr(vcpu));
+		return -EFAULT;
+	}
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	gfn = fault_ipa >> PAGE_SHIFT;
+	memslot = gfn_to_memslot(vcpu->kvm, gfn);
+	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+	write_fault = kvm_is_write_fault(vcpu);
+	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
+		if (is_iabt) {
+			/* Prefetch Abort on I/O address */
+			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+			ret = 1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Check for a cache maintenance operation. Since we
+		 * ended-up here, we know it is outside of any memory
+		 * slot. But we can't find out if that is for a device,
+		 * or if the guest is just being stupid. The only thing
+		 * we know for sure is that this range cannot be cached.
+		 *
+		 * So let's assume that the guest is just being
+		 * cautious, and skip the instruction.
+		 */
+		if (kvm_vcpu_dabt_is_cm(vcpu)) {
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+			ret = 1;
+			goto out_unlock;
+		}
+
+		/*
+		 * The IPA is reported as [MAX:12], so we need to
+		 * complement it with the bottom 12 bits from the
+		 * faulting VA. This is always 12 bits, irrespective
+		 * of the page size.
+		 */
+		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
+		ret = io_mem_abort(vcpu, run, fault_ipa);
+		goto out_unlock;
+	}
+
+	/* Userspace should not be able to register out-of-bounds IPAs */
+	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
+
+	if (fault_status == FSC_ACCESS) {
+		handle_access_fault(vcpu, fault_ipa);
+		ret = 1;
+		goto out_unlock;
+	}
+
+	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
+	if (ret == 0)
+		ret = 1;
+out_unlock:
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	return ret;
+}
+
+static int handle_hva_to_gpa(struct kvm *kvm,
+			     unsigned long start,
+			     unsigned long end,
+			     int (*handler)(struct kvm *kvm,
+					    gpa_t gpa, u64 size,
+					    void *data),
+			     void *data)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int ret = 0;
+
+	slots = kvm_memslots(kvm);
+
+	/* we only care about the pages that the guest sees */
+	kvm_for_each_memslot(memslot, slots) {
+		unsigned long hva_start, hva_end;
+		gfn_t gpa;
+
+		hva_start = max(start, memslot->userspace_addr);
+		hva_end = min(end, memslot->userspace_addr +
+					(memslot->npages << PAGE_SHIFT));
+		if (hva_start >= hva_end)
+			continue;
+
+		gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT;
+		ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data);
+	}
+
+	return ret;
+}
+
+static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
+{
+	unmap_stage2_range(kvm, gpa, size);
+	return 0;
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	unsigned long end = hva + PAGE_SIZE;
+
+	if (!kvm->arch.pgd)
+		return 0;
+
+	trace_kvm_unmap_hva(hva);
+	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
+	return 0;
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm,
+			unsigned long start, unsigned long end)
+{
+	if (!kvm->arch.pgd)
+		return 0;
+
+	trace_kvm_unmap_hva_range(start, end);
+	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+	return 0;
+}
+
+static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
+{
+	pte_t *pte = (pte_t *)data;
+
+	WARN_ON(size != PAGE_SIZE);
+	/*
+	 * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
+	 * flag clear because MMU notifiers will have unmapped a huge PMD before
+	 * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and
+	 * therefore stage2_set_pte() never needs to clear out a huge PMD
+	 * through this calling path.
+	 */
+	stage2_set_pte(kvm, NULL, gpa, pte, 0);
+	return 0;
+}
+
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	unsigned long end = hva + PAGE_SIZE;
+	pte_t stage2_pte;
+
+	if (!kvm->arch.pgd)
+		return;
+
+	trace_kvm_set_spte_hva(hva);
+	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
+	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
+}
+
+static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
+{
+	pmd_t *pmd;
+	pte_t *pte;
+
+	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
+	pmd = stage2_get_pmd(kvm, NULL, gpa);
+	if (!pmd || pmd_none(*pmd))	/* Nothing there */
+		return 0;
+
+	if (pmd_thp_or_huge(*pmd))	/* THP, HugeTLB */
+		return stage2_pmdp_test_and_clear_young(pmd);
+
+	pte = pte_offset_kernel(pmd, gpa);
+	if (pte_none(*pte))
+		return 0;
+
+	return stage2_ptep_test_and_clear_young(pte);
+}
+
+static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
+{
+	pmd_t *pmd;
+	pte_t *pte;
+
+	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
+	pmd = stage2_get_pmd(kvm, NULL, gpa);
+	if (!pmd || pmd_none(*pmd))	/* Nothing there */
+		return 0;
+
+	if (pmd_thp_or_huge(*pmd))		/* THP, HugeTLB */
+		return pmd_young(*pmd);
+
+	pte = pte_offset_kernel(pmd, gpa);
+	if (!pte_none(*pte))		/* Just a page... */
+		return pte_young(*pte);
+
+	return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+	trace_kvm_age_hva(start, end);
+	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	trace_kvm_test_age_hva(hva);
+	return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
+}
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
+{
+	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+}
+
+phys_addr_t kvm_mmu_get_httbr(void)
+{
+	if (__kvm_cpu_uses_extended_idmap())
+		return virt_to_phys(merged_hyp_pgd);
+	else
+		return virt_to_phys(hyp_pgd);
+}
+
+phys_addr_t kvm_get_idmap_vector(void)
+{
+	return hyp_idmap_vector;
+}
+
+static int kvm_map_idmap_text(pgd_t *pgd)
+{
+	int err;
+
+	/* Create the idmap in the boot page tables */
+	err = 	__create_hyp_mappings(pgd,
+				      hyp_idmap_start, hyp_idmap_end,
+				      __phys_to_pfn(hyp_idmap_start),
+				      PAGE_HYP_EXEC);
+	if (err)
+		kvm_err("Failed to idmap %lx-%lx\n",
+			hyp_idmap_start, hyp_idmap_end);
+
+	return err;
+}
+
+int kvm_mmu_init(void)
+{
+	int err;
+
+	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
+	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
+	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
+
+	/*
+	 * We rely on the linker script to ensure at build time that the HYP
+	 * init code does not cross a page boundary.
+	 */
+	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
+
+	kvm_info("IDMAP page: %lx\n", hyp_idmap_start);
+	kvm_info("HYP VA range: %lx:%lx\n",
+		 kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
+
+	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
+	    hyp_idmap_start <  kern_hyp_va(~0UL) &&
+	    hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
+		/*
+		 * The idmap page is intersecting with the VA space,
+		 * it is not safe to continue further.
+		 */
+		kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
+	if (!hyp_pgd) {
+		kvm_err("Hyp mode PGD not allocated\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (__kvm_cpu_uses_extended_idmap()) {
+		boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+							 hyp_pgd_order);
+		if (!boot_hyp_pgd) {
+			kvm_err("Hyp boot PGD not allocated\n");
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = kvm_map_idmap_text(boot_hyp_pgd);
+		if (err)
+			goto out;
+
+		merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+		if (!merged_hyp_pgd) {
+			kvm_err("Failed to allocate extra HYP pgd\n");
+			goto out;
+		}
+		__kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
+				    hyp_idmap_start);
+	} else {
+		err = kvm_map_idmap_text(hyp_pgd);
+		if (err)
+			goto out;
+	}
+
+	return 0;
+out:
+	free_hyp_pgds();
+	return err;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				   const struct kvm_userspace_memory_region *mem,
+				   const struct kvm_memory_slot *old,
+				   const struct kvm_memory_slot *new,
+				   enum kvm_mr_change change)
+{
+	/*
+	 * At this point memslot has been committed and there is an
+	 * allocated dirty_bitmap[], dirty pages will be be tracked while the
+	 * memory slot is write protected.
+	 */
+	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
+		kvm_mmu_wp_memory_region(kvm, mem->slot);
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot,
+				   const struct kvm_userspace_memory_region *mem,
+				   enum kvm_mr_change change)
+{
+	hva_t hva = mem->userspace_addr;
+	hva_t reg_end = hva + mem->memory_size;
+	bool writable = !(mem->flags & KVM_MEM_READONLY);
+	int ret = 0;
+
+	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
+			change != KVM_MR_FLAGS_ONLY)
+		return 0;
+
+	/*
+	 * Prevent userspace from creating a memory region outside of the IPA
+	 * space addressable by the KVM guest IPA space.
+	 */
+	if (memslot->base_gfn + memslot->npages >=
+	    (KVM_PHYS_SIZE >> PAGE_SHIFT))
+		return -EFAULT;
+
+	/*
+	 * A memory region could potentially cover multiple VMAs, and any holes
+	 * between them, so iterate over all of them to find out if we can map
+	 * any of them right now.
+	 *
+	 *     +--------------------------------------------+
+	 * +---------------+----------------+   +----------------+
+	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+	 * +---------------+----------------+   +----------------+
+	 *     |               memory region                |
+	 *     +--------------------------------------------+
+	 */
+	do {
+		struct vm_area_struct *vma = find_vma(current->mm, hva);
+		hva_t vm_start, vm_end;
+
+		if (!vma || vma->vm_start >= reg_end)
+			break;
+
+		/*
+		 * Mapping a read-only VMA is only allowed if the
+		 * memory region is configured as read-only.
+		 */
+		if (writable && !(vma->vm_flags & VM_WRITE)) {
+			ret = -EPERM;
+			break;
+		}
+
+		/*
+		 * Take the intersection of this VMA with the memory region
+		 */
+		vm_start = max(hva, vma->vm_start);
+		vm_end = min(reg_end, vma->vm_end);
+
+		if (vma->vm_flags & VM_PFNMAP) {
+			gpa_t gpa = mem->guest_phys_addr +
+				    (vm_start - mem->userspace_addr);
+			phys_addr_t pa;
+
+			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
+			pa += vm_start - vma->vm_start;
+
+			/* IO region dirty page logging not allowed */
+			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
+				return -EINVAL;
+
+			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
+						    vm_end - vm_start,
+						    writable);
+			if (ret)
+				break;
+		}
+		hva = vm_end;
+	} while (hva < reg_end);
+
+	if (change == KVM_MR_FLAGS_ONLY)
+		return ret;
+
+	spin_lock(&kvm->mmu_lock);
+	if (ret)
+		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+	else
+		stage2_flush_memslot(kvm, memslot);
+	spin_unlock(&kvm->mmu_lock);
+	return ret;
+}
+
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+			   struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
+{
+	return 0;
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
+{
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+	kvm_free_stage2_pgd(kvm);
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+	spin_lock(&kvm->mmu_lock);
+	unmap_stage2_range(kvm, gpa, size);
+	spin_unlock(&kvm->mmu_lock);
+}
+
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ *
+ * Main problems:
+ * - S/W ops are local to a CPU (not broadcast)
+ * - We have line migration behind our back (speculation)
+ * - System caches don't support S/W at all (damn!)
+ *
+ * In the face of the above, the best we can do is to try and convert
+ * S/W ops to VA ops. Because the guest is not allowed to infer the
+ * S/W to PA mapping, it can only use S/W to nuke the whole cache,
+ * which is a rather good thing for us.
+ *
+ * Also, it is only used when turning caches on/off ("The expected
+ * usage of the cache maintenance instructions that operate by set/way
+ * is associated with the cache maintenance instructions associated
+ * with the powerdown and powerup of caches, if this is required by
+ * the implementation.").
+ *
+ * We use the following policy:
+ *
+ * - If we trap a S/W operation, we enable VM trapping to detect
+ *   caches being turned on/off, and do a full clean.
+ *
+ * - We flush the caches on both caches being turned on and off.
+ *
+ * - Once the caches are enabled, we stop trapping VM ops.
+ */
+void kvm_set_way_flush(struct kvm_vcpu *vcpu)
+{
+	unsigned long hcr = vcpu_get_hcr(vcpu);
+
+	/*
+	 * If this is the first time we do a S/W operation
+	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
+	 * VM trapping.
+	 *
+	 * Otherwise, rely on the VM trapping to wait for the MMU +
+	 * Caches to be turned off. At that point, we'll be able to
+	 * clean the caches again.
+	 */
+	if (!(hcr & HCR_TVM)) {
+		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
+					vcpu_has_cache_enabled(vcpu));
+		stage2_flush_vm(vcpu->kvm);
+		vcpu_set_hcr(vcpu, hcr | HCR_TVM);
+	}
+}
+
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
+{
+	bool now_enabled = vcpu_has_cache_enabled(vcpu);
+
+	/*
+	 * If switching the MMU+caches on, need to invalidate the caches.
+	 * If switching it off, need to clean the caches.
+	 * Clean + invalidate does the trick always.
+	 */
+	if (now_enabled != was_enabled)
+		stage2_flush_vm(vcpu->kvm);
+
+	/* Caches are now on, stop trapping VM ops (until a S/W op) */
+	if (now_enabled)
+		vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
+
+	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
+}
diff --git a/virt/kvm/arm/perf.c b/virt/kvm/arm/perf.c
new file mode 100644
index 000000000000..1a3849da0b4b
--- /dev/null
+++ b/virt/kvm/arm/perf.c
@@ -0,0 +1,68 @@
+/*
+ * Based on the x86 implementation.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+
+static int kvm_is_in_guest(void)
+{
+        return kvm_arm_get_running_vcpu() != NULL;
+}
+
+static int kvm_is_user_mode(void)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = kvm_arm_get_running_vcpu();
+
+	if (vcpu)
+		return !vcpu_mode_priv(vcpu);
+
+	return 0;
+}
+
+static unsigned long kvm_get_guest_ip(void)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = kvm_arm_get_running_vcpu();
+
+	if (vcpu)
+		return *vcpu_pc(vcpu);
+
+	return 0;
+}
+
+static struct perf_guest_info_callbacks kvm_guest_cbs = {
+	.is_in_guest	= kvm_is_in_guest,
+	.is_user_mode	= kvm_is_user_mode,
+	.get_guest_ip	= kvm_get_guest_ip,
+};
+
+int kvm_perf_init(void)
+{
+	return perf_register_guest_info_callbacks(&kvm_guest_cbs);
+}
+
+int kvm_perf_teardown(void)
+{
+	return perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
+}
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
new file mode 100644
index 000000000000..a08d7a93aebb
--- /dev/null
+++ b/virt/kvm/arm/psci.c
@@ -0,0 +1,332 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/preempt.h>
+#include <linux/kvm_host.h>
+#include <linux/wait.h>
+
+#include <asm/cputype.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_psci.h>
+#include <asm/kvm_host.h>
+
+#include <uapi/linux/psci.h>
+
+/*
+ * This is an implementation of the Power State Coordination Interface
+ * as described in ARM document number ARM DEN 0022A.
+ */
+
+#define AFFINITY_MASK(level)	~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
+
+static unsigned long psci_affinity_mask(unsigned long affinity_level)
+{
+	if (affinity_level <= 3)
+		return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
+
+	return 0;
+}
+
+static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * NOTE: For simplicity, we make VCPU suspend emulation to be
+	 * same-as WFI (Wait-for-interrupt) emulation.
+	 *
+	 * This means for KVM the wakeup events are interrupts and
+	 * this is consistent with intended use of StateID as described
+	 * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A).
+	 *
+	 * Further, we also treat power-down request to be same as
+	 * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2
+	 * specification (ARM DEN 0022A). This means all suspend states
+	 * for KVM will preserve the register state.
+	 */
+	kvm_vcpu_block(vcpu);
+
+	return PSCI_RET_SUCCESS;
+}
+
+static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.power_off = true;
+}
+
+static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
+{
+	struct kvm *kvm = source_vcpu->kvm;
+	struct kvm_vcpu *vcpu = NULL;
+	struct swait_queue_head *wq;
+	unsigned long cpu_id;
+	unsigned long context_id;
+	phys_addr_t target_pc;
+
+	cpu_id = vcpu_get_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
+	if (vcpu_mode_is_32bit(source_vcpu))
+		cpu_id &= ~((u32) 0);
+
+	vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id);
+
+	/*
+	 * Make sure the caller requested a valid CPU and that the CPU is
+	 * turned off.
+	 */
+	if (!vcpu)
+		return PSCI_RET_INVALID_PARAMS;
+	if (!vcpu->arch.power_off) {
+		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
+			return PSCI_RET_ALREADY_ON;
+		else
+			return PSCI_RET_INVALID_PARAMS;
+	}
+
+	target_pc = vcpu_get_reg(source_vcpu, 2);
+	context_id = vcpu_get_reg(source_vcpu, 3);
+
+	kvm_reset_vcpu(vcpu);
+
+	/* Gracefully handle Thumb2 entry point */
+	if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
+		target_pc &= ~((phys_addr_t) 1);
+		vcpu_set_thumb(vcpu);
+	}
+
+	/* Propagate caller endianness */
+	if (kvm_vcpu_is_be(source_vcpu))
+		kvm_vcpu_set_be(vcpu);
+
+	*vcpu_pc(vcpu) = target_pc;
+	/*
+	 * NOTE: We always update r0 (or x0) because for PSCI v0.1
+	 * the general puspose registers are undefined upon CPU_ON.
+	 */
+	vcpu_set_reg(vcpu, 0, context_id);
+	vcpu->arch.power_off = false;
+	smp_mb();		/* Make sure the above is visible */
+
+	wq = kvm_arch_vcpu_wq(vcpu);
+	swake_up(wq);
+
+	return PSCI_RET_SUCCESS;
+}
+
+static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
+{
+	int i, matching_cpus = 0;
+	unsigned long mpidr;
+	unsigned long target_affinity;
+	unsigned long target_affinity_mask;
+	unsigned long lowest_affinity_level;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *tmp;
+
+	target_affinity = vcpu_get_reg(vcpu, 1);
+	lowest_affinity_level = vcpu_get_reg(vcpu, 2);
+
+	/* Determine target affinity mask */
+	target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
+	if (!target_affinity_mask)
+		return PSCI_RET_INVALID_PARAMS;
+
+	/* Ignore other bits of target affinity */
+	target_affinity &= target_affinity_mask;
+
+	/*
+	 * If one or more VCPU matching target affinity are running
+	 * then ON else OFF
+	 */
+	kvm_for_each_vcpu(i, tmp, kvm) {
+		mpidr = kvm_vcpu_get_mpidr_aff(tmp);
+		if ((mpidr & target_affinity_mask) == target_affinity) {
+			matching_cpus++;
+			if (!tmp->arch.power_off)
+				return PSCI_0_2_AFFINITY_LEVEL_ON;
+		}
+	}
+
+	if (!matching_cpus)
+		return PSCI_RET_INVALID_PARAMS;
+
+	return PSCI_0_2_AFFINITY_LEVEL_OFF;
+}
+
+static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
+{
+	int i;
+	struct kvm_vcpu *tmp;
+
+	/*
+	 * The KVM ABI specifies that a system event exit may call KVM_RUN
+	 * again and may perform shutdown/reboot at a later time that when the
+	 * actual request is made.  Since we are implementing PSCI and a
+	 * caller of PSCI reboot and shutdown expects that the system shuts
+	 * down or reboots immediately, let's make sure that VCPUs are not run
+	 * after this call is handled and before the VCPUs have been
+	 * re-initialized.
+	 */
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		tmp->arch.power_off = true;
+		kvm_vcpu_kick(tmp);
+	}
+
+	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
+	vcpu->run->system_event.type = type;
+	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
+{
+	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
+}
+
+static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
+{
+	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
+}
+
+int kvm_psci_version(struct kvm_vcpu *vcpu)
+{
+	if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
+		return KVM_ARM_PSCI_0_2;
+
+	return KVM_ARM_PSCI_0_1;
+}
+
+static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
+	unsigned long val;
+	int ret = 1;
+
+	switch (psci_fn) {
+	case PSCI_0_2_FN_PSCI_VERSION:
+		/*
+		 * Bits[31:16] = Major Version = 0
+		 * Bits[15:0] = Minor Version = 2
+		 */
+		val = 2;
+		break;
+	case PSCI_0_2_FN_CPU_SUSPEND:
+	case PSCI_0_2_FN64_CPU_SUSPEND:
+		val = kvm_psci_vcpu_suspend(vcpu);
+		break;
+	case PSCI_0_2_FN_CPU_OFF:
+		kvm_psci_vcpu_off(vcpu);
+		val = PSCI_RET_SUCCESS;
+		break;
+	case PSCI_0_2_FN_CPU_ON:
+	case PSCI_0_2_FN64_CPU_ON:
+		mutex_lock(&kvm->lock);
+		val = kvm_psci_vcpu_on(vcpu);
+		mutex_unlock(&kvm->lock);
+		break;
+	case PSCI_0_2_FN_AFFINITY_INFO:
+	case PSCI_0_2_FN64_AFFINITY_INFO:
+		val = kvm_psci_vcpu_affinity_info(vcpu);
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+		/*
+		 * Trusted OS is MP hence does not require migration
+	         * or
+		 * Trusted OS is not present
+		 */
+		val = PSCI_0_2_TOS_MP;
+		break;
+	case PSCI_0_2_FN_SYSTEM_OFF:
+		kvm_psci_system_off(vcpu);
+		/*
+		 * We should'nt be going back to guest VCPU after
+		 * receiving SYSTEM_OFF request.
+		 *
+		 * If user space accidently/deliberately resumes
+		 * guest VCPU after SYSTEM_OFF request then guest
+		 * VCPU should see internal failure from PSCI return
+		 * value. To achieve this, we preload r0 (or x0) with
+		 * PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		ret = 0;
+		break;
+	case PSCI_0_2_FN_SYSTEM_RESET:
+		kvm_psci_system_reset(vcpu);
+		/*
+		 * Same reason as SYSTEM_OFF for preloading r0 (or x0)
+		 * with PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		ret = 0;
+		break;
+	default:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	}
+
+	vcpu_set_reg(vcpu, 0, val);
+	return ret;
+}
+
+static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
+	unsigned long val;
+
+	switch (psci_fn) {
+	case KVM_PSCI_FN_CPU_OFF:
+		kvm_psci_vcpu_off(vcpu);
+		val = PSCI_RET_SUCCESS;
+		break;
+	case KVM_PSCI_FN_CPU_ON:
+		mutex_lock(&kvm->lock);
+		val = kvm_psci_vcpu_on(vcpu);
+		mutex_unlock(&kvm->lock);
+		break;
+	default:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	}
+
+	vcpu_set_reg(vcpu, 0, val);
+	return 1;
+}
+
+/**
+ * kvm_psci_call - handle PSCI call if r0 value is in range
+ * @vcpu: Pointer to the VCPU struct
+ *
+ * Handle PSCI calls from guests through traps from HVC instructions.
+ * The calling convention is similar to SMC calls to the secure world
+ * where the function number is placed in r0.
+ *
+ * This function returns: > 0 (success), 0 (success but exit to user
+ * space), and < 0 (errors)
+ *
+ * Errors:
+ * -EINVAL: Unrecognized PSCI function
+ */
+int kvm_psci_call(struct kvm_vcpu *vcpu)
+{
+	switch (kvm_psci_version(vcpu)) {
+	case KVM_ARM_PSCI_0_2:
+		return kvm_psci_0_2_call(vcpu);
+	case KVM_ARM_PSCI_0_1:
+		return kvm_psci_0_1_call(vcpu);
+	default:
+		return -EINVAL;
+	};
+}
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
index 37d8b98867d5..f7dc5ddd6847 100644
--- a/virt/kvm/arm/trace.h
+++ b/virt/kvm/arm/trace.h
@@ -7,26 +7,250 @@
 #define TRACE_SYSTEM kvm
 
 /*
- * Tracepoints for vgic
+ * Tracepoints for entry/exit to guest
  */
-TRACE_EVENT(vgic_update_irq_pending,
-	TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
-	TP_ARGS(vcpu_id, irq, level),
+TRACE_EVENT(kvm_entry,
+	TP_PROTO(unsigned long vcpu_pc),
+	TP_ARGS(vcpu_pc),
 
 	TP_STRUCT__entry(
-		__field(	unsigned long,	vcpu_id	)
-		__field(	__u32,		irq	)
-		__field(	bool,		level	)
+		__field(	unsigned long,	vcpu_pc		)
 	),
 
 	TP_fast_assign(
-		__entry->vcpu_id	= vcpu_id;
-		__entry->irq		= irq;
+		__entry->vcpu_pc		= vcpu_pc;
+	),
+
+	TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
+);
+
+TRACE_EVENT(kvm_exit,
+	TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
+	TP_ARGS(idx, exit_reason, vcpu_pc),
+
+	TP_STRUCT__entry(
+		__field(	int,		idx		)
+		__field(	unsigned int,	exit_reason	)
+		__field(	unsigned long,	vcpu_pc		)
+	),
+
+	TP_fast_assign(
+		__entry->idx			= idx;
+		__entry->exit_reason		= exit_reason;
+		__entry->vcpu_pc		= vcpu_pc;
+	),
+
+	TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
+		  __print_symbolic(__entry->idx, kvm_arm_exception_type),
+		  __entry->exit_reason,
+		  __print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
+		  __entry->vcpu_pc)
+);
+
+TRACE_EVENT(kvm_guest_fault,
+	TP_PROTO(unsigned long vcpu_pc, unsigned long hsr,
+		 unsigned long hxfar,
+		 unsigned long long ipa),
+	TP_ARGS(vcpu_pc, hsr, hxfar, ipa),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_pc		)
+		__field(	unsigned long,	hsr		)
+		__field(	unsigned long,	hxfar		)
+		__field(   unsigned long long,	ipa		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc		= vcpu_pc;
+		__entry->hsr			= hsr;
+		__entry->hxfar			= hxfar;
+		__entry->ipa			= ipa;
+	),
+
+	TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
+		  __entry->ipa, __entry->hsr,
+		  __entry->hxfar, __entry->vcpu_pc)
+);
+
+TRACE_EVENT(kvm_access_fault,
+	TP_PROTO(unsigned long ipa),
+	TP_ARGS(ipa),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	ipa		)
+	),
+
+	TP_fast_assign(
+		__entry->ipa		= ipa;
+	),
+
+	TP_printk("IPA: %lx", __entry->ipa)
+);
+
+TRACE_EVENT(kvm_irq_line,
+	TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
+	TP_ARGS(type, vcpu_idx, irq_num, level),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	type		)
+		__field(	int,		vcpu_idx	)
+		__field(	int,		irq_num		)
+		__field(	int,		level		)
+	),
+
+	TP_fast_assign(
+		__entry->type		= type;
+		__entry->vcpu_idx	= vcpu_idx;
+		__entry->irq_num	= irq_num;
 		__entry->level		= level;
 	),
 
-	TP_printk("VCPU: %ld, IRQ %d, level: %d",
-		  __entry->vcpu_id, __entry->irq, __entry->level)
+	TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d",
+		  (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" :
+		  (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" :
+		  (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN",
+		  __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level)
+);
+
+TRACE_EVENT(kvm_mmio_emulate,
+	TP_PROTO(unsigned long vcpu_pc, unsigned long instr,
+		 unsigned long cpsr),
+	TP_ARGS(vcpu_pc, instr, cpsr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_pc		)
+		__field(	unsigned long,	instr		)
+		__field(	unsigned long,	cpsr		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc		= vcpu_pc;
+		__entry->instr			= instr;
+		__entry->cpsr			= cpsr;
+	),
+
+	TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
+		  __entry->vcpu_pc, __entry->instr, __entry->cpsr)
+);
+
+TRACE_EVENT(kvm_unmap_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	hva		)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+	),
+
+	TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
+);
+
+TRACE_EVENT(kvm_unmap_hva_range,
+	TP_PROTO(unsigned long start, unsigned long end),
+	TP_ARGS(start, end),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	start		)
+		__field(	unsigned long,	end		)
+	),
+
+	TP_fast_assign(
+		__entry->start		= start;
+		__entry->end		= end;
+	),
+
+	TP_printk("mmu notifier unmap range: %#08lx -- %#08lx",
+		  __entry->start, __entry->end)
+);
+
+TRACE_EVENT(kvm_set_spte_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	hva		)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+	),
+
+	TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
+);
+
+TRACE_EVENT(kvm_age_hva,
+	TP_PROTO(unsigned long start, unsigned long end),
+	TP_ARGS(start, end),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	start		)
+		__field(	unsigned long,	end		)
+	),
+
+	TP_fast_assign(
+		__entry->start		= start;
+		__entry->end		= end;
+	),
+
+	TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
+		  __entry->start, __entry->end)
+);
+
+TRACE_EVENT(kvm_test_age_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	hva		)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+	),
+
+	TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
+);
+
+TRACE_EVENT(kvm_set_way_flush,
+	    TP_PROTO(unsigned long vcpu_pc, bool cache),
+	    TP_ARGS(vcpu_pc, cache),
+
+	    TP_STRUCT__entry(
+		    __field(	unsigned long,	vcpu_pc		)
+		    __field(	bool,		cache		)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->vcpu_pc		= vcpu_pc;
+		    __entry->cache		= cache;
+	    ),
+
+	    TP_printk("S/W flush at 0x%016lx (cache %s)",
+		      __entry->vcpu_pc, __entry->cache ? "on" : "off")
+);
+
+TRACE_EVENT(kvm_toggle_cache,
+	    TP_PROTO(unsigned long vcpu_pc, bool was, bool now),
+	    TP_ARGS(vcpu_pc, was, now),
+
+	    TP_STRUCT__entry(
+		    __field(	unsigned long,	vcpu_pc		)
+		    __field(	bool,		was		)
+		    __field(	bool,		now		)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->vcpu_pc		= vcpu_pc;
+		    __entry->was		= was;
+		    __entry->now		= now;
+	    ),
+
+	    TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
+		      __entry->vcpu_pc, __entry->was ? "on" : "off",
+		      __entry->now ? "on" : "off")
 );
 
 /*
diff --git a/virt/kvm/arm/vgic/trace.h b/virt/kvm/arm/vgic/trace.h
new file mode 100644
index 000000000000..ed3229282888
--- /dev/null
+++ b/virt/kvm/arm/vgic/trace.h
@@ -0,0 +1,37 @@
+#if !defined(_TRACE_VGIC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_VGIC_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+TRACE_EVENT(vgic_update_irq_pending,
+	TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
+	TP_ARGS(vcpu_id, irq, level),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_id	)
+		__field(	__u32,		irq	)
+		__field(	bool,		level	)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu_id;
+		__entry->irq		= irq;
+		__entry->level		= level;
+	),
+
+	TP_printk("VCPU: %ld, IRQ %d, level: %d",
+		  __entry->vcpu_id, __entry->irq, __entry->level)
+);
+
+#endif /* _TRACE_VGIC_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm/vgic
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 3d0979c30721..d40210ae9474 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -21,7 +21,7 @@
 #include "vgic.h"
 
 #define CREATE_TRACE_POINTS
-#include "../trace.h"
+#include "trace.h"
 
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
-- 
cgit v1.2.3


From 453d0744f6c6ca3f9749b8c57c2e85b5b9f52514 Mon Sep 17 00:00:00 2001
From: Moritz Fischer <mdf@kernel.org>
Date: Mon, 24 Apr 2017 15:05:11 -0700
Subject: rtc: ds1374: wdt: Fix issue with timeout scaling from secs to wdt
 ticks

The issue is that the internal counter that triggers the watchdog reset
is actually running at 4096 Hz instead of 1Hz, therefore the value
given by userland (in sec) needs to be multiplied by 4096 to get the
correct behavior.

Fixes: 920f91e50c5b ("drivers/rtc/rtc-ds1374.c: add watchdog support")
Signed-off-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1374.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c
index 4cd115e93223..2a8b5b3e429b 100644
--- a/drivers/rtc/rtc-ds1374.c
+++ b/drivers/rtc/rtc-ds1374.c
@@ -525,6 +525,10 @@ static long ds1374_wdt_ioctl(struct file *file, unsigned int cmd,
 		if (get_user(new_margin, (int __user *)arg))
 			return -EFAULT;
 
+		/* the hardware's tick rate is 4096 Hz, so
+		 * the counter value needs to be scaled accordingly
+		 */
+		new_margin <<= 12;
 		if (new_margin < 1 || new_margin > 16777216)
 			return -EINVAL;
 
@@ -533,7 +537,8 @@ static long ds1374_wdt_ioctl(struct file *file, unsigned int cmd,
 		ds1374_wdt_ping();
 		/* fallthrough */
 	case WDIOC_GETTIMEOUT:
-		return put_user(wdt_margin, (int __user *)arg);
+		/* when returning ... inverse is true */
+		return put_user((wdt_margin >> 12), (int __user *)arg);
 	case WDIOC_SETOPTIONS:
 		if (copy_from_user(&options, (int __user *)arg, sizeof(int)))
 			return -EFAULT;
-- 
cgit v1.2.3


From 538c08f4c89580fc644e2bc64e0a4b86c925da4e Mon Sep 17 00:00:00 2001
From: Moritz Fischer <mdf@kernel.org>
Date: Mon, 24 Apr 2017 15:05:12 -0700
Subject: rtc: ds1374: wdt: Fix stop/start ioctl always returning -EINVAL

The WDIOC_SETOPTIONS case in the watchdog ioctl would alwayss falls
through to the -EINVAL case. This is wrong since thew watchdog does
actually get stopped or started correctly.

Fixes: 920f91e50c5b ("drivers/rtc/rtc-ds1374.c: add watchdog support")
Signed-off-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1374.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c
index 2a8b5b3e429b..38a2e9e684df 100644
--- a/drivers/rtc/rtc-ds1374.c
+++ b/drivers/rtc/rtc-ds1374.c
@@ -546,14 +546,15 @@ static long ds1374_wdt_ioctl(struct file *file, unsigned int cmd,
 		if (options & WDIOS_DISABLECARD) {
 			pr_info("disable watchdog\n");
 			ds1374_wdt_disable();
+			return 0;
 		}
 
 		if (options & WDIOS_ENABLECARD) {
 			pr_info("enable watchdog\n");
 			ds1374_wdt_settimeout(wdt_margin);
 			ds1374_wdt_ping();
+			return 0;
 		}
-
 		return -EINVAL;
 	}
 	return -ENOTTY;
-- 
cgit v1.2.3


From 1291927a490832a9bbd0158a5d3c05c25a3c7159 Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@arm.com>
Date: Wed, 3 May 2017 13:13:50 +0100
Subject: perf tools: Fix spelling mistakes

Mostly in the documentation.

Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170503131350.cebeecd8bd0f2968417626ab@arm.com
[ Fix spelling of "parameter" in one of the spell-checked lines ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-c2c.txt              | 4 ++--
 tools/perf/Documentation/perf-record.txt           | 2 +-
 tools/perf/Documentation/perf-report.txt           | 6 +++---
 tools/perf/Documentation/perf.data-file-format.txt | 4 ++--
 tools/perf/Documentation/tips.txt                  | 2 +-
 tools/perf/util/event.h                            | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index 2da07e51e119..822414235170 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -76,7 +76,7 @@ REPORT OPTIONS
 
 -c::
 --coalesce::
-	Specify sorintg fields for single cacheline display.
+	Specify sorting fields for single cacheline display.
 	Following fields are available: tid,pid,iaddr,dso
 	(see COALESCE)
 
@@ -106,7 +106,7 @@ REPORT OPTIONS
 
 -d::
 --display::
-	Siwtch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
+	Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
 
 C2C RECORD
 ----------
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index ea3789d05e5e..b0e9e921d534 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -225,7 +225,7 @@ OPTIONS
 	the libunwind or libdw library) should be used instead.
 	Using the "lbr" method doesn't require any compiler options. It
 	will produce call graphs from the hardware LBR registers. The
-	main limition is that it is only available on new Intel
+	main limitation is that it is only available on new Intel
 	platforms, such as Haswell. It can only get user call chain. It
 	doesn't work with branch stack sampling at the same time.
 
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 37a175914157..9fa84617181e 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -182,7 +182,7 @@ OPTIONS
 --parent=<regex>::
         A regex filter to identify parent. The parent is a caller of this
 	function and searched through the callchain, thus it requires callchain
-	information recorded. The pattern is in the exteneded regex format and
+	information recorded. The pattern is in the extended regex format and
 	defaults to "\^sys_|^do_page_fault", see '--sort parent'.
 
 -x::
@@ -207,8 +207,8 @@ OPTIONS
 -g::
 --call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
         Display call chains using type, min percent threshold, print limit,
-	call order, sort key, optional branch and value.  Note that ordering of
-	parameters is not fixed so any parement can be given in an arbitraty order.
+	call order, sort key, optional branch and value.  Note that ordering
+	is not fixed so any parameter can be given in an arbitrary order.
 	One exception is the print_limit which should be preceded by threshold.
 
 	print_type can be either:
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index fa2a9132f0a9..de8b39dda7b8 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -270,7 +270,7 @@ When the event stream contains multiple events each event is identified
 by an ID. This can be either through the PERF_SAMPLE_ID or the
 PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is
 at a fixed offset from the event header, which allows reliable
-parsing of the header. Relying on ID may be ambigious.
+parsing of the header. Relying on ID may be ambiguous.
 IDENTIFIER is only supported by newer Linux kernels.
 
 Perf record specific events:
@@ -288,7 +288,7 @@ struct attr_event {
 	uint64_t id[];
 };
 
-	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* depreceated */
+	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* deprecated */
 
 #define MAX_EVENT_NAME 64
 
diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
index 170b0289a7bc..db0ca3063eae 100644
--- a/tools/perf/Documentation/tips.txt
+++ b/tools/perf/Documentation/tips.txt
@@ -23,7 +23,7 @@ For memory address profiling, try: perf mem record / perf mem report
 For tracepoint events, try: perf report -s trace_fields
 To record callchains for each sample: perf record -g
 To record every process run by a user: perf record -u <user>
-Skip collecing build-id when recording: perf record -B
+Skip collecting build-id when recording: perf record -B
 To change sampling frequency to 100 Hz: perf record -F 100
 See assembly instructions with percentage: perf annotate <symbol>
 If you prefer Intel style assembly, try: perf annotate -M intel
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 27ac047490c3..7c3fa1c8cbcd 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -229,7 +229,7 @@ struct build_id_event {
 enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_USER_TYPE_START		= 64,
 	PERF_RECORD_HEADER_ATTR			= 64,
-	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* depreceated */
+	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* deprecated */
 	PERF_RECORD_HEADER_TRACING_DATA		= 66,
 	PERF_RECORD_HEADER_BUILD_ID		= 67,
 	PERF_RECORD_FINISHED_ROUND		= 68,
-- 
cgit v1.2.3


From d1f7b0234ec7743899321aaaba379b2f5f9d0500 Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@arm.com>
Date: Wed, 3 May 2017 13:13:56 +0100
Subject: perf annotate: Fix AArch64 comment char

The commit 0fcb1da4aba "perf annotate: AArch64 support" blindly copied
the comment character from the original:

            https://lkml.org/lkml/2016/5/19/461

whereas that same commit shows objdump output utilizing the C++ style
"//" as the comment delimeter.  Since '/' doesn't occur elsewhere in
objdump output, we retain the single character check, but fix it to be
'/'.

Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Chris Riyder <chris.ryder@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: 0fcb1da4aba6 ("perf annotate: AArch64 support")
Link: http://lkml.kernel.org/r/20170503131356.be88f977094fb3fa0f49b99d@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm64/annotate/instructions.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
index 44eafd6f2d50..8f1908756cb6 100644
--- a/tools/perf/arch/arm64/annotate/instructions.c
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -50,7 +50,7 @@ static int arm64__annotate_init(struct arch *arch)
 	arch->initialized = true;
 	arch->priv	  = arm;
 	arch->associate_instruction_ops   = arm64__associate_instruction_ops;
-	arch->objdump.comment_char	  = ';';
+	arch->objdump.comment_char	  = '/';
 	arch->objdump.skip_functions_char = '+';
 	return 0;
 
-- 
cgit v1.2.3


From 805b151a1afd24414706a7f6ae275fbb9649be74 Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@arm.com>
Date: Wed, 3 May 2017 13:14:02 +0100
Subject: perf tests kmod-path: Don't fail if compressed modules aren't
 supported

__kmod_path__parse() uses is_supported_compression() to determine and
parse out compressed module file extensions.  On systems without zlib,
this test fails and __kmod_path__parse() continues to strcmp "ko" with
"gz".  Don't do this on those systems.

Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: 3c8a67f50a1e ("perf tools: Add kmod_path__parse function")
Link: http://lkml.kernel.org/r/20170503131402.c66e314460026c80cd787b34@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/kmod-path.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
index 76f41f249944..6cd9e5107f77 100644
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -61,6 +61,7 @@ int test__kmod_path__parse(int subtest __maybe_unused)
 	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true);
 	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false);
 
+#ifdef HAVE_ZLIB_SUPPORT
 	/* path                alloc_name  alloc_ext   kmod  comp  name   ext */
 	T("/xxxx/xxxx/x.ko.gz", true     , true      , true, true, "[x]", "gz");
 	T("/xxxx/xxxx/x.ko.gz", false    , true      , true, true, NULL , "gz");
@@ -96,6 +97,7 @@ int test__kmod_path__parse(int subtest __maybe_unused)
 	M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
 	M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
 	M("x.ko.gz", PERF_RECORD_MISC_USER, false);
+#endif
 
 	/* path            alloc_name  alloc_ext  kmod  comp   name             ext */
 	T("[test_module]", true      , true     , true, false, "[test_module]", NULL);
-- 
cgit v1.2.3


From 1e6e7eae8a3a66f5fee4f44ef165ffcf31f0c40e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 4 May 2017 10:30:40 -0300
Subject: tools build: Fixup sched_getcpu feature test

We have tools/build/feature/test-all.c to speed up feature testing,
doing all tests at once, but then all tests in this file should normally
pass.

That is not the case with the sched-getcpu one, that wasn't passing when
included from test-all.c because it needs to have _GNU_SOURCE defined
before including sched.h, but _GNU_SOURCE is defined by a header
included from another feature test included earlier in test-all.d,
test-libpython.c, resulting in:

  $ cat /tmp/build/perf/feature/test-all.make.output
  In file included from test-all.c:121:0:
  test-sched_getcpu.c:1:0: error: "_GNU_SOURCE" redefined [-Werror]
   #define _GNU_SOURCE

  In file included from /usr/include/python2.7/pyconfig.h:6:0,
                   from /usr/include/python2.7/Python.h:8,
                   from test-libpython.c:1,
                   from test-all.c:13:
  /usr/include/python2.7/pyconfig-64.h:1177:0: note: this is the location of the previous definition
   #define _GNU_SOURCE 1

  cc1: all warnings being treated as errors

Which would trigger testing the tests individually, when that
_GNU_SOURCE redefinition would not take place, and the whole process
would continue, just slower... Fix it.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: 120010cb1eea ("tools build: Add test for sched_getcpu()")
Link: http://lkml.kernel.org/n/tip-3qp1it69xsc4w8gnuu1e9ayh@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/build/feature/test-sched_getcpu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/build/feature/test-sched_getcpu.c b/tools/build/feature/test-sched_getcpu.c
index c4a148dd7104..9c6b4cbffb1c 100644
--- a/tools/build/feature/test-sched_getcpu.c
+++ b/tools/build/feature/test-sched_getcpu.c
@@ -1,4 +1,6 @@
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE
+#endif
 #include <sched.h>
 
 int main(void)
-- 
cgit v1.2.3


From 50730eb367147dc96394d25b3f55c33595663b6f Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Wed, 3 May 2017 11:33:57 +0100
Subject: power: supply: twl4030-charger: make
 twl4030_bci_property_is_writeable static

The function twl4030_bci_property_is_writeable can be made static
as it does not need to be in global scope.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Tested-by: "H. Nikolaus Schaller" <hns@goldelico.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/twl4030_charger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c
index abd10f6861ae..2f82d0e9ec1b 100644
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -943,7 +943,7 @@ static int twl4030_bci_set_property(struct power_supply *psy,
 	return 0;
 }
 
-int twl4030_bci_property_is_writeable(struct power_supply *psy,
+static int twl4030_bci_property_is_writeable(struct power_supply *psy,
 				      enum power_supply_property psp)
 {
 	switch (psp) {
-- 
cgit v1.2.3


From 4f700a5285dc8f9dde4ad02542bc42fe9f97cfcf Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 3 May 2017 17:15:29 -0700
Subject: power: supply: cpcap-charger: Fix charger name

I noticed we have a different name from what Android is using.
Let's not break the /sys/class/power user space interface here
and use the same naming as Android has.

On Android we have the following for droid 4:

$ ls /sys/class/power_supply/
ac battery usb

So let's use the usb naming here for charger too.

Fixes: 0c9888e3c192 ("power: supply: cpcap-charger: Add minimal
CPCAP PMIC battery charger")
Cc: Marcel Partap <mpartap@gmx.net>
Cc: Michael Scott <michael.scott@linaro.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/cpcap-charger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/supply/cpcap-charger.c b/drivers/power/supply/cpcap-charger.c
index 543a1bd21ab9..17907fcaebc3 100644
--- a/drivers/power/supply/cpcap-charger.c
+++ b/drivers/power/supply/cpcap-charger.c
@@ -566,7 +566,7 @@ out_err:
 }
 
 static const struct power_supply_desc cpcap_charger_usb_desc = {
-	.name		= "cpcap_usb",
+	.name		= "usb",
 	.type		= POWER_SUPPLY_TYPE_USB,
 	.properties	= cpcap_charger_props,
 	.num_properties	= ARRAY_SIZE(cpcap_charger_props),
-- 
cgit v1.2.3


From 3ae5f06681fc0e9ce6a0ac4f909aa7695e1f3228 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 3 May 2017 17:15:30 -0700
Subject: power: supply: cpcap-charger: Fix charge voltage configuration

We have the charge voltage wrong, it should be 4.35V instead of 4.2V.
This will cause the battery to never get fully charged.

I noticed this when looking at the Andoid kernel battery and charger
status for a fully charged battery:

POWER_SUPPLY_VOLTAGE_NOW=4351000

Also the battery on droid 4 says "4.35, 1735/1785mAh (min/typ),
6.6/6.8 Wh (min/typ)". Presumably the 4.35 on the battery is the
charge voltage.

And finally, on Android the CPCAP CRM register is set to 0x03b5 where
the b is the charge voltage.

Let's fix the charge voltage define and update the charge configuration
to use the 4.35V setting.

Fixes: 0c9888e3c192 ("power: supply: cpcap-charger: Add minimal
CPCAP PMIC battery charger")
Cc: Marcel Partap <mpartap@gmx.net>
Cc: Michael Scott <michael.scott@linaro.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/cpcap-charger.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/power/supply/cpcap-charger.c b/drivers/power/supply/cpcap-charger.c
index 17907fcaebc3..33ca1174070b 100644
--- a/drivers/power/supply/cpcap-charger.c
+++ b/drivers/power/supply/cpcap-charger.c
@@ -76,7 +76,7 @@
 #define CPCAP_REG_CRM_VCHRG_4V30	CPCAP_REG_CRM_VCHRG(0x8)
 #define CPCAP_REG_CRM_VCHRG_4V32	CPCAP_REG_CRM_VCHRG(0x9)
 #define CPCAP_REG_CRM_VCHRG_4V34	CPCAP_REG_CRM_VCHRG(0xa)
-#define CPCAP_REG_CRM_VCHRG_4V36	CPCAP_REG_CRM_VCHRG(0xb)
+#define CPCAP_REG_CRM_VCHRG_4V35	CPCAP_REG_CRM_VCHRG(0xb)
 #define CPCAP_REG_CRM_VCHRG_4V38	CPCAP_REG_CRM_VCHRG(0xc)
 #define CPCAP_REG_CRM_VCHRG_4V40	CPCAP_REG_CRM_VCHRG(0xd)
 #define CPCAP_REG_CRM_VCHRG_4V42	CPCAP_REG_CRM_VCHRG(0xe)
@@ -433,7 +433,7 @@ static void cpcap_usb_detect(struct work_struct *work)
 			max_current = CPCAP_REG_CRM_ICHRG_0A528;
 
 		error = cpcap_charger_set_state(ddata,
-						CPCAP_REG_CRM_VCHRG_4V20,
+						CPCAP_REG_CRM_VCHRG_4V35,
 						max_current,
 						CPCAP_REG_CRM_TR_0A72);
 		if (error)
-- 
cgit v1.2.3


From 6ffa8ace7092a5414d5d387c54a66118eb90482b Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 3 May 2017 17:15:31 -0700
Subject: power: supply: cpcap-charger: Fix enable for 3.8V charge setting

Zero is a valid register value for for 3.8V charging.

Fixes: 0c9888e3c192 ("power: supply: cpcap-charger: Add minimal
CPCAP PMIC battery charger")
Cc: Marcel Partap <mpartap@gmx.net>
Cc: Michael Scott <michael.scott@linaro.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/cpcap-charger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/supply/cpcap-charger.c b/drivers/power/supply/cpcap-charger.c
index 33ca1174070b..378fbdaba85b 100644
--- a/drivers/power/supply/cpcap-charger.c
+++ b/drivers/power/supply/cpcap-charger.c
@@ -262,7 +262,7 @@ static int cpcap_charger_set_state(struct cpcap_charger_ddata *ddata,
 	bool enable;
 	int error;
 
-	enable = max_voltage && (charge_current || trickle_current);
+	enable = (charge_current || trickle_current);
 	dev_dbg(ddata->dev, "%s enable: %i\n", __func__, enable);
 
 	if (!enable) {
-- 
cgit v1.2.3


From 35f4f99ccbc1ccf3a1f1157a5f2f14d6a7d1848f Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 3 May 2017 17:15:32 -0700
Subject: power: supply: cpcap-charger: Keep trickle charger bits disabled

Android does not seem to set the trickle charger bits, and these
seem to be only used by the bootloader when bringing up a
completely discharged battery. So let's keep the trickle charging
bits disabled and avoid misconfiguring the hardware.

Fixes: 0c9888e3c192 ("power: supply: cpcap-charger: Add minimal
CPCAP PMIC battery charger")
Cc: Marcel Partap <mpartap@gmx.net>
Cc: Michael Scott <michael.scott@linaro.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/cpcap-charger.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/power/supply/cpcap-charger.c b/drivers/power/supply/cpcap-charger.c
index 378fbdaba85b..26a2dc7ac9a2 100644
--- a/drivers/power/supply/cpcap-charger.c
+++ b/drivers/power/supply/cpcap-charger.c
@@ -434,8 +434,7 @@ static void cpcap_usb_detect(struct work_struct *work)
 
 		error = cpcap_charger_set_state(ddata,
 						CPCAP_REG_CRM_VCHRG_4V35,
-						max_current,
-						CPCAP_REG_CRM_TR_0A72);
+						max_current, 0);
 		if (error)
 			goto out_err;
 	} else {
-- 
cgit v1.2.3


From 290d638e04e7b94ff34d2f5e9426e7ce617e9a59 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Tue, 2 May 2017 18:21:12 -0400
Subject: soc/qbman: Move dma-mapping.h include to qman_priv.h

With the include there it fixes all build failures in this
directory caused by commit 461a6946b1f9.

Fixes: 461a6946b1f9 ("iommu: Remove pci.h include from trace/events/iommu.h")
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/soc/fsl/qbman/qman_portal.c | 2 --
 drivers/soc/fsl/qbman/qman_priv.h   | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/soc/fsl/qbman/qman_portal.c b/drivers/soc/fsl/qbman/qman_portal.c
index 4a6a8ae5e0aa..adbaa30d3c5a 100644
--- a/drivers/soc/fsl/qbman/qman_portal.c
+++ b/drivers/soc/fsl/qbman/qman_portal.c
@@ -30,8 +30,6 @@
 
 #include "qman_priv.h"
 
-#include <linux/dma-mapping.h>
-
 struct qman_portal *qman_dma_portal;
 EXPORT_SYMBOL(qman_dma_portal);
 
diff --git a/drivers/soc/fsl/qbman/qman_priv.h b/drivers/soc/fsl/qbman/qman_priv.h
index 53685b59718e..4f2ef3ebd88e 100644
--- a/drivers/soc/fsl/qbman/qman_priv.h
+++ b/drivers/soc/fsl/qbman/qman_priv.h
@@ -33,6 +33,7 @@
 #include "dpaa_sys.h"
 
 #include <soc/fsl/qman.h>
+#include <linux/dma-mapping.h>
 #include <linux/iommu.h>
 
 #if defined(CONFIG_FSL_PAMU)
-- 
cgit v1.2.3


From 758929005f06f954b7e1c87a1c9fdb44157b228f Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sun, 23 Apr 2017 13:43:24 +0800
Subject: rtc: snvs: fix an incorrect check of return value

Function devm_regmap_init_mmio() returns an ERR_PTR on error. However,
in function snvs_rtc_probe() its return value is checked against NULL.
This patch fixes it by checking the return value with IS_ERR().

Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-snvs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c
index d51b07d620f7..d8ef9e052c4f 100644
--- a/drivers/rtc/rtc-snvs.c
+++ b/drivers/rtc/rtc-snvs.c
@@ -258,7 +258,7 @@ static int snvs_rtc_probe(struct platform_device *pdev)
 		of_property_read_u32(pdev->dev.of_node, "offset", &data->offset);
 	}
 
-	if (!data->regmap) {
+	if (IS_ERR(data->regmap)) {
 		dev_err(&pdev->dev, "Can't find snvs syscon\n");
 		return -ENODEV;
 	}
-- 
cgit v1.2.3


From 332e0d13d3c7d1e4394a513585a3d3b15a0030b8 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sun, 23 Apr 2017 20:48:07 +0800
Subject: rtc: gemini: add return value validation

Function devm_ioremap() will return a NULL pointer if it fails to remap
IO address, and its return value should be validated before it is used.
However, in function gemini_rtc_probe(), its return value is not
checked. This may result in bad memory access bugs on future access,
e.g. calling the function gemini_rtc_read_time().

Signed-off-by: Pan Bian <bianpan2016@163.com>
Acked-by: Hans Ulli Kroll <ulli.kroll@googlemail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-gemini.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/rtc/rtc-gemini.c b/drivers/rtc/rtc-gemini.c
index ccf0dbadb62d..5279390bb42d 100644
--- a/drivers/rtc/rtc-gemini.c
+++ b/drivers/rtc/rtc-gemini.c
@@ -139,6 +139,8 @@ static int gemini_rtc_probe(struct platform_device *pdev)
 
 	rtc->rtc_base = devm_ioremap(dev, res->start,
 				     resource_size(res));
+	if (!rtc->rtc_base)
+		return -ENOMEM;
 
 	ret = devm_request_irq(dev, rtc->rtc_irq, gemini_rtc_interrupt,
 			       IRQF_SHARED, pdev->name, dev);
-- 
cgit v1.2.3


From 9bcaaea7418d09691f1ffab5c49aacafe3eef9d0 Mon Sep 17 00:00:00 2001
From: Chris Mason <clm@fb.com>
Date: Thu, 4 May 2017 16:08:08 -0700
Subject: btrfs: fix the gfp_mask for the reada_zones radix tree

Commits cc8385b59e17 and 7ef70b4d9987a7 added preallocation for the
reada radix trees and also switched them over to GFP_KERNEL for the
default gfp mask.

Since we're doing radix tree insertions under spinlocks, we need
to make sure the mask doesn't allow sleeping.  This fix keeps
the radix preallocation but switches back to the original gfp_mask.

Reported-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/disk-io.c | 2 +-
 fs/btrfs/volumes.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ba7bd65693a3..683194242deb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2693,7 +2693,7 @@ int open_ctree(struct super_block *sb,
 	fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
 	fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
 	/* readahead state */
-	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_KERNEL);
+	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 	spin_lock_init(&fs_info->reada_lock);
 
 	fs_info->thread_pool_size = min_t(unsigned long,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6cad3233181c..017b67daa3bb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -252,7 +252,7 @@ static struct btrfs_device *__alloc_device(void)
 	atomic_set(&dev->reada_in_flight, 0);
 	atomic_set(&dev->dev_stats_ccnt, 0);
 	btrfs_device_data_ordered_init(dev);
-	INIT_RADIX_TREE(&dev->reada_zones, GFP_KERNEL);
+	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 
 	return dev;
-- 
cgit v1.2.3


From a71a5dc7f833943998e97ca8fa6a4c708a0ed1a9 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Tue, 11 Apr 2017 16:24:16 -0700
Subject: target: Fix compare_and_write_callback handling for non GOOD status

Following the bugfix for handling non SAM_STAT_GOOD COMPARE_AND_WRITE
status during COMMIT phase in commit 9b2792c3da1, the same bug exists
for the READ phase as well.

This would manifest first as a lost SCSI response, and eventual
hung task during fabric driver logout or re-login, as existing
shutdown logic waited for the COMPARE_AND_WRITE se_cmd->cmd_kref
to reach zero.

To address this bug, compare_and_write_callback() has been changed
to set post_ret = 1 and return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE
as necessary to signal failure status.

Reported-by: Bill Borsari <wgb@datera.io>
Cc: Bill Borsari <wgb@datera.io>
Tested-by: Gary Guo <ghg@datera.io>
Cc: Gary Guo <ghg@datera.io>
Cc: <stable@vger.kernel.org> # v4.1+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_sbc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index f9250b3c3fd4..a0ad618f1b1a 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -507,8 +507,11 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes
 	 * been failed with a non-zero SCSI status.
 	 */
 	if (cmd->scsi_status) {
-		pr_err("compare_and_write_callback: non zero scsi_status:"
+		pr_debug("compare_and_write_callback: non zero scsi_status:"
 			" 0x%02x\n", cmd->scsi_status);
+		*post_ret = 1;
+		if (cmd->scsi_status == SAM_STAT_CHECK_CONDITION)
+			ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 		goto out;
 	}
 
-- 
cgit v1.2.3


From 197b806ae5db60c6f609d74da04ddb62ea5e1b00 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Tue, 25 Apr 2017 10:55:12 -0700
Subject: iscsi-target: Set session_fall_back_to_erl0 when forcing
 reinstatement

While testing modification of per se_node_acl queue_depth forcing
session reinstatement via lio_target_nacl_cmdsn_depth_store() ->
core_tpg_set_initiator_node_queue_depth(), a hung task bug triggered
when changing cmdsn_depth invoked session reinstatement while an iscsi
login was already waiting for session reinstatement to complete.

This can happen when an outstanding se_cmd descriptor is taking a
long time to complete, and session reinstatement from iscsi login
or cmdsn_depth change occurs concurrently.

To address this bug, explicitly set session_fall_back_to_erl0 = 1
when forcing session reinstatement, so session reinstatement is
not attempted if an active session is already being shutdown.

This patch has been tested with two scenarios.  The first when
iscsi login is blocked waiting for iscsi session reinstatement
to complete followed by queue_depth change via configfs, and
second when queue_depth change via configfs us blocked followed
by a iscsi login driven session reinstatement.

Note this patch depends on commit d36ad77f702 to handle multiple
sessions per se_node_acl when changing cmdsn_depth, and for
pre v4.5 kernels will need to be included for stable as well.

Reported-by: Gary Guo <ghg@datera.io>
Tested-by: Gary Guo <ghg@datera.io>
Cc: Gary Guo <ghg@datera.io>
Cc: <stable@vger.kernel.org> # v4.1+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c          | 1 +
 drivers/target/iscsi/iscsi_target_configfs.c | 1 +
 drivers/target/iscsi/iscsi_target_login.c    | 1 +
 3 files changed, 3 insertions(+)

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 0f7ade04b583..26a9bcd5ee6a 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -4663,6 +4663,7 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force)
 			continue;
 		}
 		atomic_set(&sess->session_reinstatement, 1);
+		atomic_set(&sess->session_fall_back_to_erl0, 1);
 		spin_unlock(&sess->conn_lock);
 
 		list_move_tail(&se_sess->sess_list, &free_list);
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 344e8448869c..96d9c73af1ae 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -1528,6 +1528,7 @@ static void lio_tpg_close_session(struct se_session *se_sess)
 		return;
 	}
 	atomic_set(&sess->session_reinstatement, 1);
+	atomic_set(&sess->session_fall_back_to_erl0, 1);
 	spin_unlock(&sess->conn_lock);
 
 	iscsit_stop_time2retain_timer(sess);
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index ad8f3011bdc2..66238477137b 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -208,6 +208,7 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn)
 			    initiatorname_param->value) &&
 		   (sess_p->sess_ops->SessionType == sessiontype))) {
 			atomic_set(&sess_p->session_reinstatement, 1);
+			atomic_set(&sess_p->session_fall_back_to_erl0, 1);
 			spin_unlock(&sess_p->conn_lock);
 			iscsit_inc_session_usage_count(sess_p);
 			iscsit_stop_time2retain_timer(sess_p);
-- 
cgit v1.2.3


From 46861cdd80e1a862dc65d10833a1450861fabef7 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Tue, 25 Apr 2017 13:52:45 -0700
Subject: target: Don't force session reset if queue_depth does not change

Keeping in the idempotent nature of target_core_fabric_configfs.c,
if a queue_depth value is set and it's the same as the existing
value, don't attempt to force session reinstatement.

Reported-by: Raghu Krishnamurthy <rk@datera.io>
Cc: Raghu Krishnamurthy <rk@datera.io>
Tested-by: Gary Guo <ghg@datera.io>
Cc: Gary Guo <ghg@datera.io>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_tpg.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index dfaef4d3b2d2..310d9e55c6eb 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -397,6 +397,13 @@ int core_tpg_set_initiator_node_queue_depth(
 {
 	struct se_portal_group *tpg = acl->se_tpg;
 
+	/*
+	 * Allow the setting of se_node_acl queue_depth to be idempotent,
+	 * and not force a session shutdown event if the value is not
+	 * changing.
+	 */
+	if (acl->queue_depth == queue_depth)
+		return 0;
 	/*
 	 * User has requested to change the queue depth for a Initiator Node.
 	 * Change the value in the Node's struct se_node_acl, and call
-- 
cgit v1.2.3


From d906d8af28e524bfa62c49cb2315f6ccdb910938 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Tue, 2 May 2017 23:57:05 -0500
Subject: tcmu: fix module removal due to stuck thread

We need to do a kthread_should_stop to check when kthread_stop has been
called.

This was a regression added in

b6df4b79a5514a9c6c53533436704129ef45bf76
tcmu: Add global data block pool support

so not sure if you wanted to merge it in with that patch or what.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 89b75ce563d8..9045837f748b 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1560,6 +1560,9 @@ static int unmap_thread_fn(void *data)
 		schedule();
 		finish_wait(&unmap_wait, &__wait);
 
+		if (kthread_should_stop())
+			break;
+
 		mutex_lock(&root_udev_mutex);
 		list_for_each_entry(udev, &root_udev, node) {
 			mutex_lock(&udev->cmdr_lock);
-- 
cgit v1.2.3


From 42fc6c6cb1662ba2fa727dd01c9473c63be4e3b6 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 4 May 2017 09:51:40 -0500
Subject: x86/asm: Don't use RBP as a temporary register in
 csum_partial_copy_generic()

Andrey Konovalov reported the following warning while fuzzing the kernel
with syzkaller:

  WARNING: kernel stack regs at ffff8800686869f8 in a.out:4933 has bad 'bp' value c3fc855a10167ec0

The unwinder dump revealed that RBP had a bad value when an interrupt
occurred in csum_partial_copy_generic().

That function saves RBP on the stack and then overwrites it, using it as
a scratch register.  That's problematic because it breaks stack traces
if an interrupt occurs in the middle of the function.

Replace the usage of RBP with another callee-saved register (R15) so
stack traces are no longer affected.

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: David S . Miller <davem@davemloft.net>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Cc: linux-sctp@vger.kernel.org
Cc: netdev <netdev@vger.kernel.org>
Cc: syzkaller <syzkaller@googlegroups.com>
Link: http://lkml.kernel.org/r/4b03a961efda5ec9bfe46b7b9c9ad72d1efad343.1493909486.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/lib/csum-copy_64.S | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 7e48807b2fa1..45a53dfe1859 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -55,7 +55,7 @@ ENTRY(csum_partial_copy_generic)
 	movq  %r12, 3*8(%rsp)
 	movq  %r14, 4*8(%rsp)
 	movq  %r13, 5*8(%rsp)
-	movq  %rbp, 6*8(%rsp)
+	movq  %r15, 6*8(%rsp)
 
 	movq  %r8, (%rsp)
 	movq  %r9, 1*8(%rsp)
@@ -74,7 +74,7 @@ ENTRY(csum_partial_copy_generic)
 	/* main loop. clear in 64 byte blocks */
 	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
 	/* r11:	temp3, rdx: temp4, r12 loopcnt */
-	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
+	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
 	.p2align 4
 .Lloop:
 	source
@@ -89,7 +89,7 @@ ENTRY(csum_partial_copy_generic)
 	source
 	movq  32(%rdi), %r10
 	source
-	movq  40(%rdi), %rbp
+	movq  40(%rdi), %r15
 	source
 	movq  48(%rdi), %r14
 	source
@@ -103,7 +103,7 @@ ENTRY(csum_partial_copy_generic)
 	adcq  %r11, %rax
 	adcq  %rdx, %rax
 	adcq  %r10, %rax
-	adcq  %rbp, %rax
+	adcq  %r15, %rax
 	adcq  %r14, %rax
 	adcq  %r13, %rax
 
@@ -121,7 +121,7 @@ ENTRY(csum_partial_copy_generic)
 	dest
 	movq %r10, 32(%rsi)
 	dest
-	movq %rbp, 40(%rsi)
+	movq %r15, 40(%rsi)
 	dest
 	movq %r14, 48(%rsi)
 	dest
@@ -203,7 +203,7 @@ ENTRY(csum_partial_copy_generic)
 	movq 3*8(%rsp), %r12
 	movq 4*8(%rsp), %r14
 	movq 5*8(%rsp), %r13
-	movq 6*8(%rsp), %rbp
+	movq 6*8(%rsp), %r15
 	addq $7*8, %rsp
 	ret
 
-- 
cgit v1.2.3


From 5ea30e4e58040cfd6434c2f33dc3ea76e2c15b05 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Thu, 4 May 2017 09:32:09 -0400
Subject: stackprotector: Increase the per-task stack canary's random range
 from 32 bits to 64 bits on 64-bit platforms

The stack canary is an 'unsigned long' and should be fully initialized to
random data rather than only 32 bits of random data.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Arjan van Ven <arjan@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kernel-hardening@lists.openwall.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170504133209.3053-1-danielmicay@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/fork.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 3a4343cdfe90..d681f8f10d2d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -536,7 +536,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	set_task_stack_end_magic(tsk);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
-	tsk->stack_canary = get_random_int();
+	tsk->stack_canary = get_random_long();
 #endif
 
 	/*
-- 
cgit v1.2.3


From fc5f9d5f151c9fff21d3d1d2907b888a5aec3ff7 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe@redhat.com>
Date: Thu, 4 May 2017 10:25:47 +0800
Subject: x86/mm: Fix boot crash caused by incorrect loop count calculation in
 sync_global_pgds()

Jeff Moyer reported that on his system with two memory regions 0~64G and
1T~1T+192G, and kernel option "memmap=192G!1024G" added, enabling KASLR
will make the system hang intermittently during boot. While adding 'nokaslr'
won't.

The back trace is:

 Oops: 0000 [#1] SMP

 RIP: memcpy_erms()
 [ .... ]
 Call Trace:
  pmem_rw_page()
  bdev_read_page()
  do_mpage_readpage()
  mpage_readpages()
  blkdev_readpages()
  __do_page_cache_readahead()
  force_page_cache_readahead()
  page_cache_sync_readahead()
  generic_file_read_iter()
  blkdev_read_iter()
  __vfs_read()
  vfs_read()
  SyS_read()
  entry_SYSCALL_64_fastpath()

This crash happens because the for loop count calculation in sync_global_pgds()
is not correct. When a mapping area crosses PGD entries, we should
calculate the starting address of region which next PGD covers and assign
it to next for loop count, but not add PGDIR_SIZE directly. The old
code works right only if the mapping area is an exact multiple of PGDIR_SIZE,
otherwize the end region could be skipped so that it can't be synchronized
to all other processes from kernel PGD init_mm.pgd.

In Jeff's system, emulated pmem area [1024G, 1216G) is smaller than
PGDIR_SIZE. While 'nokaslr' works because PAGE_OFFSET is 1T aligned, it
makes this area be mapped inside one PGD entry. With KASLR enabled,
this area could cross two PGD entries, then the next PGD entry won't
be synced to all other processes. That is why we saw empty PGD.

Fix it.

Reported-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Baoquan He <bhe@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jinbum Park <jinb.park7@gmail.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1493864747-8506-1-git-send-email-bhe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/init_64.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 745e5e183169..97fe88749e18 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -94,10 +94,10 @@ __setup("noexec32=", nonx32_setup);
  */
 void sync_global_pgds(unsigned long start, unsigned long end)
 {
-	unsigned long address;
+	unsigned long addr;
 
-	for (address = start; address <= end; address += PGDIR_SIZE) {
-		pgd_t *pgd_ref = pgd_offset_k(address);
+	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+		pgd_t *pgd_ref = pgd_offset_k(addr);
 		const p4d_t *p4d_ref;
 		struct page *page;
 
@@ -106,7 +106,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 		 * handle synchonization on p4d level.
 		 */
 		BUILD_BUG_ON(pgd_none(*pgd_ref));
-		p4d_ref = p4d_offset(pgd_ref, address);
+		p4d_ref = p4d_offset(pgd_ref, addr);
 
 		if (p4d_none(*p4d_ref))
 			continue;
@@ -117,8 +117,8 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 			p4d_t *p4d;
 			spinlock_t *pgt_lock;
 
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			p4d = p4d_offset(pgd, address);
+			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
+			p4d = p4d_offset(pgd, addr);
 			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);
-- 
cgit v1.2.3


From 61baf15555129f69720334f232b153890895ef71 Mon Sep 17 00:00:00 2001
From: Scott Wood <oss@buserror.net>
Date: Fri, 5 May 2017 01:22:06 -0500
Subject: powerpc/64e: Don't place the stack beyond TASK_SIZE

Commit f4ea6dcb08ea ("powerpc/mm: Enable mappings above 128TB") increased
the task size on book3s, and introduced a mechanism to dynamically
control whether a task uses these larger addresses.  While the change to
the task size itself was ifdef-protected to only apply on book3s, the
change to STACK_TOP_USER64 was not.  On book3e, this had the effect of
trying to use addresses up to 128TiB for the stack despite a 64TiB task
size limit -- which broke 64-bit userspace producing the following errors:

Starting init: /sbin/init exists but couldn't execute it (error -14)
Starting init: /bin/sh exists but couldn't execute it (error -14)
Kernel panic - not syncing: No working init found.  Try passing init= option to kernel. See Linux Documentation/admin-guide/init.rst for guidance.

Fixes: f4ea6dcb08ea ("powerpc/mm: Enable mappings above 128TB")
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 arch/powerpc/include/asm/processor.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index a4b1d8d6b793..a2123f291ab0 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -151,8 +151,13 @@ void release_thread(struct task_struct *);
 
 #ifdef __powerpc64__
 
+#ifdef CONFIG_PPC_BOOK3S_64
 /* Limit stack to 128TB */
 #define STACK_TOP_USER64 TASK_SIZE_128TB
+#else
+#define STACK_TOP_USER64 TASK_SIZE_USER64
+#endif
+
 #define STACK_TOP_USER32 TASK_SIZE_USER32
 
 #define STACK_TOP (is_32bit_task() ? \
-- 
cgit v1.2.3


From 121843eb02a6e2fa30aefab64bfe183c97230c75 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Mon, 1 May 2017 15:47:41 -0700
Subject: x86/mm/kaslr: Use the _ASM_MUL macro for multiplication to work
 around Clang incompatibility

The constraint "rm" allows the compiler to put mix_const into memory.
When the input operand is a memory location then MUL needs an operand
size suffix, since Clang can't infer the multiplication width from the
operand.

Add and use the _ASM_MUL macro which determines the operand size and
resolves to the NUL instruction with the corresponding suffix.

This fixes the following error when building with clang:

  CC      arch/x86/lib/kaslr.o
  /tmp/kaslr-dfe1ad.s: Assembler messages:
  /tmp/kaslr-dfe1ad.s:182: Error: no instruction mnemonic suffix given and no register operands; can't size instruction

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Cc: Grant Grundler <grundler@chromium.org>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Davidson <md@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170501224741.133938-1-mka@chromium.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/asm.h | 1 +
 arch/x86/lib/kaslr.c       | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 7acb51c49fec..7a9df3beb89b 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -32,6 +32,7 @@
 #define _ASM_ADD	__ASM_SIZE(add)
 #define _ASM_SUB	__ASM_SIZE(sub)
 #define _ASM_XADD	__ASM_SIZE(xadd)
+#define _ASM_MUL	__ASM_SIZE(mul)
 
 #define _ASM_AX		__ASM_REG(ax)
 #define _ASM_BX		__ASM_REG(bx)
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index 5761a4f19455..ab2d1d73e9e7 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -5,6 +5,7 @@
  * kernel starts. This file is included in the compressed kernel and
  * normally linked in the regular.
  */
+#include <asm/asm.h>
 #include <asm/kaslr.h>
 #include <asm/msr.h>
 #include <asm/archrandom.h>
@@ -79,7 +80,7 @@ unsigned long kaslr_get_random_long(const char *purpose)
 	}
 
 	/* Circular multiply for better bit diffusion */
-	asm("mul %3"
+	asm(_ASM_MUL "%3"
 	    : "=a" (random), "=d" (raw)
 	    : "a" (random), "rm" (mix_const));
 	random += raw;
-- 
cgit v1.2.3


From e34cab4cd1f98b4daed5bc98fe727e63f8dbf4e4 Mon Sep 17 00:00:00 2001
From: Lukasz Luba <lukasz.luba@arm.com>
Date: Thu, 4 May 2017 12:34:31 +0100
Subject: thermal: devfreq_cooling: refactor code and add get_voltage function

Move the code which gets the voltage for a given frequency.
This code will be resused in few places.

Acked-by: Javi Merino <javi.merino@kernel.org>
Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
---
 drivers/thermal/devfreq_cooling.c | 45 ++++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 4bf4ad58cffd..af9d32837a3a 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -164,27 +164,12 @@ freq_get_state(struct devfreq_cooling_device *dfc, unsigned long freq)
 	return THERMAL_CSTATE_INVALID;
 }
 
-/**
- * get_static_power() - calculate the static power
- * @dfc:	Pointer to devfreq cooling device
- * @freq:	Frequency in Hz
- *
- * Calculate the static power in milliwatts using the supplied
- * get_static_power().  The current voltage is calculated using the
- * OPP library.  If no get_static_power() was supplied, assume the
- * static power is negligible.
- */
-static unsigned long
-get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
+static unsigned long get_voltage(struct devfreq *df, unsigned long freq)
 {
-	struct devfreq *df = dfc->devfreq;
 	struct device *dev = df->dev.parent;
 	unsigned long voltage;
 	struct dev_pm_opp *opp;
 
-	if (!dfc->power_ops->get_static_power)
-		return 0;
-
 	opp = dev_pm_opp_find_freq_exact(dev, freq, true);
 	if (PTR_ERR(opp) == -ERANGE)
 		opp = dev_pm_opp_find_freq_exact(dev, freq, false);
@@ -202,9 +187,35 @@ get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
 		dev_err_ratelimited(dev,
 				    "Failed to get voltage for frequency %lu\n",
 				    freq);
-		return 0;
 	}
 
+	return voltage;
+}
+
+/**
+ * get_static_power() - calculate the static power
+ * @dfc:	Pointer to devfreq cooling device
+ * @freq:	Frequency in Hz
+ *
+ * Calculate the static power in milliwatts using the supplied
+ * get_static_power().  The current voltage is calculated using the
+ * OPP library.  If no get_static_power() was supplied, assume the
+ * static power is negligible.
+ */
+static unsigned long
+get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
+{
+	struct devfreq *df = dfc->devfreq;
+	unsigned long voltage;
+
+	if (!dfc->power_ops->get_static_power)
+		return 0;
+
+	voltage = get_voltage(df, freq);
+
+	if (voltage == 0)
+		return 0;
+
 	return dfc->power_ops->get_static_power(df, voltage);
 }
 
-- 
cgit v1.2.3


From 2be83da85a64773efaa407639de81bd1377f880e Mon Sep 17 00:00:00 2001
From: Lukasz Luba <lukasz.luba@arm.com>
Date: Thu, 4 May 2017 12:34:32 +0100
Subject: thermal: devfreq_cooling: add new interface for direct power read

This patch introduces a new interface for device drivers connected to
devfreq_cooling in the thermal framework: get_real_power().

Some devices have more sophisticated methods (like power counters)
to approximate the actual power that they use.
In the previous implementation we had a pre-calculated power
table which was then scaled by 'utilization'
('busy_time' and 'total_time' taken from devfreq 'last_status').

With this new interface the driver can provide more precise data
regarding actual power to the thermal governor every time the power
budget is calculated. We then use this value and calculate the real
resource utilization scaling factor.

Reviewed-by: Chris Diamand <chris.diamand@arm.com>
Acked-by: Javi Merino <javi.merino@kernel.org>
Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
---
 drivers/thermal/devfreq_cooling.c | 105 +++++++++++++++++++++++++++++---------
 include/linux/devfreq_cooling.h   |  19 +++++++
 2 files changed, 101 insertions(+), 23 deletions(-)

diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index af9d32837a3a..26c31571a12c 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -28,6 +28,8 @@
 
 #include <trace/events/thermal.h>
 
+#define SCALE_ERROR_MITIGATION 100
+
 static DEFINE_IDA(devfreq_ida);
 
 /**
@@ -45,6 +47,12 @@ static DEFINE_IDA(devfreq_ida);
  * @freq_table_size:	Size of the @freq_table and @power_table
  * @power_ops:	Pointer to devfreq_cooling_power, used to generate the
  *		@power_table.
+ * @res_util:	Resource utilization scaling factor for the power.
+ *		It is multiplied by 100 to minimize the error. It is used
+ *		for estimation of the power budget instead of using
+ *		'utilization' (which is	'busy_time / 'total_time').
+ *		The 'res_util' range is from 100 to (power_table[state] * 100)
+ *		for the corresponding 'state'.
  */
 struct devfreq_cooling_device {
 	int id;
@@ -55,6 +63,8 @@ struct devfreq_cooling_device {
 	u32 *freq_table;
 	size_t freq_table_size;
 	struct devfreq_cooling_power *power_ops;
+	u32 res_util;
+	int capped_state;
 };
 
 /**
@@ -250,6 +260,16 @@ get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq,
 	return power;
 }
 
+
+static inline unsigned long get_total_power(struct devfreq_cooling_device *dfc,
+					    unsigned long freq,
+					    unsigned long voltage)
+{
+	return get_static_power(dfc, freq) + get_dynamic_power(dfc, freq,
+							       voltage);
+}
+
+
 static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
 					       struct thermal_zone_device *tz,
 					       u32 *power)
@@ -259,27 +279,55 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
 	struct devfreq_dev_status *status = &df->last_status;
 	unsigned long state;
 	unsigned long freq = status->current_frequency;
-	u32 dyn_power, static_power;
+	unsigned long voltage;
+	u32 dyn_power = 0;
+	u32 static_power = 0;
+	int res;
 
-	/* Get dynamic power for state */
 	state = freq_get_state(dfc, freq);
-	if (state == THERMAL_CSTATE_INVALID)
-		return -EAGAIN;
+	if (state == THERMAL_CSTATE_INVALID) {
+		res = -EAGAIN;
+		goto fail;
+	}
 
-	dyn_power = dfc->power_table[state];
+	if (dfc->power_ops->get_real_power) {
+		voltage = get_voltage(df, freq);
+		if (voltage == 0) {
+			res = -EINVAL;
+			goto fail;
+		}
 
-	/* Scale dynamic power for utilization */
-	dyn_power = (dyn_power * status->busy_time) / status->total_time;
+		res = dfc->power_ops->get_real_power(df, power, freq, voltage);
+		if (!res) {
+			state = dfc->capped_state;
+			dfc->res_util = dfc->power_table[state];
+			dfc->res_util *= SCALE_ERROR_MITIGATION;
 
-	/* Get static power */
-	static_power = get_static_power(dfc, freq);
+			if (*power > 1)
+				dfc->res_util /= *power;
+		} else {
+			goto fail;
+		}
+	} else {
+		dyn_power = dfc->power_table[state];
+
+		/* Scale dynamic power for utilization */
+		dyn_power *= status->busy_time;
+		dyn_power /= status->total_time;
+		/* Get static power */
+		static_power = get_static_power(dfc, freq);
+
+		*power = dyn_power + static_power;
+	}
 
 	trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power,
 					      static_power);
 
-	*power = dyn_power + static_power;
-
 	return 0;
+fail:
+	/* It is safe to set max in this case */
+	dfc->res_util = SCALE_ERROR_MITIGATION;
+	return res;
 }
 
 static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
@@ -312,26 +360,34 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
 	unsigned long busy_time;
 	s32 dyn_power;
 	u32 static_power;
+	s32 est_power;
 	int i;
 
-	static_power = get_static_power(dfc, freq);
+	if (dfc->power_ops->get_real_power) {
+		/* Scale for resource utilization */
+		est_power = power * dfc->res_util;
+		est_power /= SCALE_ERROR_MITIGATION;
+	} else {
+		static_power = get_static_power(dfc, freq);
 
-	dyn_power = power - static_power;
-	dyn_power = dyn_power > 0 ? dyn_power : 0;
+		dyn_power = power - static_power;
+		dyn_power = dyn_power > 0 ? dyn_power : 0;
 
-	/* Scale dynamic power for utilization */
-	busy_time = status->busy_time ?: 1;
-	dyn_power = (dyn_power * status->total_time) / busy_time;
+		/* Scale dynamic power for utilization */
+		busy_time = status->busy_time ?: 1;
+		est_power = (dyn_power * status->total_time) / busy_time;
+	}
 
 	/*
 	 * Find the first cooling state that is within the power
 	 * budget for dynamic power.
 	 */
 	for (i = 0; i < dfc->freq_table_size - 1; i++)
-		if (dyn_power >= dfc->power_table[i])
+		if (est_power >= dfc->power_table[i])
 			break;
 
 	*state = i;
+	dfc->capped_state = i;
 	trace_thermal_power_devfreq_limit(cdev, freq, *state, power);
 	return 0;
 }
@@ -387,7 +443,7 @@ static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc)
 	}
 
 	for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) {
-		unsigned long power_dyn, voltage;
+		unsigned long power, voltage;
 		struct dev_pm_opp *opp;
 
 		opp = dev_pm_opp_find_freq_floor(dev, &freq);
@@ -400,12 +456,15 @@ static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc)
 		dev_pm_opp_put(opp);
 
 		if (dfc->power_ops) {
-			power_dyn = get_dynamic_power(dfc, freq, voltage);
+			if (dfc->power_ops->get_real_power)
+				power = get_total_power(dfc, freq, voltage);
+			else
+				power = get_dynamic_power(dfc, freq, voltage);
 
-			dev_dbg(dev, "Dynamic power table: %lu MHz @ %lu mV: %lu = %lu mW\n",
-				freq / 1000000, voltage, power_dyn, power_dyn);
+			dev_dbg(dev, "Power table: %lu MHz @ %lu mV: %lu = %lu mW\n",
+				freq / 1000000, voltage, power, power);
 
-			power_table[i] = power_dyn;
+			power_table[i] = power;
 		}
 
 		freq_table[i] = freq;
diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
index c35d0c0e0ada..4635f95000a4 100644
--- a/include/linux/devfreq_cooling.h
+++ b/include/linux/devfreq_cooling.h
@@ -34,6 +34,23 @@
  *			If get_dynamic_power() is NULL, then the
  *			dynamic power is calculated as
  *			@dyn_power_coeff * frequency * voltage^2
+ * @get_real_power:	When this is set, the framework uses it to ask the
+ *			device driver for the actual power.
+ *			Some devices have more sophisticated methods
+ *			(like power counters) to approximate the actual power
+ *			that they use.
+ *			This function provides more accurate data to the
+ *			thermal governor. When the driver does not provide
+ *			such function, framework just uses pre-calculated
+ *			table and scale the power by 'utilization'
+ *			(based on 'busy_time' and 'total_time' taken from
+ *			devfreq 'last_status').
+ *			The value returned by this function must be lower
+ *			or equal than the maximum power value
+ *			for the current	state
+ *			(which can be found in power_table[state]).
+ *			When this interface is used, the power_table holds
+ *			max total (static + dynamic) power value for each OPP.
  */
 struct devfreq_cooling_power {
 	unsigned long (*get_static_power)(struct devfreq *devfreq,
@@ -41,6 +58,8 @@ struct devfreq_cooling_power {
 	unsigned long (*get_dynamic_power)(struct devfreq *devfreq,
 					   unsigned long freq,
 					   unsigned long voltage);
+	int (*get_real_power)(struct devfreq *df, u32 *power,
+			      unsigned long freq, unsigned long voltage);
 	unsigned long dyn_power_coeff;
 };
 
-- 
cgit v1.2.3


From 771ffa14ead18887bed400c09f4bde5bca5bf342 Mon Sep 17 00:00:00 2001
From: Lukasz Luba <lukasz.luba@arm.com>
Date: Thu, 4 May 2017 12:34:33 +0100
Subject: trace: thermal: add another parameter 'power' to the tracing function

This patch adds another parameter to the trace function:
trace_thermal_power_devfreq_get_power().

In case when we call directly driver's code for the real power,
we do not have static/dynamic_power values. Instead we get total
power in the '*power' value. The 'static_power' and
'dynamic_power' are set to 0.

Therefore, we have to trace that '*power' value in this scenario.

CC: Steven Rostedt <rostedt@goodmis.org>
CC: Ingo Molnar <mingo@redhat.com>
CC: Zhang Rui <rui.zhang@intel.com>
CC: Eduardo Valentin <edubezval@gmail.com>
Acked-by: Javi Merino <javi.merino@kernel.org>
Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
---
 drivers/thermal/devfreq_cooling.c |  2 +-
 include/trace/events/thermal.h    | 11 +++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 26c31571a12c..ef59256887ff 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -321,7 +321,7 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
 	}
 
 	trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power,
-					      static_power);
+					      static_power, *power);
 
 	return 0;
 fail:
diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h
index 2b4a8ff72d0d..6cde5b3514c2 100644
--- a/include/trace/events/thermal.h
+++ b/include/trace/events/thermal.h
@@ -151,9 +151,9 @@ TRACE_EVENT(thermal_power_cpu_limit,
 TRACE_EVENT(thermal_power_devfreq_get_power,
 	TP_PROTO(struct thermal_cooling_device *cdev,
 		 struct devfreq_dev_status *status, unsigned long freq,
-		u32 dynamic_power, u32 static_power),
+		u32 dynamic_power, u32 static_power, u32 power),
 
-	TP_ARGS(cdev, status,  freq, dynamic_power, static_power),
+	TP_ARGS(cdev, status,  freq, dynamic_power, static_power, power),
 
 	TP_STRUCT__entry(
 		__string(type,         cdev->type    )
@@ -161,6 +161,7 @@ TRACE_EVENT(thermal_power_devfreq_get_power,
 		__field(u32,           load          )
 		__field(u32,           dynamic_power )
 		__field(u32,           static_power  )
+		__field(u32,           power)
 	),
 
 	TP_fast_assign(
@@ -169,11 +170,13 @@ TRACE_EVENT(thermal_power_devfreq_get_power,
 		__entry->load = (100 * status->busy_time) / status->total_time;
 		__entry->dynamic_power = dynamic_power;
 		__entry->static_power = static_power;
+		__entry->power = power;
 	),
 
-	TP_printk("type=%s freq=%lu load=%u dynamic_power=%u static_power=%u",
+	TP_printk("type=%s freq=%lu load=%u dynamic_power=%u static_power=%u power=%u",
 		__get_str(type), __entry->freq,
-		__entry->load, __entry->dynamic_power, __entry->static_power)
+		__entry->load, __entry->dynamic_power, __entry->static_power,
+		__entry->power)
 );
 
 TRACE_EVENT(thermal_power_devfreq_limit,
-- 
cgit v1.2.3


From 68b2440b2a4beaa393d8ed97ac619fd1c94a549e Mon Sep 17 00:00:00 2001
From: Brian Bian <brian.bian@intel.com>
Date: Wed, 12 Apr 2017 14:07:17 -0700
Subject: Thermal: Intel SoC DTS: Change interrupt request behavior

The interrupt request call in Intel SoC DTS driver may fail if
there is no underlying BIOS support. However, the user space
thermal daemon can still use the thermal zones created by the
SoC DTS driver in polling mode, therefore, instead of bailing
out on interrupt request failures, it is better just to log
a warning message and continue the init process.

Signed-off-by: Brian Bian <brian.bian@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/intel_soc_dts_thermal.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/thermal/intel_soc_dts_thermal.c b/drivers/thermal/intel_soc_dts_thermal.c
index b2bbaa1c60b0..c27868b2c6af 100644
--- a/drivers/thermal/intel_soc_dts_thermal.c
+++ b/drivers/thermal/intel_soc_dts_thermal.c
@@ -73,8 +73,12 @@ static int __init intel_soc_thermal_init(void)
 					   IRQF_TRIGGER_RISING | IRQF_ONESHOT,
 					   "soc_dts", soc_dts);
 		if (err) {
-			pr_err("request_threaded_irq ret %d\n", err);
-			goto error_irq;
+			/*
+			 * Do not just error out because the user space thermal
+			 * daemon such as DPTF may use polling instead of being
+			 * interrupt driven.
+			 */
+			pr_warn("request_threaded_irq ret %d\n", err);
 		}
 	}
 
@@ -88,7 +92,6 @@ static int __init intel_soc_thermal_init(void)
 error_trips:
 	if (soc_dts_thres_irq)
 		free_irq(soc_dts_thres_irq, soc_dts);
-error_irq:
 	intel_soc_dts_iosf_exit(soc_dts);
 
 	return err;
-- 
cgit v1.2.3


From e441fd68663e298e99a99e215e0144a0eda6250d Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Tue, 18 Apr 2017 09:59:58 +0530
Subject: thermal: core: Allow orderly_poweroff to be called only once

thermal_zone_device_check --> thermal_zone_device_update -->
handle_thermal_trip --> handle_critical_trips --> orderly_poweroff

The above sequence happens every 250/500 mS based on the configuration.
The orderly_poweroff function is getting called every 250/500 mS.
With a full fledged file system it takes at least 5-10 Seconds to
power off gracefully.

In that period due to the thermal_zone_device_check triggering
periodically the thermal work queues bombard with
orderly_poweroff calls multiple times eventually leading to
failures in gracefully powering off the system.

Make sure that orderly_poweroff is called only once.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Acked-by: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/thermal_core.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 11f0675cb7e5..8337c272d3ec 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -45,8 +45,10 @@ static LIST_HEAD(thermal_governor_list);
 
 static DEFINE_MUTEX(thermal_list_lock);
 static DEFINE_MUTEX(thermal_governor_lock);
+static DEFINE_MUTEX(poweroff_lock);
 
 static atomic_t in_suspend;
+static bool power_off_triggered;
 
 static struct thermal_governor *def_governor;
 
@@ -342,7 +344,12 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
 		dev_emerg(&tz->device,
 			  "critical temperature reached(%d C),shutting down\n",
 			  tz->temperature / 1000);
-		orderly_poweroff(true);
+		mutex_lock(&poweroff_lock);
+		if (!power_off_triggered) {
+			orderly_poweroff(true);
+			power_off_triggered = true;
+		}
+		mutex_unlock(&poweroff_lock);
 	}
 }
 
@@ -1463,6 +1470,7 @@ static int __init thermal_init(void)
 {
 	int result;
 
+	mutex_init(&poweroff_lock);
 	result = thermal_register_governors();
 	if (result)
 		goto error;
@@ -1497,6 +1505,7 @@ error:
 	ida_destroy(&thermal_cdev_ida);
 	mutex_destroy(&thermal_list_lock);
 	mutex_destroy(&thermal_governor_lock);
+	mutex_destroy(&poweroff_lock);
 	return result;
 }
 
-- 
cgit v1.2.3


From ef1d87e06ab4d3f9a95f02517ecc50902dc233a7 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Tue, 18 Apr 2017 09:59:59 +0530
Subject: thermal: core: Add a back up thermal shutdown mechanism

orderly_poweroff is triggered when a graceful shutdown
of system is desired. This may be used in many critical states of the
kernel such as when subsystems detects conditions such as critical
temperature conditions. However, in certain conditions in system
boot up sequences like those in the middle of driver probes being
initiated, userspace will be unable to power off the system in a clean
manner and leaves the system in a critical state. In cases like these,
the /sbin/poweroff will return success (having forked off to attempt
powering off the system. However, the system overall will fail to
completely poweroff (since other modules will be probed) and the system
is still functional with no userspace (since that would have shut itself
off).

However, there is no clean way of detecting such failure of userspace
powering off the system. In such scenarios, it is necessary for a backup
workqueue to be able to force a shutdown of the system when orderly
shutdown is not successful after a configurable time period.

Reported-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Acked-by: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 Documentation/thermal/sysfs-api.txt | 21 +++++++++++++++
 drivers/thermal/Kconfig             | 17 ++++++++++++
 drivers/thermal/thermal_core.c      | 53 +++++++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+)

diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index ef473dc7f55e..bb9a0a53e76b 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -582,3 +582,24 @@ platform data is provided, this uses the step_wise throttling policy.
 This function serves as an arbitrator to set the state of a cooling
 device. It sets the cooling device to the deepest cooling state if
 possible.
+
+6. thermal_emergency_poweroff:
+
+On an event of critical trip temperature crossing. Thermal framework
+allows the system to shutdown gracefully by calling orderly_poweroff().
+In the event of a failure of orderly_poweroff() to shut down the system
+we are in danger of keeping the system alive at undesirably high
+temperatures. To mitigate this high risk scenario we program a work
+queue to fire after a pre-determined number of seconds to start
+an emergency shutdown of the device using the kernel_power_off()
+function. In case kernel_power_off() fails then finally
+emergency_restart() is called in the worst case.
+
+The delay should be carefully profiled so as to give adequate time for
+orderly_poweroff(). In case of failure of an orderly_poweroff() the
+emergency poweroff kicks in after the delay has elapsed and shuts down
+the system.
+
+If set to 0 emergency poweroff will not be supported. So a carefully
+profiled non-zero positive value is a must for emergerncy poweroff to be
+triggered.
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 776b34396144..74ef51dfb816 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -15,6 +15,23 @@ menuconfig THERMAL
 
 if THERMAL
 
+config THERMAL_EMERGENCY_POWEROFF_DELAY_MS
+	int "Emergency poweroff delay in milli-seconds"
+	depends on THERMAL
+	default 0
+	help
+	  Thermal subsystem will issue a graceful shutdown when
+	  critical temperatures are reached using orderly_poweroff(). In
+	  case of failure of an orderly_poweroff(), the thermal emergency
+	  poweroff kicks in after a delay has elapsed and shuts down the system.
+	  This config is number of milliseconds to delay before emergency
+	  poweroff kicks in. Similarly to the critical trip point,
+	  the delay should be carefully profiled so as to give adequate
+	  time for orderly_poweroff() to finish on regular execution.
+	  If set to 0 emergency poweroff will not be supported.
+
+	  In doubt, leave as 0.
+
 config THERMAL_HWMON
 	bool
 	prompt "Expose thermal sensors as hwmon device"
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 8337c272d3ec..b21b9cc2c8d6 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -324,6 +324,54 @@ static void handle_non_critical_trips(struct thermal_zone_device *tz,
 		       def_governor->throttle(tz, trip);
 }
 
+/**
+ * thermal_emergency_poweroff_func - emergency poweroff work after a known delay
+ * @work: work_struct associated with the emergency poweroff function
+ *
+ * This function is called in very critical situations to force
+ * a kernel poweroff after a configurable timeout value.
+ */
+static void thermal_emergency_poweroff_func(struct work_struct *work)
+{
+	/*
+	 * We have reached here after the emergency thermal shutdown
+	 * Waiting period has expired. This means orderly_poweroff has
+	 * not been able to shut off the system for some reason.
+	 * Try to shut down the system immediately using kernel_power_off
+	 * if populated
+	 */
+	WARN(1, "Attempting kernel_power_off: Temperature too high\n");
+	kernel_power_off();
+
+	/*
+	 * Worst of the worst case trigger emergency restart
+	 */
+	WARN(1, "Attempting emergency_restart: Temperature too high\n");
+	emergency_restart();
+}
+
+static DECLARE_DELAYED_WORK(thermal_emergency_poweroff_work,
+			    thermal_emergency_poweroff_func);
+
+/**
+ * thermal_emergency_poweroff - Trigger an emergency system poweroff
+ *
+ * This may be called from any critical situation to trigger a system shutdown
+ * after a known period of time. By default this is not scheduled.
+ */
+void thermal_emergency_poweroff(void)
+{
+	int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS;
+	/*
+	 * poweroff_delay_ms must be a carefully profiled positive value.
+	 * Its a must for thermal_emergency_poweroff_work to be scheduled
+	 */
+	if (poweroff_delay_ms <= 0)
+		return;
+	schedule_delayed_work(&thermal_emergency_poweroff_work,
+			      msecs_to_jiffies(poweroff_delay_ms));
+}
+
 static void handle_critical_trips(struct thermal_zone_device *tz,
 				  int trip, enum thermal_trip_type trip_type)
 {
@@ -346,6 +394,11 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
 			  tz->temperature / 1000);
 		mutex_lock(&poweroff_lock);
 		if (!power_off_triggered) {
+			/*
+			 * Queue a backup emergency shutdown in the event of
+			 * orderly_poweroff failure
+			 */
+			thermal_emergency_poweroff();
 			orderly_poweroff(true);
 			power_off_triggered = true;
 		}
-- 
cgit v1.2.3


From 40f4ac0b510dbf9f00668ded5474290940616d09 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 25 Apr 2017 08:47:40 +0200
Subject: x86/xen: fix xsave capability setting

Commit 690b7f10b4f9f ("x86/xen: use capabilities instead of fake cpuid
values for xsave") introduced a regression as it tried to make use of
the fixup feature before it being available.

Fall back to the old variant testing via cpuid().

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/enlighten_pv.c | 32 +++++++++-----------------------
 1 file changed, 9 insertions(+), 23 deletions(-)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index a732bc2b9dfc..a1af4f68278f 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -277,26 +277,15 @@ static bool __init xen_check_mwait(void)
 
 static bool __init xen_check_xsave(void)
 {
-	unsigned int err, eax, edx;
+	unsigned int cx, xsave_mask;
 
-	/*
-	 * Xen 4.0 and older accidentally leaked the host XSAVE flag into guest
-	 * view, despite not being able to support guests using the
-	 * functionality. Probe for the actual availability of XSAVE by seeing
-	 * whether xgetbv executes successfully or raises #UD.
-	 */
-	asm volatile("1: .byte 0x0f,0x01,0xd0\n\t" /* xgetbv */
-		     "xor %[err], %[err]\n"
-		     "2:\n\t"
-		     ".pushsection .fixup,\"ax\"\n\t"
-		     "3: movl $1,%[err]\n\t"
-		     "jmp 2b\n\t"
-		     ".popsection\n\t"
-		     _ASM_EXTABLE(1b, 3b)
-		     : [err] "=r" (err), "=a" (eax), "=d" (edx)
-		     : "c" (0));
-
-	return err == 0;
+	cx = cpuid_ecx(1);
+
+	xsave_mask = (1 << (X86_FEATURE_XSAVE % 32)) |
+		     (1 << (X86_FEATURE_OSXSAVE % 32));
+
+	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
+	return (cx & xsave_mask) == xsave_mask;
 }
 
 static void __init xen_init_capabilities(void)
@@ -317,10 +306,7 @@ static void __init xen_init_capabilities(void)
 	else
 		setup_clear_cpu_cap(X86_FEATURE_MWAIT);
 
-	if (xen_check_xsave()) {
-		setup_force_cpu_cap(X86_FEATURE_XSAVE);
-		setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
-	} else {
+	if (!xen_check_xsave()) {
 		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 		setup_clear_cpu_cap(X86_FEATURE_OSXSAVE);
 	}
-- 
cgit v1.2.3


From d162809f85b4f54ef075517ffa2f3d02e55d5a53 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Wed, 3 May 2017 16:20:51 -0400
Subject: xen/x86: Do not call xen_init_time_ops() until shared_info is
 initialized

Routines that are set by xen_init_time_ops() use shared_info's
pvclock_vcpu_time_info area. This area is not properly available until
shared_info is mapped in xen_setup_shared_info().

This became especially problematic due to commit dd759d93f4dd ("x86/timers:
Add simple udelay calibration") where we end up reading tsc_to_system_mul
from xen_dummy_shared_info (i.e. getting zero value) and then trying
to divide by it in pvclock_tsc_khz().

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/enlighten_pv.c | 9 +++++++--
 arch/x86/xen/time.c         | 2 +-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index a1af4f68278f..b5c54ab0e195 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -974,6 +974,13 @@ void xen_setup_shared_info(void)
 #endif
 
 	xen_setup_mfn_list_list();
+
+	/*
+	 * Now that shared info is set up we can start using routines that
+	 * point to pvclock area.
+	 */
+	if (system_state == SYSTEM_BOOTING)
+		xen_init_time_ops();
 }
 
 /* This is called once we have the cpu_possible_mask */
@@ -1272,8 +1279,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	x86_init.oem.arch_setup = xen_arch_setup;
 	x86_init.oem.banner = xen_banner;
 
-	xen_init_time_ops();
-
 	/*
 	 * Set up some pagetable state before starting to set any ptes.
 	 */
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 090c7eb4dca9..a1895a8e85c1 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -406,7 +406,7 @@ static void __init xen_time_init(void)
 		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 }
 
-void __init xen_init_time_ops(void)
+void __ref xen_init_time_ops(void)
 {
 	pv_time_ops = xen_time_ops;
 
-- 
cgit v1.2.3


From 7bcd74b98d7bac3e5149894caaf72de6989af7f0 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 22 Mar 2017 08:42:21 -0400
Subject: ovl: check if all layers are on the same fs

Some features can only work when all layers are on the same fs.  Test this
condition during mount time, so features can check them later.

Add helper ovl_same_sb() to return the common super block in case all
layers are on the same fs.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/overlayfs.h | 1 +
 fs/overlayfs/ovl_entry.h | 2 ++
 fs/overlayfs/super.c     | 8 ++++++++
 fs/overlayfs/util.c      | 7 +++++++
 4 files changed, 18 insertions(+)

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 741dc0b6931f..c851158324e2 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -151,6 +151,7 @@ int ovl_want_write(struct dentry *dentry);
 void ovl_drop_write(struct dentry *dentry);
 struct dentry *ovl_workdir(struct dentry *dentry);
 const struct cred *ovl_override_creds(struct super_block *sb);
+struct super_block *ovl_same_sb(struct super_block *sb);
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
 bool ovl_dentry_remote(struct dentry *dentry);
 bool ovl_dentry_weird(struct dentry *dentry);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 59614faa14c3..b2023ddb8532 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -29,6 +29,8 @@ struct ovl_fs {
 	const struct cred *creator_cred;
 	bool tmpfile;
 	wait_queue_head_t copyup_wq;
+	/* sb common to all layers */
+	struct super_block *same_sb;
 };
 
 /* private information held for every overlayfs dentry */
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 6faefa54cb5e..9828b7de8999 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -914,11 +914,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 		ufs->lower_mnt[ufs->numlower] = mnt;
 		ufs->numlower++;
+
+		/* Check if all lower layers are on same sb */
+		if (i == 0)
+			ufs->same_sb = mnt->mnt_sb;
+		else if (ufs->same_sb != mnt->mnt_sb)
+			ufs->same_sb = NULL;
 	}
 
 	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
 	if (!ufs->upper_mnt)
 		sb->s_flags |= MS_RDONLY;
+	else if (ufs->upper_mnt->mnt_sb != ufs->same_sb)
+		ufs->same_sb = NULL;
 
 	if (remote)
 		sb->s_d_op = &ovl_reval_dentry_operations;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 35b8959db5ee..2bd4c264ccbe 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -41,6 +41,13 @@ const struct cred *ovl_override_creds(struct super_block *sb)
 	return override_creds(ofs->creator_cred);
 }
 
+struct super_block *ovl_same_sb(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->same_sb;
+}
+
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
 {
 	size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
-- 
cgit v1.2.3


From 3a1e819b4e80216e00ef6a4dfe67fa142450c5e1 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 30 Mar 2017 15:22:16 +0300
Subject: ovl: store file handle of lower inode on copy up

Sometimes it is interesting to know if an upper file is pure upper or a
copy up target, and if it is a copy up target, it may be interesting to
find the copy up origin.

This will be used to preserve lower inode numbers across copy up.

Store the lower inode file handle in upper inode extended attribute
overlay.origin on copy up to use it later for these cases.  Store the lower
filesystem uuid along side the file handle, so we can validate that we are
looking for the origin file in the original fs.

If lower fs does not support NFS export ops store a zero sized xattr so we
can always use the overlay.origin xattr to distinguish between a copy up
and a pure upper inode.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c   | 82 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/overlayfs/overlayfs.h | 36 +++++++++++++++++++++
 2 files changed, 118 insertions(+)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 906ea6c93260..9008ab9fbd2e 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -20,6 +20,7 @@
 #include <linux/namei.h>
 #include <linux/fdtable.h>
 #include <linux/ratelimit.h>
+#include <linux/exportfs.h>
 #include "overlayfs.h"
 #include "ovl_entry.h"
 
@@ -232,6 +233,79 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 	return err;
 }
 
+static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_be *uuid)
+{
+	struct ovl_fh *fh;
+	int fh_type, fh_len, dwords;
+	void *buf;
+	int buflen = MAX_HANDLE_SZ;
+
+	buf = kmalloc(buflen, GFP_TEMPORARY);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * We encode a non-connectable file handle for non-dir, because we
+	 * only need to find the lower inode number and we don't want to pay
+	 * the price or reconnecting the dentry.
+	 */
+	dwords = buflen >> 2;
+	fh_type = exportfs_encode_fh(lower, buf, &dwords, 0);
+	buflen = (dwords << 2);
+
+	fh = ERR_PTR(-EIO);
+	if (WARN_ON(fh_type < 0) ||
+	    WARN_ON(buflen > MAX_HANDLE_SZ) ||
+	    WARN_ON(fh_type == FILEID_INVALID))
+		goto out;
+
+	BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
+	fh_len = offsetof(struct ovl_fh, fid) + buflen;
+	fh = kmalloc(fh_len, GFP_KERNEL);
+	if (!fh) {
+		fh = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	fh->version = OVL_FH_VERSION;
+	fh->magic = OVL_FH_MAGIC;
+	fh->type = fh_type;
+	fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+	fh->len = fh_len;
+	fh->uuid = *uuid;
+	memcpy(fh->fid, buf, buflen);
+
+out:
+	kfree(buf);
+	return fh;
+}
+
+static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+			  struct dentry *upper)
+{
+	struct super_block *sb = lower->d_sb;
+	uuid_be *uuid = (uuid_be *) &sb->s_uuid;
+	const struct ovl_fh *fh = NULL;
+	int err;
+
+	/*
+	 * When lower layer doesn't support export operations store a 'null' fh,
+	 * so we can use the overlay.origin xattr to distignuish between a copy
+	 * up and a pure upper inode.
+	 */
+	if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
+	    uuid_be_cmp(*uuid, NULL_UUID_BE)) {
+		fh = ovl_encode_fh(lower, uuid);
+		if (IS_ERR(fh))
+			return PTR_ERR(fh);
+	}
+
+	err = ovl_do_setxattr(upper, OVL_XATTR_ORIGIN, fh, fh ? fh->len : 0, 0);
+	kfree(fh);
+
+	return err;
+}
+
 static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 			      struct dentry *dentry, struct path *lowerpath,
 			      struct kstat *stat, const char *link,
@@ -316,6 +390,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	if (err)
 		goto out_cleanup;
 
+	/*
+	 * Store identifier of lower inode in upper inode xattr to
+	 * allow lookup of the copy up origin inode.
+	 */
+	err = ovl_set_origin(dentry, lowerpath->dentry, temp);
+	if (err)
+		goto out_cleanup;
+
 	if (tmpfile)
 		err = ovl_do_link(temp, udir, upper, true);
 	else
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index c851158324e2..77405e27cd64 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -8,6 +8,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/uuid.h>
 
 enum ovl_path_type {
 	__OVL_PATH_UPPER	= (1 << 0),
@@ -20,6 +21,41 @@ enum ovl_path_type {
 #define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
 #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
 #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
+#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
+
+/*
+ * The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
+ * where:
+ * origin.fh	- exported file handle of the lower file
+ * origin.uuid	- uuid of the lower filesystem
+ */
+#define OVL_FH_VERSION	0
+#define OVL_FH_MAGIC	0xfb
+
+/* CPU byte order required for fid decoding:  */
+#define OVL_FH_FLAG_BIG_ENDIAN	(1 << 0)
+#define OVL_FH_FLAG_ANY_ENDIAN	(1 << 1)
+
+#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN)
+
+#if defined(__LITTLE_ENDIAN)
+#define OVL_FH_FLAG_CPU_ENDIAN 0
+#elif defined(__BIG_ENDIAN)
+#define OVL_FH_FLAG_CPU_ENDIAN OVL_FH_FLAG_BIG_ENDIAN
+#else
+#error Endianness not defined
+#endif
+
+/* On-disk and in-memeory format for redirect by file handle */
+struct ovl_fh {
+	u8 version;	/* 0 */
+	u8 magic;	/* 0xfb */
+	u8 len;		/* size of this header + size of fid */
+	u8 flags;	/* OVL_FH_FLAG_* */
+	u8 type;	/* fid_type of fid */
+	uuid_be uuid;	/* uuid of filesystem */
+	u8 fid[0];	/* file identifier */
+} __packed;
 
 #define OVL_ISUPPER_MASK 1UL
 
-- 
cgit v1.2.3


From c22205d0584bc65cfc9a65db0e15a9b69f5cdf64 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 26 Apr 2017 23:40:52 +0300
Subject: ovl: use an auxiliary var for overlay root entry

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index b8b077821fb0..d0a3e4ad3042 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -220,6 +220,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	const struct cred *old_cred;
 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
+	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
 	struct path *stack = NULL;
 	struct dentry *upperdir, *upperdentry = NULL;
 	unsigned int ctr = 0;
@@ -259,7 +260,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			if (!upperredirect)
 				goto out_put_upper;
 			if (d.redirect[0] == '/')
-				poe = dentry->d_sb->s_root->d_fsdata;
+				poe = roe;
 		}
 		upperopaque = d.opaque;
 	}
@@ -290,10 +291,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		if (d.stop)
 			break;
 
-		if (d.redirect &&
-		    d.redirect[0] == '/' &&
-		    poe != dentry->d_sb->s_root->d_fsdata) {
-			poe = dentry->d_sb->s_root->d_fsdata;
+		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
+			poe = roe;
 
 			/* Find the current layer on the root dentry */
 			for (i = 0; i < poe->numlower; i++)
-- 
cgit v1.2.3


From a9d019573e881472aa62f093fa599ad68cd0fc1e Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 30 Apr 2017 14:46:31 +0300
Subject: ovl: lookup non-dir copy-up-origin by file handle

If overlay.origin xattr is found on a non-dir upper inode try to get lower
dentry by calling exportfs_decode_fh().

On failure to lookup by file handle to lower layer, do not lookup the copy
up origin by name, because the lower found by name could be another file in
case the upper file was renamed.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index d0a3e4ad3042..bad0f665a635 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -12,6 +12,8 @@
 #include <linux/namei.h>
 #include <linux/xattr.h>
 #include <linux/ratelimit.h>
+#include <linux/mount.h>
+#include <linux/exportfs.h>
 #include "overlayfs.h"
 #include "ovl_entry.h"
 
@@ -81,6 +83,90 @@ invalid:
 	goto err_free;
 }
 
+static int ovl_acceptable(void *ctx, struct dentry *dentry)
+{
+	return 1;
+}
+
+static struct dentry *ovl_get_origin(struct dentry *dentry,
+				     struct vfsmount *mnt)
+{
+	int res;
+	struct ovl_fh *fh = NULL;
+	struct dentry *origin = NULL;
+	int bytes;
+
+	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+	if (res < 0) {
+		if (res == -ENODATA || res == -EOPNOTSUPP)
+			return NULL;
+		goto fail;
+	}
+	/* Zero size value means "copied up but origin unknown" */
+	if (res == 0)
+		return NULL;
+
+	fh  = kzalloc(res, GFP_TEMPORARY);
+	if (!fh)
+		return ERR_PTR(-ENOMEM);
+
+	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res);
+	if (res < 0)
+		goto fail;
+
+	if (res < sizeof(struct ovl_fh) || res < fh->len)
+		goto invalid;
+
+	if (fh->magic != OVL_FH_MAGIC)
+		goto invalid;
+
+	/* Treat larger version and unknown flags as "origin unknown" */
+	if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
+		goto out;
+
+	/* Treat endianness mismatch as "origin unknown" */
+	if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+	    (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+		goto out;
+
+	bytes = (fh->len - offsetof(struct ovl_fh, fid));
+
+	/*
+	 * Make sure that the stored uuid matches the uuid of the lower
+	 * layer where file handle will be decoded.
+	 */
+	if (uuid_be_cmp(fh->uuid, *(uuid_be *) &mnt->mnt_sb->s_uuid))
+		goto out;
+
+	origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
+				    bytes >> 2, (int)fh->type,
+				    ovl_acceptable, NULL);
+	if (IS_ERR(origin)) {
+		/* Treat stale file handle as "origin unknown" */
+		if (origin == ERR_PTR(-ESTALE))
+			origin = NULL;
+		goto out;
+	}
+
+	if (ovl_dentry_weird(origin) ||
+	    ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) {
+		dput(origin);
+		origin = NULL;
+		goto invalid;
+	}
+
+out:
+	kfree(fh);
+	return origin;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+	goto out;
+invalid:
+	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+	goto out;
+}
+
 static bool ovl_is_opaquedir(struct dentry *dentry)
 {
 	int res;
@@ -192,6 +278,45 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 	return 0;
 }
 
+
+static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
+			    struct path **stackp, unsigned int *ctrp)
+{
+	struct super_block *same_sb = ovl_same_sb(dentry->d_sb);
+	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
+	struct vfsmount *mnt;
+	struct dentry *origin;
+
+	if (!same_sb || !roe->numlower)
+		return 0;
+
+       /*
+	* Since all layers are on the same fs, we use the first layer for
+	* decoding the file handle.  We may get a disconnected dentry,
+	* which is fine, because we only need to hold the origin inode in
+	* cache and use its inode number.  We may even get a connected dentry,
+	* that is not under the first layer's root.  That is also fine for
+	* using it's inode number - it's the same as if we held a reference
+	* to a dentry in first layer that was moved under us.
+	*/
+	mnt = roe->lowerstack[0].mnt;
+
+	origin = ovl_get_origin(upperdentry, mnt);
+	if (IS_ERR_OR_NULL(origin))
+		return PTR_ERR(origin);
+
+	BUG_ON(*stackp || *ctrp);
+	*stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
+	if (!*stackp) {
+		dput(origin);
+		return -ENOMEM;
+	}
+	**stackp = (struct path) { .dentry = origin, .mnt = mnt };
+	*ctrp = 1;
+
+	return 0;
+}
+
 /*
  * Returns next layer in stack starting from top.
  * Returns -1 if this is the last layer.
@@ -254,6 +379,13 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			err = -EREMOTE;
 			goto out;
 		}
+		if (upperdentry && !d.is_dir) {
+			BUG_ON(!d.stop || d.redirect);
+			err = ovl_check_origin(dentry, upperdentry,
+					       &stack, &ctr);
+			if (err)
+				goto out;
+		}
 
 		if (d.redirect) {
 			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
-- 
cgit v1.2.3


From 595485033db2c24178257698254fd4182fdb4123 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 23 Apr 2017 23:12:34 +0300
Subject: ovl: set the ORIGIN type flag

For directory entries, non zero oe->numlower implies OVL_TYPE_MERGE.
Define a new type flag OVL_TYPE_ORIGIN to indicate that an entry holds a
reference to its lower copy up origin.

For directory entries ORIGIN := MERGE && UPPER. For non-dir entries ORIGIN
means that a lower type dentry has been recently copied up or that we were
able to find the copy up origin from overlay.origin xattr.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/overlayfs.h |  2 ++
 fs/overlayfs/util.c      | 10 ++++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 77405e27cd64..98af19828695 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -13,10 +13,12 @@
 enum ovl_path_type {
 	__OVL_PATH_UPPER	= (1 << 0),
 	__OVL_PATH_MERGE	= (1 << 1),
+	__OVL_PATH_ORIGIN	= (1 << 2),
 };
 
 #define OVL_TYPE_UPPER(type)	((type) & __OVL_PATH_UPPER)
 #define OVL_TYPE_MERGE(type)	((type) & __OVL_PATH_MERGE)
+#define OVL_TYPE_ORIGIN(type)	((type) & __OVL_PATH_ORIGIN)
 
 #define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
 #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 2bd4c264ccbe..1c1b6ea7bf7c 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -83,11 +83,13 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
 		type = __OVL_PATH_UPPER;
 
 		/*
-		 * Non-dir dentry can hold lower dentry from previous
-		 * location.
+		 * Non-dir dentry can hold lower dentry of its copy up origin.
 		 */
-		if (oe->numlower && d_is_dir(dentry))
-			type |= __OVL_PATH_MERGE;
+		if (oe->numlower) {
+			type |= __OVL_PATH_ORIGIN;
+			if (d_is_dir(dentry))
+				type |= __OVL_PATH_MERGE;
+		}
 	} else {
 		if (oe->numlower > 1)
 			type |= __OVL_PATH_MERGE;
-- 
cgit v1.2.3


From b7a807dc2010334e62e0afd89d6f7a8913eb14ff Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 24 Apr 2017 09:37:28 +0300
Subject: ovl: persistent inode number for directories

stat(2) on overlay directories reports the overlay temp inode
number, which is constant across copy up, but is not persistent.

When all layers are on the same fs, report the copy up origin inode
number for directories.

This inode number is persistent, unique across the overlay mount and
constant across copy up.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c | 37 +++++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index bfabc65fdc74..0c5e79966957 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -150,12 +150,39 @@ static int ovl_dir_getattr(const struct path *path, struct kstat *stat,
 	type = ovl_path_real(dentry, &realpath);
 	old_cred = ovl_override_creds(dentry->d_sb);
 	err = vfs_getattr(&realpath, stat, request_mask, flags);
-	revert_creds(old_cred);
 	if (err)
-		return err;
+		goto out;
+
+	/*
+	 * When all layers are on the same fs, use the copy-up-origin st_ino,
+	 * which is persistent, unique and constant across copy up.
+	 *
+	 * Otherwise the pair {real st_ino; overlay st_dev} is not unique, so
+	 * use the non persistent overlay st_ino.
+	 */
+	if (ovl_same_sb(dentry->d_sb)) {
+		if (OVL_TYPE_ORIGIN(type)) {
+			struct kstat lowerstat;
+
+			ovl_path_lower(dentry, &realpath);
+			err = vfs_getattr(&realpath, &lowerstat,
+					  STATX_INO, flags);
+			if (err)
+				goto out;
+
+			WARN_ON_ONCE(stat->dev != lowerstat.dev);
+			stat->ino = lowerstat.ino;
+		}
+	} else {
+		stat->ino = dentry->d_inode->i_ino;
+	}
 
+	/*
+	 * Always use the overlay st_dev for directories, so 'find -xdev' will
+	 * scan the entire overlay mount and won't cross the overlay mount
+	 * boundaries.
+	 */
 	stat->dev = dentry->d_sb->s_dev;
-	stat->ino = dentry->d_inode->i_ino;
 
 	/*
 	 * It's probably not worth it to count subdirs to get the
@@ -164,8 +191,10 @@ static int ovl_dir_getattr(const struct path *path, struct kstat *stat,
 	 */
 	if (OVL_TYPE_MERGE(type))
 		stat->nlink = 1;
+out:
+	revert_creds(old_cred);
 
-	return 0;
+	return err;
 }
 
 /* Common operations required to be done after creation of file on upper */
-- 
cgit v1.2.3


From 72b608f08528458334218a809d66ea94d924c378 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 24 Apr 2017 00:25:49 +0300
Subject: ovl: constant st_ino/st_dev across copy up

When all layers are on the same underlying filesystem, let stat(2) return
st_dev/st_ino values of the copy up origin inode if it is known.

This results in constant st_ino/st_dev representation of files in an
overlay mount before and after copy up.

When the underlying filesystem support NFS exportfs, the result is also
persistent st_ino/st_dev representation before and after mount cycle.

Lower hardlinks are broken on copy up to different upper files, so we
cannot use the lower origin st_ino for those different files, even for the
same fs case.

When all overlay layers are on the same fs, use overlay st_dev for non-dirs
to get the correct result from du -x.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 17b8418358ed..3dc693a78de2 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -61,14 +61,51 @@ static int ovl_getattr(const struct path *path, struct kstat *stat,
 		       u32 request_mask, unsigned int flags)
 {
 	struct dentry *dentry = path->dentry;
+	enum ovl_path_type type;
 	struct path realpath;
 	const struct cred *old_cred;
 	int err;
 
-	ovl_path_real(dentry, &realpath);
+	type = ovl_path_real(dentry, &realpath);
 	old_cred = ovl_override_creds(dentry->d_sb);
 	err = vfs_getattr(&realpath, stat, request_mask, flags);
+	if (err)
+		goto out;
+
+	/*
+	 * When all layers are on the same fs, all real inode number are
+	 * unique, so we use the overlay st_dev, which is friendly to du -x.
+	 *
+	 * We also use st_ino of the copy up origin, if we know it.
+	 * This guaranties constant st_dev/st_ino across copy up.
+	 *
+	 * If filesystem supports NFS export ops, this also guaranties
+	 * persistent st_ino across mount cycle.
+	 */
+	if (ovl_same_sb(dentry->d_sb)) {
+		if (OVL_TYPE_ORIGIN(type)) {
+			struct kstat lowerstat;
+
+			ovl_path_lower(dentry, &realpath);
+			err = vfs_getattr(&realpath, &lowerstat,
+					  STATX_INO | STATX_NLINK, flags);
+			if (err)
+				goto out;
+
+			WARN_ON_ONCE(stat->dev != lowerstat.dev);
+			/*
+			 * Lower hardlinks are broken on copy up to different
+			 * upper files, so we cannot use the lower origin st_ino
+			 * for those different files, even for the same fs case.
+			 */
+			if (lowerstat.nlink == 1)
+				stat->ino = lowerstat.ino;
+		}
+		stat->dev = dentry->d_sb->s_dev;
+	}
+out:
 	revert_creds(old_cred);
+
 	return err;
 }
 
-- 
cgit v1.2.3


From 5b712091a3a3904b0ae8311e18e6b540a070d464 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 5 May 2017 11:38:58 +0200
Subject: ovl: merge getattr for dir and nondir

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c       | 61 +-----------------------------------------------
 fs/overlayfs/inode.c     | 31 ++++++++++++++++++++----
 fs/overlayfs/overlayfs.h |  2 ++
 3 files changed, 30 insertions(+), 64 deletions(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 0c5e79966957..f4cf2928cf8e 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -138,65 +138,6 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
 	return err;
 }
 
-static int ovl_dir_getattr(const struct path *path, struct kstat *stat,
-			   u32 request_mask, unsigned int flags)
-{
-	struct dentry *dentry = path->dentry;
-	int err;
-	enum ovl_path_type type;
-	struct path realpath;
-	const struct cred *old_cred;
-
-	type = ovl_path_real(dentry, &realpath);
-	old_cred = ovl_override_creds(dentry->d_sb);
-	err = vfs_getattr(&realpath, stat, request_mask, flags);
-	if (err)
-		goto out;
-
-	/*
-	 * When all layers are on the same fs, use the copy-up-origin st_ino,
-	 * which is persistent, unique and constant across copy up.
-	 *
-	 * Otherwise the pair {real st_ino; overlay st_dev} is not unique, so
-	 * use the non persistent overlay st_ino.
-	 */
-	if (ovl_same_sb(dentry->d_sb)) {
-		if (OVL_TYPE_ORIGIN(type)) {
-			struct kstat lowerstat;
-
-			ovl_path_lower(dentry, &realpath);
-			err = vfs_getattr(&realpath, &lowerstat,
-					  STATX_INO, flags);
-			if (err)
-				goto out;
-
-			WARN_ON_ONCE(stat->dev != lowerstat.dev);
-			stat->ino = lowerstat.ino;
-		}
-	} else {
-		stat->ino = dentry->d_inode->i_ino;
-	}
-
-	/*
-	 * Always use the overlay st_dev for directories, so 'find -xdev' will
-	 * scan the entire overlay mount and won't cross the overlay mount
-	 * boundaries.
-	 */
-	stat->dev = dentry->d_sb->s_dev;
-
-	/*
-	 * It's probably not worth it to count subdirs to get the
-	 * correct link count.  nlink=1 seems to pacify 'find' and
-	 * other utilities.
-	 */
-	if (OVL_TYPE_MERGE(type))
-		stat->nlink = 1;
-out:
-	revert_creds(old_cred);
-
-	return err;
-}
-
 /* Common operations required to be done after creation of file on upper */
 static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
 			    struct dentry *newdentry, bool hardlink)
@@ -1099,7 +1040,7 @@ const struct inode_operations ovl_dir_inode_operations = {
 	.create		= ovl_create,
 	.mknod		= ovl_mknod,
 	.permission	= ovl_permission,
-	.getattr	= ovl_dir_getattr,
+	.getattr	= ovl_getattr,
 	.listxattr	= ovl_listxattr,
 	.get_acl	= ovl_get_acl,
 	.update_time	= ovl_update_time,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 3dc693a78de2..ad9547f82da5 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -57,13 +57,14 @@ out:
 	return err;
 }
 
-static int ovl_getattr(const struct path *path, struct kstat *stat,
-		       u32 request_mask, unsigned int flags)
+int ovl_getattr(const struct path *path, struct kstat *stat,
+		u32 request_mask, unsigned int flags)
 {
 	struct dentry *dentry = path->dentry;
 	enum ovl_path_type type;
 	struct path realpath;
 	const struct cred *old_cred;
+	bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
 	int err;
 
 	type = ovl_path_real(dentry, &realpath);
@@ -85,10 +86,11 @@ static int ovl_getattr(const struct path *path, struct kstat *stat,
 	if (ovl_same_sb(dentry->d_sb)) {
 		if (OVL_TYPE_ORIGIN(type)) {
 			struct kstat lowerstat;
+			u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
 
 			ovl_path_lower(dentry, &realpath);
 			err = vfs_getattr(&realpath, &lowerstat,
-					  STATX_INO | STATX_NLINK, flags);
+					  lowermask, flags);
 			if (err)
 				goto out;
 
@@ -98,11 +100,32 @@ static int ovl_getattr(const struct path *path, struct kstat *stat,
 			 * upper files, so we cannot use the lower origin st_ino
 			 * for those different files, even for the same fs case.
 			 */
-			if (lowerstat.nlink == 1)
+			if (is_dir || lowerstat.nlink == 1)
 				stat->ino = lowerstat.ino;
 		}
 		stat->dev = dentry->d_sb->s_dev;
+	} else if (is_dir) {
+		/*
+		 * If not all layers are on the same fs the pair {real st_ino;
+		 * overlay st_dev} is not unique, so use the non persistent
+		 * overlay st_ino.
+		 *
+		 * Always use the overlay st_dev for directories, so 'find
+		 * -xdev' will scan the entire overlay mount and won't cross the
+		 * overlay mount boundaries.
+		 */
+		stat->dev = dentry->d_sb->s_dev;
+		stat->ino = dentry->d_inode->i_ino;
 	}
+
+	/*
+	 * It's probably not worth it to count subdirs to get the
+	 * correct link count.  nlink=1 seems to pacify 'find' and
+	 * other utilities.
+	 */
+	if (is_dir && OVL_TYPE_MERGE(type))
+		stat->nlink = 1;
+
 out:
 	revert_creds(old_cred);
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 98af19828695..caa36cb9c46d 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -236,6 +236,8 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
 
 /* inode.c */
 int ovl_setattr(struct dentry *dentry, struct iattr *attr);
+int ovl_getattr(const struct path *path, struct kstat *stat,
+		u32 request_mask, unsigned int flags);
 int ovl_permission(struct inode *inode, int mask);
 int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
 		  size_t size, int flags);
-- 
cgit v1.2.3


From 5b6c9053fb38a66fd5c6177fcf5022b24767811a Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 24 Apr 2017 19:54:13 +0300
Subject: ovl: persistent inode numbers for upper hardlinks

An upper type non directory dentry that is a copy up target
should have a reference to its lower copy up origin.

There are three ways for an upper type dentry to be instantiated:
1. A lower type dentry that is being copied up
2. An entry that is found in upper dir by ovl_lookup()
3. A negative dentry is hardlinked to an upper type dentry

In the first case, the lower reference is set before copy up.
In the second case, the lower reference is found by ovl_lookup().
In the last case of hardlinked upper dentry, it is not easy to
update the lower reference of the negative dentry.  Instead,
drop the newly hardlinked negative dentry from dcache and let
the next access call ovl_lookup() to find its lower reference.

This makes sure that the inode number reported by stat(2) after
the hardlink is created is the same inode number that will be
reported by stat(2) after mount cycle, which is the inode number
of the lower copy up origin of the hardlink source.

NOTE that this does not fix breaking of lower hardlinks on copy
up, but only fixes the case of lower nlink == 1, whose upper copy
up inode is hardlinked in upper dir.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index f4cf2928cf8e..723b98b90698 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -152,6 +152,9 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
 		inc_nlink(inode);
 	}
 	d_instantiate(dentry, inode);
+	/* Force lookup of new upper hardlink to find its lower */
+	if (hardlink)
+		d_drop(dentry);
 }
 
 static bool ovl_type_merge(struct dentry *dentry)
-- 
cgit v1.2.3


From 65f2673832d4647f6d46783d8d745d2b15d090c4 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 25 Apr 2017 17:28:31 +0300
Subject: ovl: update documentation w.r.t. constant inode numbers

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 Documentation/filesystems/overlayfs.txt | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index 634d03e20c2d..c9e884b52698 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -21,12 +21,19 @@ from accessing the corresponding object from the original filesystem.
 This is most obvious from the 'st_dev' field returned by stat(2).
 
 While directories will report an st_dev from the overlay-filesystem,
-all non-directory objects will report an st_dev from the lower or
+non-directory objects may report an st_dev from the lower filesystem or
 upper filesystem that is providing the object.  Similarly st_ino will
 only be unique when combined with st_dev, and both of these can change
 over the lifetime of a non-directory object.  Many applications and
 tools ignore these values and will not be affected.
 
+In the special case of all overlay layers on the same underlying
+filesystem, all objects will report an st_dev from the overlay
+filesystem and st_ino from the underlying filesystem.  This will
+make the overlay mount more compliant with filesystem scanners and
+overlay objects will be distinguishable from the corresponding
+objects in the original filesystem.
+
 Upper and Lower
 ---------------
 
-- 
cgit v1.2.3


From 9051247dcf9ecee156d2ddac39a4f1ca591f8428 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 5 May 2017 16:36:53 +0200
Subject: bridge: netlink: account for IFLA_BRPORT_{B, M}CAST_FLOOD size and
 policy

The attribute sizes for IFLA_BRPORT_MCAST_FLOOD and
IFLA_BRPORT_BCAST_FLOOD weren't accounted for in br_port_info_size()
when they were added. Do so now and also add the corresponding policy
entries:

Cc: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Cc: Mike Manning <mmanning@brocade.com>
Fixes: b6cb5ac8331b ("net: bridge: add per-port multicast flood flag")
Fixes: 99f906e9ad7b ("bridge: add per-port broadcast flood flag")
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a572db710d4e..c5ce7745b230 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -133,6 +133,8 @@ static inline size_t br_port_info_size(void)
 		+ nla_total_size(1)	/* IFLA_BRPORT_MCAST_TO_UCAST */
 		+ nla_total_size(1)	/* IFLA_BRPORT_LEARNING */
 		+ nla_total_size(1)	/* IFLA_BRPORT_UNICAST_FLOOD */
+		+ nla_total_size(1)	/* IFLA_BRPORT_MCAST_FLOOD */
+		+ nla_total_size(1)	/* IFLA_BRPORT_BCAST_FLOOD */
 		+ nla_total_size(1)	/* IFLA_BRPORT_PROXYARP */
 		+ nla_total_size(1)	/* IFLA_BRPORT_PROXYARP_WIFI */
 		+ nla_total_size(1)	/* IFLA_BRPORT_VLAN_TUNNEL */
@@ -633,6 +635,8 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
 	[IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 },
 	[IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NLA_U8 },
 	[IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 },
+	[IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
+	[IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
 };
 
 /* Change the state of the port and notify spanning tree */
-- 
cgit v1.2.3


From 34bf129a7f068e3108dbb051b4b05674e2a270e7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 5 May 2017 17:25:12 +0200
Subject: fbdev: sti: don't select CONFIG_VT

While working on another build error, I ran into several variations of
this dependency loop:

subsection "Kconfig recursive dependency limitations"
drivers/input/Kconfig:8:	symbol INPUT is selected by VT
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/tty/Kconfig:12:	symbol VT is selected by FB_STI
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/video/fbdev/Kconfig:677:	symbol FB_STI depends on FB
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/video/fbdev/Kconfig:5:	symbol FB is selected by DRM_KMS_FB_HELPER
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/gpu/drm/Kconfig:72:	symbol DRM_KMS_FB_HELPER is selected by DRM_KMS_CMA_HELPER
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/gpu/drm/Kconfig:137:	symbol DRM_KMS_CMA_HELPER is selected by DRM_HDLCD
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/gpu/drm/arm/Kconfig:6:	symbol DRM_HDLCD depends on OF
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/of/Kconfig:4:	symbol OF is selected by X86_INTEL_CE
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
arch/x86/Kconfig:523:	symbol X86_INTEL_CE depends on X86_IO_APIC
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
arch/x86/Kconfig:1011:	symbol X86_IO_APIC depends on X86_LOCAL_APIC
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
arch/x86/Kconfig:1005:	symbol X86_LOCAL_APIC depends on X86_UP_APIC
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
arch/x86/Kconfig:980:	symbol X86_UP_APIC depends on PCI_MSI
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/pci/Kconfig:11:	symbol PCI_MSI is selected by AMD_IOMMU
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/iommu/Kconfig:106:	symbol AMD_IOMMU depends on IOMMU_SUPPORT
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/iommu/Kconfig:5:	symbol IOMMU_SUPPORT is selected by DRM_ETNAVIV
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/gpu/drm/etnaviv/Kconfig:2:	symbol DRM_ETNAVIV depends on THERMAL
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/thermal/Kconfig:5:	symbol THERMAL is selected by ACPI_VIDEO
For a resolution refer to Documentation/kbuild/kconfig-language.txt
subsection "Kconfig recursive dependency limitations"
drivers/acpi/Kconfig:183:	symbol ACPI_VIDEO is selected by INPUT

This doesn't currently show up as I fixed the 'THERMAL' part of it,
but I noticed that the FB_STI dependency should not be there but
was introduced by slightly incorrect bug-fix patch that tried to
fix a link error.

Instead of selecting 'VT' to make us enter the drivers/video/console
directory at compile-time, it's sufficient to build the
drivers/video/console/sticore.c file by adding its directory
to when CONFIG_FB_STI is enabled. Alternatively, we could move the
sticore code to another directory that is always built when we
have at STI_CONSOLE or FB_STI enabled.

Fixes: 17085a934592 ("parisc: stifb: should depend on STI_CONSOLE")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Helge Deller <deller@gmx.de>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/Makefile      | 1 +
 drivers/video/fbdev/Kconfig | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 9ad3c17d6456..445b2c230b56 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_VGASTATE)            += vgastate.o
 obj-$(CONFIG_HDMI)                += hdmi.o
 
 obj-$(CONFIG_VT)		  += console/
+obj-$(CONFIG_FB_STI)		  += console/
 obj-$(CONFIG_LOGO)		  += logo/
 obj-y				  += backlight/
 
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 922e4eaed9c5..5c6696bb56da 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -689,8 +689,6 @@ config FB_STI
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
-	select STI_CONSOLE
-	select VT
 	default y
 	---help---
 	  STI refers to the HP "Standard Text Interface" which is a set of
-- 
cgit v1.2.3


From 84b114b98452c431299d99c135f751659e517acb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 5 May 2017 06:56:54 -0700
Subject: tcp: randomize timestamps on syncookies

Whole point of randomization was to hide server uptime, but an attacker
can simply start a syn flood and TCP generates 'old style' timestamps,
directly revealing server jiffies value.

Also, TSval sent by the server to a particular remote address vary
depending on syncookies being sent or not, potentially triggering PAWS
drops for innocent clients.

Lets implement proper randomization, including for SYNcookies.

Also we do not need to export sysctl_tcp_timestamps, since it is not
used from a module.

In v2, I added Florian feedback and contribution, adding tsoff to
tcp_get_cookie_sock().

v3 removed one unused variable in tcp_v4_connect() as Florian spotted.

Fixes: 95a22caee396c ("tcp: randomize tcp timestamp offsets for each connection")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Tested-by: Florian Westphal <fw@strlen.de>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/secure_seq.h | 10 ++++++----
 include/net/tcp.h        |  5 +++--
 net/core/secure_seq.c    | 31 +++++++++++++++++++------------
 net/ipv4/syncookies.c    | 12 ++++++++++--
 net/ipv4/tcp_input.c     |  8 +++-----
 net/ipv4/tcp_ipv4.c      | 32 +++++++++++++++++++-------------
 net/ipv6/syncookies.c    | 10 +++++++++-
 net/ipv6/tcp_ipv6.c      | 32 +++++++++++++++++++-------------
 8 files changed, 88 insertions(+), 52 deletions(-)

diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index fe236b3429f0..b94006f6fbdd 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -6,10 +6,12 @@
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport);
-u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr,
-			     __be16 sport, __be16 dport, u32 *tsoff);
-u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
-			       __be16 sport, __be16 dport, u32 *tsoff);
+u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
+		   __be16 sport, __be16 dport);
+u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr);
+u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
+		     __be16 sport, __be16 dport);
+u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr);
 u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 				__be16 sport, __be16 dport);
 u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 270e5cc43c99..8c0e5a901d64 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -470,7 +470,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
 /* From syncookies.c */
 struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 				 struct request_sock *req,
-				 struct dst_entry *dst);
+				 struct dst_entry *dst, u32 tsoff);
 int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
 		      u32 cookie);
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
@@ -1822,7 +1822,8 @@ struct tcp_request_sock_ops {
 #endif
 	struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
 				       const struct request_sock *req);
-	__u32 (*init_seq_tsoff)(const struct sk_buff *skb, u32 *tsoff);
+	u32 (*init_seq)(const struct sk_buff *skb);
+	u32 (*init_ts_off)(const struct sk_buff *skb);
 	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
 			   struct flowi *fl, struct request_sock *req,
 			   struct tcp_fastopen_cookie *foc,
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6bd2f8fb0476..ae35cce3a40d 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -24,9 +24,13 @@ static siphash_key_t ts_secret __read_mostly;
 
 static __always_inline void net_secret_init(void)
 {
-	net_get_random_once(&ts_secret, sizeof(ts_secret));
 	net_get_random_once(&net_secret, sizeof(net_secret));
 }
+
+static __always_inline void ts_secret_init(void)
+{
+	net_get_random_once(&ts_secret, sizeof(ts_secret));
+}
 #endif
 
 #ifdef CONFIG_INET
@@ -47,7 +51,7 @@ static u32 seq_scale(u32 seq)
 #endif
 
 #if IS_ENABLED(CONFIG_IPV6)
-static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
+u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
 {
 	const struct {
 		struct in6_addr saddr;
@@ -60,12 +64,14 @@ static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
 	if (sysctl_tcp_timestamps != 1)
 		return 0;
 
+	ts_secret_init();
 	return siphash(&combined, offsetofend(typeof(combined), daddr),
 		       &ts_secret);
 }
+EXPORT_SYMBOL(secure_tcpv6_ts_off);
 
-u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
-			       __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
+		     __be16 sport, __be16 dport)
 {
 	const struct {
 		struct in6_addr saddr;
@@ -78,14 +84,14 @@ u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
 		.sport = sport,
 		.dport = dport
 	};
-	u64 hash;
+	u32 hash;
+
 	net_secret_init();
 	hash = siphash(&combined, offsetofend(typeof(combined), dport),
 		       &net_secret);
-	*tsoff = secure_tcpv6_ts_off(saddr, daddr);
 	return seq_scale(hash);
 }
-EXPORT_SYMBOL(secure_tcpv6_seq_and_tsoff);
+EXPORT_SYMBOL(secure_tcpv6_seq);
 
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport)
@@ -107,11 +113,12 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 #endif
 
 #ifdef CONFIG_INET
-static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
+u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
 {
 	if (sysctl_tcp_timestamps != 1)
 		return 0;
 
+	ts_secret_init();
 	return siphash_2u32((__force u32)saddr, (__force u32)daddr,
 			    &ts_secret);
 }
@@ -121,15 +128,15 @@ static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
  * it would be easy enough to have the former function use siphash_4u32, passing
  * the arguments as separate u32.
  */
-u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr,
-			     __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
+		   __be16 sport, __be16 dport)
 {
-	u64 hash;
+	u32 hash;
+
 	net_secret_init();
 	hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
 			    (__force u32)sport << 16 | (__force u32)dport,
 			    &net_secret);
-	*tsoff = secure_tcp_ts_off(saddr, daddr);
 	return seq_scale(hash);
 }
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 496b97e17aaf..0257d965f111 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -16,6 +16,7 @@
 #include <linux/siphash.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
+#include <net/secure_seq.h>
 #include <net/tcp.h>
 #include <net/route.h>
 
@@ -203,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check);
 
 struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 				 struct request_sock *req,
-				 struct dst_entry *dst)
+				 struct dst_entry *dst, u32 tsoff)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sock *child;
@@ -213,6 +214,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 						 NULL, &own_req);
 	if (child) {
 		atomic_set(&req->rsk_refcnt, 1);
+		tcp_sk(child)->tsoffset = tsoff;
 		sock_rps_save_rxhash(child, skb);
 		inet_csk_reqsk_queue_add(sk, req, child);
 	} else {
@@ -292,6 +294,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	struct rtable *rt;
 	__u8 rcv_wscale;
 	struct flowi4 fl4;
+	u32 tsoff = 0;
 
 	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -311,6 +314,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, 0, NULL);
 
+	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
+		tsoff = secure_tcp_ts_off(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
+		tcp_opt.rcv_tsecr -= tsoff;
+	}
+
 	if (!cookie_timestamp_decode(&tcp_opt))
 		goto out;
 
@@ -381,7 +389,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	ireq->rcv_wscale  = rcv_wscale;
 	ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
 
-	ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
+	ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff);
 	/* ip_queue_xmit() depends on our flow being setup
 	 * Normal sockets get it right from inet_csk_route_child_sock()
 	 */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9739962bfb3f..5a3ad09e2786 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -85,7 +85,6 @@ int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 1;
 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-EXPORT_SYMBOL(sysctl_tcp_timestamps);
 
 /* rfc5961 challenge ack rate limiting */
 int sysctl_tcp_challenge_ack_limit = 1000;
@@ -6347,8 +6346,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
 
-	if (isn && tmp_opt.tstamp_ok)
-		af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
+	if (tmp_opt.tstamp_ok)
+		tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb);
 
 	if (!want_cookie && !isn) {
 		/* Kill the following clause, if you dislike this way. */
@@ -6368,7 +6367,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 			goto drop_and_release;
 		}
 
-		isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
+		isn = af_ops->init_seq(skb);
 	}
 	if (!dst) {
 		dst = af_ops->route_req(sk, &fl, req);
@@ -6380,7 +6379,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
 	if (want_cookie) {
 		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
-		tcp_rsk(req)->ts_off = 0;
 		req->cookie_ts = tmp_opt.tstamp_ok;
 		if (!tmp_opt.tstamp_ok)
 			inet_rsk(req)->ecn_ok = 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cbbafe546c0f..3a51582bef55 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -94,12 +94,18 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
-static u32 tcp_v4_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff)
+static u32 tcp_v4_init_seq(const struct sk_buff *skb)
 {
-	return secure_tcp_seq_and_tsoff(ip_hdr(skb)->daddr,
-					ip_hdr(skb)->saddr,
-					tcp_hdr(skb)->dest,
-					tcp_hdr(skb)->source, tsoff);
+	return secure_tcp_seq(ip_hdr(skb)->daddr,
+			      ip_hdr(skb)->saddr,
+			      tcp_hdr(skb)->dest,
+			      tcp_hdr(skb)->source);
+}
+
+static u32 tcp_v4_init_ts_off(const struct sk_buff *skb)
+{
+	return secure_tcp_ts_off(ip_hdr(skb)->daddr,
+				 ip_hdr(skb)->saddr);
 }
 
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -145,7 +151,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct flowi4 *fl4;
 	struct rtable *rt;
 	int err;
-	u32 seq;
 	struct ip_options_rcu *inet_opt;
 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 
@@ -232,13 +237,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	rt = NULL;
 
 	if (likely(!tp->repair)) {
-		seq = secure_tcp_seq_and_tsoff(inet->inet_saddr,
-					       inet->inet_daddr,
-					       inet->inet_sport,
-					       usin->sin_port,
-					       &tp->tsoffset);
 		if (!tp->write_seq)
-			tp->write_seq = seq;
+			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
+						       inet->inet_daddr,
+						       inet->inet_sport,
+						       usin->sin_port);
+		tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr,
+						 inet->inet_daddr);
 	}
 
 	inet->inet_id = tp->write_seq ^ jiffies;
@@ -1239,7 +1244,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 	.cookie_init_seq =	cookie_v4_init_sequence,
 #endif
 	.route_req	=	tcp_v4_route_req,
-	.init_seq_tsoff	=	tcp_v4_init_seq_and_tsoff,
+	.init_seq	=	tcp_v4_init_seq,
+	.init_ts_off	=	tcp_v4_init_ts_off,
 	.send_synack	=	tcp_v4_send_synack,
 };
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 895ff650db43..5abc3692b901 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -18,6 +18,7 @@
 #include <linux/random.h>
 #include <linux/siphash.h>
 #include <linux/kernel.h>
+#include <net/secure_seq.h>
 #include <net/ipv6.h>
 #include <net/tcp.h>
 
@@ -143,6 +144,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	int mss;
 	struct dst_entry *dst;
 	__u8 rcv_wscale;
+	u32 tsoff = 0;
 
 	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -162,6 +164,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, 0, NULL);
 
+	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
+		tsoff = secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32,
+					    ipv6_hdr(skb)->saddr.s6_addr32);
+		tcp_opt.rcv_tsecr -= tsoff;
+	}
+
 	if (!cookie_timestamp_decode(&tcp_opt))
 		goto out;
 
@@ -242,7 +250,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq->rcv_wscale = rcv_wscale;
 	ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
 
-	ret = tcp_get_cookie_sock(sk, skb, req, dst);
+	ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff);
 out:
 	return ret;
 out_free:
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8e42e8f54b70..aeb9497b5bb7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -101,12 +101,18 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 	}
 }
 
-static u32 tcp_v6_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff)
+static u32 tcp_v6_init_seq(const struct sk_buff *skb)
 {
-	return secure_tcpv6_seq_and_tsoff(ipv6_hdr(skb)->daddr.s6_addr32,
-					  ipv6_hdr(skb)->saddr.s6_addr32,
-					  tcp_hdr(skb)->dest,
-					  tcp_hdr(skb)->source, tsoff);
+	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
+				ipv6_hdr(skb)->saddr.s6_addr32,
+				tcp_hdr(skb)->dest,
+				tcp_hdr(skb)->source);
+}
+
+static u32 tcp_v6_init_ts_off(const struct sk_buff *skb)
+{
+	return secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32,
+				   ipv6_hdr(skb)->saddr.s6_addr32);
 }
 
 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -122,7 +128,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct flowi6 fl6;
 	struct dst_entry *dst;
 	int addr_type;
-	u32 seq;
 	int err;
 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 
@@ -282,13 +287,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	sk_set_txhash(sk);
 
 	if (likely(!tp->repair)) {
-		seq = secure_tcpv6_seq_and_tsoff(np->saddr.s6_addr32,
-						 sk->sk_v6_daddr.s6_addr32,
-						 inet->inet_sport,
-						 inet->inet_dport,
-						 &tp->tsoffset);
 		if (!tp->write_seq)
-			tp->write_seq = seq;
+			tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
+							 sk->sk_v6_daddr.s6_addr32,
+							 inet->inet_sport,
+							 inet->inet_dport);
+		tp->tsoffset = secure_tcpv6_ts_off(np->saddr.s6_addr32,
+						   sk->sk_v6_daddr.s6_addr32);
 	}
 
 	if (tcp_fastopen_defer_connect(sk, &err))
@@ -749,7 +754,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 	.cookie_init_seq =	cookie_v6_init_sequence,
 #endif
 	.route_req	=	tcp_v6_route_req,
-	.init_seq_tsoff	=	tcp_v6_init_seq_and_tsoff,
+	.init_seq	=	tcp_v6_init_seq,
+	.init_ts_off	=	tcp_v6_init_ts_off,
 	.send_synack	=	tcp_v6_send_synack,
 };
 
-- 
cgit v1.2.3


From 0048fdd06614a4ea088f9fcad11511956b795698 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 4 May 2017 13:44:04 -0400
Subject: NFSv4.1: RECLAIM_COMPLETE must handle
 NFS4ERR_CONN_NOT_BOUND_TO_SESSION

If the server returns NFS4ERR_CONN_NOT_BOUND_TO_SESSION because we
are trunking, then RECLAIM_COMPLETE must handle that by calling
nfs4_schedule_session_recovery() and then retrying.

Reported-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Tested-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/nfs4proc.c  |  7 ++++++-
 fs/nfs/nfs4state.c | 10 +++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 158c3d52146f..62fc8eed53cc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8153,6 +8153,12 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
 		/* fall through */
 	case -NFS4ERR_RETRY_UNCACHED_REP:
 		return -EAGAIN;
+	case -NFS4ERR_BADSESSION:
+	case -NFS4ERR_DEADSESSION:
+	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+		nfs4_schedule_session_recovery(clp->cl_session,
+				task->tk_status);
+		break;
 	default:
 		nfs4_schedule_lease_recovery(clp);
 	}
@@ -8231,7 +8237,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
 	if (status == 0)
 		status = task->tk_status;
 	rpc_put_task(task);
-	return 0;
 out:
 	dprintk("<-- %s status=%d\n", __func__, status);
 	return status;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8156bad6b441..b34de036501b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1649,13 +1649,14 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
 	nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
 }
 
-static void nfs4_reclaim_complete(struct nfs_client *clp,
+static int nfs4_reclaim_complete(struct nfs_client *clp,
 				 const struct nfs4_state_recovery_ops *ops,
 				 struct rpc_cred *cred)
 {
 	/* Notify the server we're done reclaiming our state */
 	if (ops->reclaim_complete)
-		(void)ops->reclaim_complete(clp, cred);
+		return ops->reclaim_complete(clp, cred);
+	return 0;
 }
 
 static void nfs4_clear_reclaim_server(struct nfs_server *server)
@@ -1702,13 +1703,16 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
 {
 	const struct nfs4_state_recovery_ops *ops;
 	struct rpc_cred *cred;
+	int err;
 
 	if (!nfs4_state_clear_reclaim_reboot(clp))
 		return;
 	ops = clp->cl_mvops->reboot_recovery_ops;
 	cred = nfs4_get_clid_cred(clp);
-	nfs4_reclaim_complete(clp, ops, cred);
+	err = nfs4_reclaim_complete(clp, ops, cred);
 	put_rpccred(cred);
+	if (err == -NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
+		set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
 }
 
 static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
-- 
cgit v1.2.3


From 5c737cb29977896eb0f373d3adbcdea73b815808 Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Thu, 27 Apr 2017 10:45:15 -0400
Subject: Fix nfs_client refcounting if kmalloc fails in nfs4_proc_exchange_id
 and nfs4_proc_async_renew

If memory allocation fails for the callback data, we need to put the nfs_client
or we end up with an elevated refcount.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 62fc8eed53cc..f48dfb7f3691 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4807,8 +4807,10 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred,
 	if (!atomic_inc_not_zero(&clp->cl_count))
 		return -EIO;
 	data = kmalloc(sizeof(*data), GFP_NOFS);
-	if (data == NULL)
+	if (data == NULL) {
+		nfs_put_client(clp);
 		return -ENOMEM;
+	}
 	data->client = clp;
 	data->timestamp = jiffies;
 	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT,
@@ -7454,8 +7456,10 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 		return -EIO;
 
 	calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
-	if (!calldata)
+	if (!calldata) {
+		nfs_put_client(clp);
 		return -ENOMEM;
+	}
 
 	if (!xprt)
 		nfs4_init_boot_verifier(clp, &verifier);
-- 
cgit v1.2.3


From 59b86d85a7a30ba56289cdac83cd66a7dd2adebf Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Fri, 28 Apr 2017 18:35:19 +0800
Subject: NFS: always treat the invocation of nfs_getattr as cache hit when
 noac is on

When using 'ls -l' to display a large directory, if noac option is used,
in function nfs_getattr() nfs_need_revalidate_inode() will always be true
for NFSv3 and the nfs_entry cache of the directory will be flushed. The
flush will lead to a fully reread of the directory entries from server.

To prevent the unnecessary RPCs, we need to check whether or not the
noac option is used, and always report the invocation of nfs_getattr()
as cache hit instead cache miss when it's on.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/inode.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f489a5a71bd5..1de93ba78dc9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -734,7 +734,10 @@ int nfs_getattr(const struct path *path, struct kstat *stat,
 	if (need_atime || nfs_need_revalidate_inode(inode)) {
 		struct nfs_server *server = NFS_SERVER(inode);
 
-		nfs_readdirplus_parent_cache_miss(path->dentry);
+		if (!(server->flags & NFS_MOUNT_NOAC))
+			nfs_readdirplus_parent_cache_miss(path->dentry);
+		else
+			nfs_readdirplus_parent_cache_hit(path->dentry);
 		err = __nfs_revalidate_inode(server, inode);
 	} else
 		nfs_readdirplus_parent_cache_hit(path->dentry);
-- 
cgit v1.2.3


From 0795bf8357c1887e2a95e6e4f5b89d0896a0d929 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Wed, 3 May 2017 20:52:21 +0200
Subject: nfs: use kmap/kunmap directly

This patch removes useless nfs_readdir_get_array() and
nfs_readdir_release_array() as suggested by Trond Myklebust

nfs_readdir() calls nfs_revalidate_mapping() before
readdir_search_pagecache() , nfs_do_filldir(), uncached_readdir()
so mapping should be correct.

While kmap() can't fail, all subsequent error checks were removed
as well as unused labels.

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/dir.c | 67 +++++++++++-------------------------------------------------
 1 file changed, 12 insertions(+), 55 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3a188cb5ebce..32ccd7754f8a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -169,27 +169,6 @@ typedef struct {
 	unsigned int	eof:1;
 } nfs_readdir_descriptor_t;
 
-/*
- * The caller is responsible for calling nfs_readdir_release_array(page)
- */
-static
-struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
-{
-	void *ptr;
-	if (page == NULL)
-		return ERR_PTR(-EIO);
-	ptr = kmap(page);
-	if (ptr == NULL)
-		return ERR_PTR(-ENOMEM);
-	return ptr;
-}
-
-static
-void nfs_readdir_release_array(struct page *page)
-{
-	kunmap(page);
-}
-
 /*
  * we are freeing strings created by nfs_add_to_readdir_array()
  */
@@ -229,13 +208,10 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le
 static
 int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 {
-	struct nfs_cache_array *array = nfs_readdir_get_array(page);
+	struct nfs_cache_array *array = kmap(page);
 	struct nfs_cache_array_entry *cache_entry;
 	int ret;
 
-	if (IS_ERR(array))
-		return PTR_ERR(array);
-
 	cache_entry = &array->array[array->size];
 
 	/* Check that this entry lies within the page bounds */
@@ -254,7 +230,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 	if (entry->eof != 0)
 		array->eof_index = array->size;
 out:
-	nfs_readdir_release_array(page);
+	kunmap(page);
 	return ret;
 }
 
@@ -343,11 +319,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 	struct nfs_cache_array *array;
 	int status;
 
-	array = nfs_readdir_get_array(desc->page);
-	if (IS_ERR(array)) {
-		status = PTR_ERR(array);
-		goto out;
-	}
+	array = kmap(desc->page);
 
 	if (*desc->dir_cookie == 0)
 		status = nfs_readdir_search_for_pos(array, desc);
@@ -359,8 +331,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 		desc->current_index += array->size;
 		desc->page_index++;
 	}
-	nfs_readdir_release_array(desc->page);
-out:
+	kunmap(desc->page);
 	return status;
 }
 
@@ -596,13 +567,10 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 
 out_nopages:
 	if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
-		array = nfs_readdir_get_array(page);
-		if (!IS_ERR(array)) {
-			array->eof_index = array->size;
-			status = 0;
-			nfs_readdir_release_array(page);
-		} else
-			status = PTR_ERR(array);
+		array = kmap(page);
+		array->eof_index = array->size;
+		status = 0;
+		kunmap(page);
 	}
 
 	put_page(scratch);
@@ -664,11 +632,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 		goto out;
 	}
 
-	array = nfs_readdir_get_array(page);
-	if (IS_ERR(array)) {
-		status = PTR_ERR(array);
-		goto out_label_free;
-	}
+	array = kmap(page);
 	memset(array, 0, sizeof(struct nfs_cache_array));
 	array->eof_index = -1;
 
@@ -692,8 +656,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 
 	nfs_readdir_free_pages(pages, array_size);
 out_release_array:
-	nfs_readdir_release_array(page);
-out_label_free:
+	kunmap(page);
 	nfs4_label_free(entry.label);
 out:
 	nfs_free_fattr(entry.fattr);
@@ -791,12 +754,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
 	struct nfs_cache_array *array = NULL;
 	struct nfs_open_dir_context *ctx = file->private_data;
 
-	array = nfs_readdir_get_array(desc->page);
-	if (IS_ERR(array)) {
-		res = PTR_ERR(array);
-		goto out;
-	}
-
+	array = kmap(desc->page);
 	for (i = desc->cache_entry_index; i < array->size; i++) {
 		struct nfs_cache_array_entry *ent;
 
@@ -817,8 +775,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
 	if (array->eof_index >= 0)
 		desc->eof = 1;
 
-	nfs_readdir_release_array(desc->page);
-out:
+	kunmap(desc->page);
 	cache_page_release(desc);
 	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
 			(unsigned long long)*desc->dir_cookie, res);
-- 
cgit v1.2.3


From 2c041afc5af93a47b8bae2b1190a7b6ed028b706 Mon Sep 17 00:00:00 2001
From: Rakesh Pandit <rakesh@tuxera.com>
Date: Fri, 5 May 2017 14:28:23 +0300
Subject: net: alx: handle pci_alloc_irq_vectors return correctly

It was introduced while switching to pci_alloc_irq_vectors recently
and fixes:

[   60.527052] alx 0000:03:00.0 enp3s0: Enabling MSI-X interrupts failed!
[   60.529323] BUG: unable to handle kernel NULL pointer dereference at 00000000000000b8
[   60.531589] IP: alx_alloc_napis+0xe6/0x1e0 [alx]
[   60.533831] PGD 0
[   60.533833] P4D 0

[   60.540559] Oops: 0002 [#1] SMP
[   60.542759] Modules linked in: ebtables ip6table_filter ip6_tables.....
[   60.549990]  drm_kms_helper drm crc32c_intel alx serio_raw mdio wmi video i2c_hid uas usb_storage
[   60.551404] CPU: 0 PID: 999 Comm: NetworkManager Not tainted 4.11.0+ #1
[   60.552813] Hardware name: Acer Predator G9-591/Mustang_SLS, BIOS V1.10 03/03/2016
[   60.554219] task: ffff8804ae833c00 task.stack: ffffc90003eec000
[   60.555383] RIP: 0010:alx_alloc_napis+0xe6/0x1e0 [alx]
[   60.556615] RSP: 0018:ffffc90003eef660 EFLAGS: 00010286
[   60.557787] RAX: ffff8804962835a0 RBX: ffff8804aee8a8c0 RCX: 0000000000000000
[   60.558987] RDX: 0000000000000060 RSI: 0000000000000000 RDI: ffff880496283600
[   60.559979] RBP: ffffc90003eef688 R08: ffff8804c1c1e7e0 R09: ffff8804962835a0
[   60.560978] R10: ffff8804962835a0 R11: 0000000000000102 R12: 0000000000000000
[   60.561974] R13: 0000000000000000 R14: ffff8804aee8aaf0 R15: ffffffffa0052ea0
[   60.562974] FS:  00007f1cecbc9940(0000) GS:ffff8804c1c00000(0000) knlGS:0000000000000000
[   60.564003] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   60.564884] CR2: 00000000000000b8 CR3: 0000000496025000 CR4: 00000000003406f0
[   60.565782] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   60.566676] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   60.567560] Call Trace:
[   60.568500]  __alx_open+0xa2/0x570 [alx]
[   60.569355]  ? notifier_call_chain+0x4a/0x70
[   60.570144]  alx_open+0x17/0x20 [alx]
[   60.570909]  __dev_open+0xc6/0x140
[   60.571682]  ? _raw_spin_unlock_bh+0x1a/0x20
[   60.572469]  __dev_change_flags+0x9d/0x160
[   60.573252]  dev_change_flags+0x29/0x60
[   60.574070]  do_setlink+0x32e/0xc90
[   60.574815]  ? ttwu_do_activate+0x77/0x80
[   60.575544]  ? drm_fb_helper_dirty.isra.17+0xc7/0xe0 [drm_kms_helper]
[   60.576273]  ? drm_fb_helper_cfb_imageblit+0x30/0x40 [drm_kms_helper]
[   60.577004]  ? bit_putcs+0x2f7/0x560
[   60.577729]  ? nla_parse+0x35/0x140
[   60.578518]  rtnl_newlink+0x7d3/0x900
[   60.579280]  ? security_capset+0x30/0x80
[   60.580029]  ? ns_capable_common+0x68/0x80
[   60.580747]  ? ns_capable+0x13/0x20
[   60.581453]  rtnetlink_rcv_msg+0xee/0x220
[   60.582198]  ? rtnl_newlink+0x900/0x900
[   60.582909]  netlink_rcv_skb+0xe7/0x120
[   60.583601]  rtnetlink_rcv+0x28/0x30
[   60.584303]  netlink_unicast+0x18c/0x220
[   60.585002]  netlink_sendmsg+0x2ba/0x3b0
[   60.585703]  sock_sendmsg+0x38/0x50
[   60.586436]  ___sys_sendmsg+0x2b6/0x2d0
[   60.587123]  ? lockref_put_or_lock+0x5e/0x80
[   60.587822]  ? dput+0x155/0x1d0
[   60.588518]  ? mntput+0x24/0x40
[   60.589215]  __sys_sendmsg+0x54/0x90
[   60.589907]  ? __sys_sendmsg+0x54/0x90
[   60.590627]  SyS_sendmsg+0x12/0x20
[   60.591333]  entry_SYSCALL_64_fastpath+0x1a/0xa5
[   60.592021] RIP: 0033:0x7f1ceb44e3b0
[   60.592697] RSP: 002b:00007fffd7f0a2d0 EFLAGS: 00000293 ORIG_RAX: 000000000000002e
[   60.593385] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f1ceb44e3b0
[   60.594107] RDX: 0000000000000000 RSI: 00007fffd7f0a380 RDI: 000000000000000c
[   60.594798] RBP: 00007fffd7f0a800 R08: 0000000000000000 R09: 0000000000000000
[   60.595502] R10: 0000564ffbae6e20 R11: 0000000000000293 R12: 0000000000000001
[   60.596200] R13: 0000000000000002 R14: 0000000000000010 R15: 00007fffd7f0a4d0
[   60.596899] Code: ed 85 c9 0f 8f ec 00 00 00 48 8b 3d 9d 97 1a e2 ba 50 00 00 00 be c0 80 40 01 4c 8b a3 30 02 00 00 e8 ff e5 1d e1 48 85 c0 74 a3 <49> 89 84 24 b8 00 00 00 48 8b 93 30 02 00 00 48 8b 4b 08 48 89
[   60.597642] RIP: alx_alloc_napis+0xe6/0x1e0 [alx] RSP: ffffc90003eef660
[   60.598427] CR2: 00000000000000b8

Fixes: f3297f68 ("net: alx: switch to pci_alloc_irq_vectors")
Signed-off-by: Rakesh Pandit <rakesh@tuxera.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index a8c2db881b75..567ee54504bc 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -838,7 +838,7 @@ static int alx_enable_msix(struct alx_priv *alx)
 
 	err = pci_alloc_irq_vectors(alx->hw.pdev, num_vec, num_vec,
 			PCI_IRQ_MSIX);
-	if (err) {
+	if (err < 0) {
 		netdev_warn(alx->dev, "Enabling MSI-X interrupts failed!\n");
 		return err;
 	}
@@ -904,7 +904,7 @@ static int alx_init_intr(struct alx_priv *alx)
 
 	ret = pci_alloc_irq_vectors(alx->hw.pdev, 1, 1,
 			PCI_IRQ_MSI | PCI_IRQ_LEGACY);
-	if (ret)
+	if (ret < 0)
 		return ret;
 
 	alx->num_vec = 1;
-- 
cgit v1.2.3


From 868a65307d3015064db2a6fb2d6e451293e3047f Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 5 May 2017 11:59:41 -0700
Subject: ARC: mm: fix build failure in linux-next for UP builds

kisskb build service reported ARC defconfig build failures in linux-next

| arch/arc/include/asm/mmu.h:75:21: error: 'NR_CPUS' undeclared here (not in a function)
| make[3]: *** [arch/arc/mm/ioremap.o] Error 1
| make[2]: *** [arch/arc/mm] Error 2
| make[1]: *** [arch/arc] Error 2

which I bisected to a subtle side-effect of a totally benign mm patch
("mm, vmalloc: properly track vmalloc users") which caused a header
include chain deviation - asm/mmu.h using NR_CPUS before including
linux/threads.h

Fix that by adding the dependnet header and while it at fix a related
header to include linux headers aheads of asm headers as sometimes that
slso triggers such issues !

Reported-by: noreply@ellerman.id.au
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/mmu.h     | 4 ++++
 arch/arc/include/asm/pgtable.h | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index b144d7ca7d20..db7319e9b506 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -9,6 +9,10 @@
 #ifndef _ASM_ARC_MMU_H
 #define _ASM_ARC_MMU_H
 
+#ifndef __ASSEMBLY__
+#include <linux/threads.h>	/* NR_CPUS */
+#endif
+
 #if defined(CONFIG_ARC_MMU_V1)
 #define CONFIG_ARC_MMU_VER 1
 #elif defined(CONFIG_ARC_MMU_V2)
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index ee22d40afef4..08fe33830d4b 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -35,11 +35,11 @@
 #ifndef _ASM_ARC_PGTABLE_H
 #define _ASM_ARC_PGTABLE_H
 
-#include <asm/page.h>
-#include <asm/mmu.h>
+#include <linux/const.h>
 #define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
-#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/mmu.h>	/* to propagate CONFIG_ARC_MMU_VER <n> */
 
 /**************************************************************************
  * Page Table Flags
-- 
cgit v1.2.3


From cf4100d1cddcd243f80ac2af2e4c4273919ff225 Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <Alexey.Brodkin@synopsys.com>
Date: Fri, 5 May 2017 23:20:29 +0300
Subject: Revert "ARCv2: Allow enabling PAE40 w/o HIGHMEM"

This reverts commit 7cab91b87dd8eeee5911ec34be8bb0288ebba18b.

Now when we have a real hardware platform with PAE40 enabled
(here I mean axs103 with firmware v1.2) and 1 Gb of DDR mapped to
0x1_a000_0000-0x1_ffff_ffff we're really targeting memory above 4Gb
when PAE40 is enabled. This in its turn requires HIGHMEM to be enabled
otherwise user won't see any difference with enabling PAE in
kernel configuration as only lowmem will be used anyways.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 5d7fb3e7cb97..a5459698f0ee 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -444,6 +444,7 @@ config ARC_HAS_PAE40
 	bool "Support for the 40-bit Physical Address Extension"
 	default n
 	depends on ISA_ARCV2
+	select HIGHMEM
 	help
 	  Enable access to physical memory beyond 4G, only supported on
 	  ARC cores with 40 bit Physical Addressing support
-- 
cgit v1.2.3


From 8a537ece3d946227e4afa81eae0e43fa47439c7d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 26 Apr 2017 23:22:09 +0200
Subject: PM / wakeup: Integrate mechanism to abort transitions in progress

The system wakeup framework is not very consistent with respect to
the way it handles suspend-to-idle and generally wakeup events
occurring during transitions to system low-power states.

First off, system transitions in progress are aborted by the event
reporting helpers like pm_wakeup_event() only if the wakeup_count
sysfs attribute is in use (as documented), but there are cases in
which system-wide transitions should be aborted even if that is
not the case.  For example, a wakeup signal from a designated
wakeup device during system-wide PM transition, it should cause
the transition to be aborted right away.

Moreover, there is a freeze_wake() call in wakeup_source_activate(),
but that really is only effective after suspend_freeze_state has
been set to FREEZE_STATE_ENTER by freeze_enter().  However, it
is very unlikely that wakeup_source_activate() will ever be called
at that time, as it could only be triggered by a IRQF_NO_SUSPEND
interrupt handler, so wakeups from suspend-to-idle don't really
occur in wakeup_source_activate().

At the same time there is a way to abort a system suspend in
progress (or wake up the system from suspend-to-idle), which is by
calling pm_system_wakeup(), but in turn that doesn't cause any
wakeup source objects to be activated, so it will not be covered
by wakeup source statistics and will not prevent the system from
suspending again immediately (in case autosleep is used, for
example).  Consequently, if anyone wants to abort system transitions
in progress and allow the wakeup_count mechanism to work, they need
to use both pm_system_wakeup() and pm_wakeup_event(), say, at the
same time which is awkward.

For the above reasons, make it possible to trigger
pm_system_wakeup() from within wakeup_source_activate() and
provide a new pm_wakeup_hard_event() helper to do so within the
wakeup framework.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/wakeup.c | 36 ++++++++++++++++++------------------
 include/linux/pm_wakeup.h   | 25 +++++++++++++++++++++----
 2 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 136854970489..84c41c98d6fc 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -512,12 +512,13 @@ static bool wakeup_source_not_registered(struct wakeup_source *ws)
 /**
  * wakup_source_activate - Mark given wakeup source as active.
  * @ws: Wakeup source to handle.
+ * @hard: If set, abort suspends in progress and wake up from suspend-to-idle.
  *
  * Update the @ws' statistics and, if @ws has just been activated, notify the PM
  * core of the event by incrementing the counter of of wakeup events being
  * processed.
  */
-static void wakeup_source_activate(struct wakeup_source *ws)
+static void wakeup_source_activate(struct wakeup_source *ws, bool hard)
 {
 	unsigned int cec;
 
@@ -525,11 +526,8 @@ static void wakeup_source_activate(struct wakeup_source *ws)
 			"unregistered wakeup source\n"))
 		return;
 
-	/*
-	 * active wakeup source should bring the system
-	 * out of PM_SUSPEND_FREEZE state
-	 */
-	freeze_wake();
+	if (hard)
+		pm_system_wakeup();
 
 	ws->active = true;
 	ws->active_count++;
@@ -546,8 +544,9 @@ static void wakeup_source_activate(struct wakeup_source *ws)
 /**
  * wakeup_source_report_event - Report wakeup event using the given source.
  * @ws: Wakeup source to report the event for.
+ * @hard: If set, abort suspends in progress and wake up from suspend-to-idle.
  */
-static void wakeup_source_report_event(struct wakeup_source *ws)
+static void wakeup_source_report_event(struct wakeup_source *ws, bool hard)
 {
 	ws->event_count++;
 	/* This is racy, but the counter is approximate anyway. */
@@ -555,7 +554,7 @@ static void wakeup_source_report_event(struct wakeup_source *ws)
 		ws->wakeup_count++;
 
 	if (!ws->active)
-		wakeup_source_activate(ws);
+		wakeup_source_activate(ws, hard);
 }
 
 /**
@@ -573,7 +572,7 @@ void __pm_stay_awake(struct wakeup_source *ws)
 
 	spin_lock_irqsave(&ws->lock, flags);
 
-	wakeup_source_report_event(ws);
+	wakeup_source_report_event(ws, false);
 	del_timer(&ws->timer);
 	ws->timer_expires = 0;
 
@@ -739,9 +738,10 @@ static void pm_wakeup_timer_fn(unsigned long data)
 }
 
 /**
- * __pm_wakeup_event - Notify the PM core of a wakeup event.
+ * pm_wakeup_ws_event - Notify the PM core of a wakeup event.
  * @ws: Wakeup source object associated with the event source.
  * @msec: Anticipated event processing time (in milliseconds).
+ * @hard: If set, abort suspends in progress and wake up from suspend-to-idle.
  *
  * Notify the PM core of a wakeup event whose source is @ws that will take
  * approximately @msec milliseconds to be processed by the kernel.  If @ws is
@@ -750,7 +750,7 @@ static void pm_wakeup_timer_fn(unsigned long data)
  *
  * It is safe to call this function from interrupt context.
  */
-void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
+void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard)
 {
 	unsigned long flags;
 	unsigned long expires;
@@ -760,7 +760,7 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
 
 	spin_lock_irqsave(&ws->lock, flags);
 
-	wakeup_source_report_event(ws);
+	wakeup_source_report_event(ws, hard);
 
 	if (!msec) {
 		wakeup_source_deactivate(ws);
@@ -779,17 +779,17 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
  unlock:
 	spin_unlock_irqrestore(&ws->lock, flags);
 }
-EXPORT_SYMBOL_GPL(__pm_wakeup_event);
-
+EXPORT_SYMBOL_GPL(pm_wakeup_ws_event);
 
 /**
  * pm_wakeup_event - Notify the PM core of a wakeup event.
  * @dev: Device the wakeup event is related to.
  * @msec: Anticipated event processing time (in milliseconds).
+ * @hard: If set, abort suspends in progress and wake up from suspend-to-idle.
  *
- * Call __pm_wakeup_event() for the @dev's wakeup source object.
+ * Call pm_wakeup_ws_event() for the @dev's wakeup source object.
  */
-void pm_wakeup_event(struct device *dev, unsigned int msec)
+void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard)
 {
 	unsigned long flags;
 
@@ -797,10 +797,10 @@ void pm_wakeup_event(struct device *dev, unsigned int msec)
 		return;
 
 	spin_lock_irqsave(&dev->power.lock, flags);
-	__pm_wakeup_event(dev->power.wakeup, msec);
+	pm_wakeup_ws_event(dev->power.wakeup, msec, hard);
 	spin_unlock_irqrestore(&dev->power.lock, flags);
 }
-EXPORT_SYMBOL_GPL(pm_wakeup_event);
+EXPORT_SYMBOL_GPL(pm_wakeup_dev_event);
 
 void pm_print_active_wakeup_sources(void)
 {
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index a3447932df1f..4c2cba7ec1d4 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -106,8 +106,8 @@ extern void __pm_stay_awake(struct wakeup_source *ws);
 extern void pm_stay_awake(struct device *dev);
 extern void __pm_relax(struct wakeup_source *ws);
 extern void pm_relax(struct device *dev);
-extern void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec);
-extern void pm_wakeup_event(struct device *dev, unsigned int msec);
+extern void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard);
+extern void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard);
 
 #else /* !CONFIG_PM_SLEEP */
 
@@ -182,9 +182,11 @@ static inline void __pm_relax(struct wakeup_source *ws) {}
 
 static inline void pm_relax(struct device *dev) {}
 
-static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) {}
+static inline void pm_wakeup_ws_event(struct wakeup_source *ws,
+				      unsigned int msec, bool hard) {}
 
-static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {}
+static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec,
+				       bool hard) {}
 
 #endif /* !CONFIG_PM_SLEEP */
 
@@ -201,4 +203,19 @@ static inline void wakeup_source_trash(struct wakeup_source *ws)
 	wakeup_source_drop(ws);
 }
 
+static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
+{
+	return pm_wakeup_ws_event(ws, msec, false);
+}
+
+static inline void pm_wakeup_event(struct device *dev, unsigned int msec)
+{
+	return pm_wakeup_dev_event(dev, msec, false);
+}
+
+static inline void pm_wakeup_hard_event(struct device *dev)
+{
+	return pm_wakeup_dev_event(dev, 0, true);
+}
+
 #endif /* _LINUX_PM_WAKEUP_H */
-- 
cgit v1.2.3


From eed4d47efe9508b855b09754cf6de4325d8a2f0d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 26 Apr 2017 23:23:03 +0200
Subject: ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle

The ACPI SCI (System Control Interrupt) is set up as a wakeup IRQ
during suspend-to-idle transitions and, consequently, any events
signaled through it wake up the system from that state.  However,
on some systems some of the events signaled via the ACPI SCI while
suspended to idle should not cause the system to wake up.  In fact,
quite often they should just be discarded.

Arguably, systems should not resume entirely on such events, but in
order to decide which events really should cause the system to resume
and which are spurious, it is necessary to resume up to the point
when ACPI SCIs are actually handled and processed, which is after
executing dpm_resume_noirq() in the system resume path.

For this reasons, add a loop around freeze_enter() in which the
platforms can process events signaled via multiplexed IRQ lines
like the ACPI SCI and add suspend-to-idle hooks that can be
used for this purpose to struct platform_freeze_ops.

In the ACPI case, the ->wake hook is used for checking if the SCI
has triggered while suspended and deferring the interrupt-induced
system wakeup until the events signaled through it are actually
processed sufficiently to decide whether or not the system should
resume.  In turn, the ->sync hook allows all of the relevant event
queues to be flushed so as to prevent events from being missed due
to race conditions.

In addition to that, some ACPI code processing wakeup events needs
to be modified to use the "hard" version of wakeup triggers, so that
it will cause a system resume to happen on device-induced wakeup
events even if the "soft" mechanism to prevent the system from
suspending is not enabled (that also helps to catch device-induced
wakeup events occurring during suspend transitions in progress).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/battery.c      |  2 +-
 drivers/acpi/button.c       |  5 +++--
 drivers/acpi/device_pm.c    |  3 ++-
 drivers/acpi/sleep.c        | 28 ++++++++++++++++++++++++++++
 drivers/base/power/main.c   |  5 -----
 drivers/base/power/wakeup.c | 18 ++++++++++++------
 include/linux/suspend.h     |  7 +++++--
 kernel/power/process.c      |  2 +-
 kernel/power/suspend.c      | 29 +++++++++++++++++++++++++----
 9 files changed, 77 insertions(+), 22 deletions(-)

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 4ef1e4624b2b..83ab17e4a795 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -776,7 +776,7 @@ static int acpi_battery_update(struct acpi_battery *battery, bool resume)
 	if ((battery->state & ACPI_BATTERY_STATE_CRITICAL) ||
 	    (test_bit(ACPI_BATTERY_ALARM_PRESENT, &battery->flags) &&
             (battery->capacity_now <= battery->alarm)))
-		pm_wakeup_event(&battery->device->dev, 0);
+		pm_wakeup_hard_event(&battery->device->dev);
 
 	return result;
 }
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 668137e4a069..b7c2a06963d6 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -216,7 +216,7 @@ static int acpi_lid_notify_state(struct acpi_device *device, int state)
 	}
 
 	if (state)
-		pm_wakeup_event(&device->dev, 0);
+		pm_wakeup_hard_event(&device->dev);
 
 	ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device);
 	if (ret == NOTIFY_DONE)
@@ -398,7 +398,7 @@ static void acpi_button_notify(struct acpi_device *device, u32 event)
 		} else {
 			int keycode;
 
-			pm_wakeup_event(&device->dev, 0);
+			pm_wakeup_hard_event(&device->dev);
 			if (button->suspended)
 				break;
 
@@ -530,6 +530,7 @@ static int acpi_button_add(struct acpi_device *device)
 		lid_device = device;
 	}
 
+	device_init_wakeup(&device->dev, true);
 	printk(KERN_INFO PREFIX "%s [%s]\n", name, acpi_device_bid(device));
 	return 0;
 
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 993fd31394c8..798d5003a039 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -24,6 +24,7 @@
 #include <linux/pm_qos.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
+#include <linux/suspend.h>
 
 #include "internal.h"
 
@@ -399,7 +400,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used)
 	mutex_lock(&acpi_pm_notifier_lock);
 
 	if (adev->wakeup.flags.notifier_present) {
-		__pm_wakeup_event(adev->wakeup.ws, 0);
+		pm_wakeup_ws_event(adev->wakeup.ws, 0, true);
 		if (adev->wakeup.context.work.func)
 			queue_pm_work(&adev->wakeup.context.work);
 	}
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index a4327af676fe..e84005d642e6 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -662,14 +662,40 @@ static int acpi_freeze_prepare(void)
 	acpi_os_wait_events_complete();
 	if (acpi_sci_irq_valid())
 		enable_irq_wake(acpi_sci_irq);
+
 	return 0;
 }
 
+static void acpi_freeze_wake(void)
+{
+	/*
+	 * If IRQD_WAKEUP_ARMED is not set for the SCI at this point, it means
+	 * that the SCI has triggered while suspended, so cancel the wakeup in
+	 * case it has not been a wakeup event (the GPEs will be checked later).
+	 */
+	if (acpi_sci_irq_valid() &&
+	    !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
+		pm_system_cancel_wakeup();
+}
+
+static void acpi_freeze_sync(void)
+{
+	/*
+	 * Process all pending events in case there are any wakeup ones.
+	 *
+	 * The EC driver uses the system workqueue, so that one needs to be
+	 * flushed too.
+	 */
+	acpi_os_wait_events_complete();
+	flush_scheduled_work();
+}
+
 static void acpi_freeze_restore(void)
 {
 	acpi_disable_wakeup_devices(ACPI_STATE_S0);
 	if (acpi_sci_irq_valid())
 		disable_irq_wake(acpi_sci_irq);
+
 	acpi_enable_all_runtime_gpes();
 }
 
@@ -681,6 +707,8 @@ static void acpi_freeze_end(void)
 static const struct platform_freeze_ops acpi_freeze_ops = {
 	.begin = acpi_freeze_begin,
 	.prepare = acpi_freeze_prepare,
+	.wake = acpi_freeze_wake,
+	.sync = acpi_freeze_sync,
 	.restore = acpi_freeze_restore,
 	.end = acpi_freeze_end,
 };
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 9faee1c893e5..e987a6f55d36 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1091,11 +1091,6 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a
 	if (async_error)
 		goto Complete;
 
-	if (pm_wakeup_pending()) {
-		async_error = -EBUSY;
-		goto Complete;
-	}
-
 	if (dev->power.syscore || dev->power.direct_complete)
 		goto Complete;
 
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 84c41c98d6fc..f62082fdd670 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -28,8 +28,8 @@ bool events_check_enabled __read_mostly;
 /* First wakeup IRQ seen by the kernel in the last cycle. */
 unsigned int pm_wakeup_irq __read_mostly;
 
-/* If set and the system is suspending, terminate the suspend. */
-static bool pm_abort_suspend __read_mostly;
+/* If greater than 0 and the system is suspending, terminate the suspend. */
+static atomic_t pm_abort_suspend __read_mostly;
 
 /*
  * Combined counters of registered wakeup events and wakeup events in progress.
@@ -856,20 +856,26 @@ bool pm_wakeup_pending(void)
 		pm_print_active_wakeup_sources();
 	}
 
-	return ret || pm_abort_suspend;
+	return ret || atomic_read(&pm_abort_suspend) > 0;
 }
 
 void pm_system_wakeup(void)
 {
-	pm_abort_suspend = true;
+	atomic_inc(&pm_abort_suspend);
 	freeze_wake();
 }
 EXPORT_SYMBOL_GPL(pm_system_wakeup);
 
-void pm_wakeup_clear(void)
+void pm_system_cancel_wakeup(void)
+{
+	atomic_dec(&pm_abort_suspend);
+}
+
+void pm_wakeup_clear(bool reset)
 {
-	pm_abort_suspend = false;
 	pm_wakeup_irq = 0;
+	if (reset)
+		atomic_set(&pm_abort_suspend, 0);
 }
 
 void pm_system_irq_wakeup(unsigned int irq_number)
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index d9718378a8be..0b1cf32edfd7 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -189,6 +189,8 @@ struct platform_suspend_ops {
 struct platform_freeze_ops {
 	int (*begin)(void);
 	int (*prepare)(void);
+	void (*wake)(void);
+	void (*sync)(void);
 	void (*restore)(void);
 	void (*end)(void);
 };
@@ -428,7 +430,8 @@ extern unsigned int pm_wakeup_irq;
 
 extern bool pm_wakeup_pending(void);
 extern void pm_system_wakeup(void);
-extern void pm_wakeup_clear(void);
+extern void pm_system_cancel_wakeup(void);
+extern void pm_wakeup_clear(bool reset);
 extern void pm_system_irq_wakeup(unsigned int irq_number);
 extern bool pm_get_wakeup_count(unsigned int *count, bool block);
 extern bool pm_save_wakeup_count(unsigned int count);
@@ -478,7 +481,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
 
 static inline bool pm_wakeup_pending(void) { return false; }
 static inline void pm_system_wakeup(void) {}
-static inline void pm_wakeup_clear(void) {}
+static inline void pm_wakeup_clear(bool reset) {}
 static inline void pm_system_irq_wakeup(unsigned int irq_number) {}
 
 static inline void lock_system_sleep(void) {}
diff --git a/kernel/power/process.c b/kernel/power/process.c
index c7209f060eeb..78672d324a6e 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -132,7 +132,7 @@ int freeze_processes(void)
 	if (!pm_freezing)
 		atomic_inc(&system_freezing_cnt);
 
-	pm_wakeup_clear();
+	pm_wakeup_clear(true);
 	pr_info("Freezing user space processes ... ");
 	pm_freezing = true;
 	error = try_to_freeze_tasks(true);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 15e6baef5c73..c0248c74d6d4 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -72,6 +72,8 @@ static void freeze_begin(void)
 
 static void freeze_enter(void)
 {
+	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, true);
+
 	spin_lock_irq(&suspend_freeze_lock);
 	if (pm_wakeup_pending())
 		goto out;
@@ -98,6 +100,27 @@ static void freeze_enter(void)
  out:
 	suspend_freeze_state = FREEZE_STATE_NONE;
 	spin_unlock_irq(&suspend_freeze_lock);
+
+	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, false);
+}
+
+static void s2idle_loop(void)
+{
+	do {
+		freeze_enter();
+
+		if (freeze_ops && freeze_ops->wake)
+			freeze_ops->wake();
+
+		dpm_resume_noirq(PMSG_RESUME);
+		if (freeze_ops && freeze_ops->sync)
+			freeze_ops->sync();
+
+		if (pm_wakeup_pending())
+			break;
+
+		pm_wakeup_clear(false);
+	} while (!dpm_suspend_noirq(PMSG_SUSPEND));
 }
 
 void freeze_wake(void)
@@ -371,10 +394,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	 * all the devices are suspended.
 	 */
 	if (state == PM_SUSPEND_FREEZE) {
-		trace_suspend_resume(TPS("machine_suspend"), state, true);
-		freeze_enter();
-		trace_suspend_resume(TPS("machine_suspend"), state, false);
-		goto Platform_wake;
+		s2idle_loop();
+		goto Platform_early_resume;
 	}
 
 	error = disable_nonboot_cpus();
-- 
cgit v1.2.3


From ef181f268e0e8d868f353b764799d96d6a8a2d86 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 2 May 2017 16:01:00 +0800
Subject: drm/amd/powerplay: refine code in vega10_smumgr.c

1. return error code instand of -1.
2. print msg info if send msg failed

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c   | 78 +++++++++++-----------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 5980f02f2ce9..05669cb5ab4e 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -85,7 +85,7 @@ static uint32_t vega10_wait_for_response(struct pp_smumgr *smumgr)
 	uint32_t reg;
 
 	if (!vega10_is_smc_ram_running(smumgr))
-		return -1;
+		return -EINVAL;
 
 	reg = soc15_get_register_offset(MP1_HWID, 0,
 			mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
@@ -109,7 +109,7 @@ int vega10_send_msg_to_smc_without_waiting(struct pp_smumgr *smumgr,
 	uint32_t reg;
 
 	if (!vega10_is_smc_ram_running(smumgr))
-		return -1;
+		return -EINVAL;
 
 	reg = soc15_get_register_offset(MP1_HWID, 0,
 			mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66);
@@ -130,7 +130,7 @@ int vega10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg)
 	uint32_t reg;
 
 	if (!vega10_is_smc_ram_running(smumgr))
-		return -1;
+		return -EINVAL;
 
 	vega10_wait_for_response(smumgr);
 
@@ -140,9 +140,8 @@ int vega10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg)
 
 	vega10_send_msg_to_smc_without_waiting(smumgr, msg);
 
-	PP_ASSERT_WITH_CODE(vega10_wait_for_response(smumgr) == 1,
-			"Failed to send Message.",
-			return -1);
+	if (vega10_wait_for_response(smumgr) != 1)
+		pr_err("Failed to send Message: 0x%x\n.", msg);
 
 	return 0;
 }
@@ -160,7 +159,7 @@ int vega10_send_msg_to_smc_with_parameter(struct pp_smumgr *smumgr,
 	uint32_t reg;
 
 	if (!vega10_is_smc_ram_running(smumgr))
-		return -1;
+		return -EINVAL;
 
 	vega10_wait_for_response(smumgr);
 
@@ -174,9 +173,8 @@ int vega10_send_msg_to_smc_with_parameter(struct pp_smumgr *smumgr,
 
 	vega10_send_msg_to_smc_without_waiting(smumgr, msg);
 
-	PP_ASSERT_WITH_CODE(vega10_wait_for_response(smumgr) == 1,
-			"Failed to send Message.",
-			return -1);
+	if (vega10_wait_for_response(smumgr) != 1)
+		pr_err("Failed to send Message: 0x%x\n.", msg);
 
 	return 0;
 }
@@ -233,25 +231,25 @@ int vega10_copy_table_from_smc(struct pp_smumgr *smumgr,
 			(struct vega10_smumgr *)(smumgr->backend);
 
 	PP_ASSERT_WITH_CODE(table_id < MAX_SMU_TABLE,
-			"Invalid SMU Table ID!", return -1;);
+			"Invalid SMU Table ID!", return -EINVAL);
 	PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].version != 0,
-			"Invalid SMU Table version!", return -1;);
+			"Invalid SMU Table version!", return -EINVAL);
 	PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].size != 0,
-			"Invalid SMU Table Length!", return -1;);
+			"Invalid SMU Table Length!", return -EINVAL);
 	PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr,
 			PPSMC_MSG_SetDriverDramAddrHigh,
 			priv->smu_tables.entry[table_id].table_addr_high) == 0,
-			"[CopyTableFromSMC] Attempt to Set Dram Addr High Failed!", return -1;);
+			"[CopyTableFromSMC] Attempt to Set Dram Addr High Failed!", return -EINVAL);
 	PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr,
 			PPSMC_MSG_SetDriverDramAddrLow,
 			priv->smu_tables.entry[table_id].table_addr_low) == 0,
 			"[CopyTableFromSMC] Attempt to Set Dram Addr Low Failed!",
-			return -1;);
+			return -EINVAL);
 	PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr,
 			PPSMC_MSG_TransferTableSmu2Dram,
 			priv->smu_tables.entry[table_id].table_id) == 0,
 			"[CopyTableFromSMC] Attempt to Transfer Table From SMU Failed!",
-			return -1;);
+			return -EINVAL);
 
 	memcpy(table, priv->smu_tables.entry[table_id].table,
 			priv->smu_tables.entry[table_id].size);
@@ -271,11 +269,11 @@ int vega10_copy_table_to_smc(struct pp_smumgr *smumgr,
 			(struct vega10_smumgr *)(smumgr->backend);
 
 	PP_ASSERT_WITH_CODE(table_id < MAX_SMU_TABLE,
-			"Invalid SMU Table ID!", return -1;);
+			"Invalid SMU Table ID!", return -EINVAL);
 	PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].version != 0,
-			"Invalid SMU Table version!", return -1;);
+			"Invalid SMU Table version!", return -EINVAL);
 	PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].size != 0,
-			"Invalid SMU Table Length!", return -1;);
+			"Invalid SMU Table Length!", return -EINVAL);
 
 	memcpy(priv->smu_tables.entry[table_id].table, table,
 			priv->smu_tables.entry[table_id].size);
@@ -284,17 +282,17 @@ int vega10_copy_table_to_smc(struct pp_smumgr *smumgr,
 			PPSMC_MSG_SetDriverDramAddrHigh,
 			priv->smu_tables.entry[table_id].table_addr_high) == 0,
 			"[CopyTableToSMC] Attempt to Set Dram Addr High Failed!",
-			return -1;);
+			return -EINVAL;);
 	PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr,
 			PPSMC_MSG_SetDriverDramAddrLow,
 			priv->smu_tables.entry[table_id].table_addr_low) == 0,
 			"[CopyTableToSMC] Attempt to Set Dram Addr Low Failed!",
-			return -1;);
+			return -EINVAL);
 	PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr,
 			PPSMC_MSG_TransferTableDram2Smu,
 			priv->smu_tables.entry[table_id].table_id) == 0,
 			"[CopyTableToSMC] Attempt to Transfer Table To SMU Failed!",
-			return -1;);
+			return -EINVAL);
 
 	return 0;
 }
@@ -303,7 +301,7 @@ int vega10_save_vft_table(struct pp_smumgr *smumgr, uint8_t *avfs_table)
 {
 	PP_ASSERT_WITH_CODE(avfs_table,
 			"No access to SMC AVFS Table",
-			return -1);
+			return -EINVAL);
 
 	return vega10_copy_table_from_smc(smumgr, avfs_table, AVFSTABLE);
 }
@@ -312,7 +310,7 @@ int vega10_restore_vft_table(struct pp_smumgr *smumgr, uint8_t *avfs_table)
 {
 	PP_ASSERT_WITH_CODE(avfs_table,
 			"No access to SMC AVFS Table",
-			return -1);
+			return -EINVAL);
 
 	return vega10_copy_table_to_smc(smumgr, avfs_table, AVFSTABLE);
 }
@@ -330,13 +328,16 @@ int vega10_enable_smc_features(struct pp_smumgr *smumgr,
 int vega10_get_smc_features(struct pp_smumgr *smumgr,
 		uint32_t *features_enabled)
 {
+	if (features_enabled == NULL)
+		return -EINVAL;
+
 	if (!vega10_send_msg_to_smc(smumgr,
 			PPSMC_MSG_GetEnabledSmuFeatures)) {
-		if (!vega10_read_arg_from_smc(smumgr, features_enabled))
-			return 0;
+		vega10_read_arg_from_smc(smumgr, features_enabled);
+		return 0;
 	}
 
-	return -1;
+	return -EINVAL;
 }
 
 int vega10_set_tools_address(struct pp_smumgr *smumgr)
@@ -363,15 +364,14 @@ static int vega10_verify_smc_interface(struct pp_smumgr *smumgr)
 	PP_ASSERT_WITH_CODE(!vega10_send_msg_to_smc(smumgr,
 			PPSMC_MSG_GetDriverIfVersion),
 			"Attempt to get SMC IF Version Number Failed!",
-			return -1);
-	PP_ASSERT_WITH_CODE(!vega10_read_arg_from_smc(smumgr,
-			&smc_driver_if_version),
-			"Attempt to read SMC IF Version Number Failed!",
-			return -1);
+			return -EINVAL);
+	vega10_read_arg_from_smc(smumgr, &smc_driver_if_version);
 
 	if (smc_driver_if_version != SMU9_DRIVER_IF_VERSION) {
-		pr_err("Your firmware(0x%x) doesn't match SMU9_DRIVER_IF_VERSION(0x%x). Please update your firmware!\n",
-		       smc_driver_if_version, SMU9_DRIVER_IF_VERSION);
+		pr_err("Your firmware(0x%x) doesn't match \
+			SMU9_DRIVER_IF_VERSION(0x%x). \
+			Please update your firmware!\n",
+			smc_driver_if_version, SMU9_DRIVER_IF_VERSION);
 		return -EINVAL;
 	}
 
@@ -421,7 +421,7 @@ static int vega10_smu_init(struct pp_smumgr *smumgr)
 			kfree(smumgr->backend);
 			cgs_free_gpu_mem(smumgr->device,
 			(cgs_handle_t)handle);
-			return -1);
+			return -EINVAL);
 
 	priv->smu_tables.entry[PPTABLE].version = 0x01;
 	priv->smu_tables.entry[PPTABLE].size = sizeof(PPTable_t);
@@ -449,7 +449,7 @@ static int vega10_smu_init(struct pp_smumgr *smumgr)
 			(cgs_handle_t)priv->smu_tables.entry[PPTABLE].handle);
 			cgs_free_gpu_mem(smumgr->device,
 			(cgs_handle_t)handle);
-			return -1);
+			return -EINVAL);
 
 	priv->smu_tables.entry[WMTABLE].version = 0x01;
 	priv->smu_tables.entry[WMTABLE].size = sizeof(Watermarks_t);
@@ -479,7 +479,7 @@ static int vega10_smu_init(struct pp_smumgr *smumgr)
 			(cgs_handle_t)priv->smu_tables.entry[WMTABLE].handle);
 			cgs_free_gpu_mem(smumgr->device,
 			(cgs_handle_t)handle);
-			return -1);
+			return -EINVAL);
 
 	priv->smu_tables.entry[AVFSTABLE].version = 0x01;
 	priv->smu_tables.entry[AVFSTABLE].size = sizeof(AvfsTable_t);
@@ -537,7 +537,7 @@ static int vega10_smu_init(struct pp_smumgr *smumgr)
 			(cgs_handle_t)priv->smu_tables.entry[TOOLSTABLE].handle);
 			cgs_free_gpu_mem(smumgr->device,
 			(cgs_handle_t)handle);
-			return -1);
+			return -EINVAL);
 
 	priv->smu_tables.entry[AVFSFUSETABLE].version = 0x01;
 	priv->smu_tables.entry[AVFSFUSETABLE].size = sizeof(AvfsFuseOverride_t);
@@ -579,7 +579,7 @@ static int vega10_start_smu(struct pp_smumgr *smumgr)
 {
 	PP_ASSERT_WITH_CODE(!vega10_verify_smc_interface(smumgr),
 			"Failed to verify SMC interface!",
-			return -1);
+			return -EINVAL);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 05ee3215110303b3ffacfb44fdae59a151799e89 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 2 May 2017 16:51:49 +0800
Subject: drm/amd/powerplay: set soc floor voltage on boot on vega10.

Send the VBIOS bootup VDDC as a SOC floor voltage to SMU
before populating the PPTABLE. After DPM is enabled, This
floor voltage will be removed. This will prevent SMC from
going to Vmin upon receiving PPTable causing a violation.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c | 28 ++++++++++++++++++++++
 drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h | 15 ++++++++++++
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 25 +++++++++++++++++++
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h |  3 +++
 4 files changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
index de3d8f3f052b..56023114ad6f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
@@ -387,3 +387,31 @@ int pp_atomfwctrl_get_gpio_information(struct pp_hwmgr *hwmgr,
 
 	return 0;
 }
+
+int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
+			struct pp_atomfwctrl_bios_boot_up_values *boot_values)
+{
+	struct atom_firmware_info_v3_1 *info = NULL;
+	uint16_t ix;
+
+	ix = GetIndexIntoMasterDataTable(firmwareinfo);
+	info = (struct atom_firmware_info_v3_1 *)
+		cgs_atom_get_data_table(hwmgr->device,
+				ix, NULL, NULL, NULL);
+
+	if (!info) {
+		pr_info("Error retrieving BIOS firmwareinfo!");
+		return -EINVAL;
+	}
+
+	boot_values->ulRevision = info->firmware_revision;
+	boot_values->ulGfxClk   = info->bootup_sclk_in10khz;
+	boot_values->ulUClk     = info->bootup_mclk_in10khz;
+	boot_values->ulSocClk   = 0;
+	boot_values->usVddc     = info->bootup_vddc_mv;
+	boot_values->usVddci    = info->bootup_vddci_mv;
+	boot_values->usMvddc    = info->bootup_mvddc_mv;
+	boot_values->usVddGfx   = info->bootup_vddgfx_mv;
+
+	return 0;
+}
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
index be1579e87caf..43a6711e3c06 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
@@ -119,6 +119,18 @@ struct pp_atomfwctrl_gpio_parameters {
 	uint8_t   ucFwCtfGpio;
 	uint8_t   ucFwCtfPolarity;
 };
+
+struct pp_atomfwctrl_bios_boot_up_values {
+	uint32_t   ulRevision;
+	uint32_t   ulGfxClk;
+	uint32_t   ulUClk;
+	uint32_t   ulSocClk;
+	uint16_t   usVddc;
+	uint16_t   usVddci;
+	uint16_t   usMvddc;
+	uint16_t   usVddGfx;
+};
+
 int pp_atomfwctrl_get_gpu_pll_dividers_vega10(struct pp_hwmgr *hwmgr,
 		uint32_t clock_type, uint32_t clock_value,
 		struct pp_atomfwctrl_clock_dividers_soc15 *dividers);
@@ -136,5 +148,8 @@ int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
 int pp_atomfwctrl_get_gpio_information(struct pp_hwmgr *hwmgr,
 		struct pp_atomfwctrl_gpio_parameters *param);
 
+int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
+			struct pp_atomfwctrl_bios_boot_up_values *boot_values);
+
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 68eae529df50..dbc1c03cb40a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -2298,6 +2298,7 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
 			(struct phm_ppt_v2_information *)(hwmgr->pptable);
 	PPTable_t *pp_table = &(data->smc_state_table.pp_table);
 	struct pp_atomfwctrl_voltage_table voltage_table;
+	struct pp_atomfwctrl_bios_boot_up_values boot_up_values;
 
 	result = vega10_setup_default_dpm_tables(hwmgr);
 	PP_ASSERT_WITH_CODE(!result,
@@ -2369,6 +2370,24 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
 				return result);
 	}
 
+	result = pp_atomfwctrl_get_vbios_bootup_values(hwmgr, &boot_up_values);
+	if (!result) {
+		data->vbios_boot_state.vddc     = boot_up_values.usVddc;
+		data->vbios_boot_state.vddci    = boot_up_values.usVddci;
+		data->vbios_boot_state.mvddc    = boot_up_values.usMvddc;
+		data->vbios_boot_state.gfx_clock = boot_up_values.ulGfxClk;
+		data->vbios_boot_state.mem_clock = boot_up_values.ulUClk;
+		data->vbios_boot_state.soc_clock = boot_up_values.ulSocClk;
+		if (0 != boot_up_values.usVddc) {
+			smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+						PPSMC_MSG_SetFloorSocVoltage,
+						(boot_up_values.usVddc * 4));
+			data->vbios_boot_state.bsoc_vddc_lock = true;
+		} else {
+			data->vbios_boot_state.bsoc_vddc_lock = false;
+		}
+	}
+
 	result = vega10_populate_avfs_parameters(hwmgr);
 	PP_ASSERT_WITH_CODE(!result,
 			"Failed to initialize AVFS Parameters!",
@@ -2590,6 +2609,12 @@ static int vega10_start_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap)
 		data->smu_features[GNLD_LED_DISPLAY].enabled = true;
 	}
 
+	if (data->vbios_boot_state.bsoc_vddc_lock) {
+		smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+						PPSMC_MSG_SetFloorSocVoltage, 0);
+		data->vbios_boot_state.bsoc_vddc_lock = false;
+	}
+
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_Falcon_QuickTransition)) {
 		if (data->smu_features[GNLD_ACDC].supported) {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
index 83c67b9262ff..1912e086c0cf 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
@@ -177,8 +177,11 @@ struct vega10_dpmlevel_enable_mask {
 };
 
 struct vega10_vbios_boot_state {
+	bool        bsoc_vddc_lock;
 	uint16_t    vddc;
 	uint16_t    vddci;
+	uint16_t    mvddc;
+	uint16_t    vdd_gfx;
 	uint32_t    gfx_clock;
 	uint32_t    mem_clock;
 	uint32_t    soc_clock;
-- 
cgit v1.2.3


From 56a2f08c418ec01d46851683ee0463a2d8a2504d Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 2 May 2017 17:11:29 +0800
Subject: drm/amd/powerplay: set fan target temperature by msg on vega10.

SMU not support FanTargetTemperature in pptable,
so send msg instand.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
index 7062ec8cc4ac..5da88ba4a53c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
@@ -561,6 +561,11 @@ int tf_vega10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr,
 			advanceFanControlParameters.ulMinFanSCLKAcousticLimit);
 	table->FanTargetTemperature = hwmgr->thermal_controller.
 			advanceFanControlParameters.usTMax;
+
+	smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+				PPSMC_MSG_SetFanTemperatureTarget,
+				(uint32_t)table->FanTargetTemperature);
+
 	table->FanPwmMin = hwmgr->thermal_controller.
 			advanceFanControlParameters.usPWMMin * 255 / 100;
 	table->FanTargetGfxclk = (uint16_t)(hwmgr->thermal_controller.
-- 
cgit v1.2.3


From b7a1f0e3cca40044952901e659939133e7cd1efe Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 2 May 2017 14:30:39 +0800
Subject: drm/amd/powerplay: Allow duplicate enteries in pptable.

This is a valid configuration.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index dbc1c03cb40a..8baa890579d8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -1137,7 +1137,7 @@ static void vega10_setup_default_single_dpm_table(struct pp_hwmgr *hwmgr,
 	int i;
 
 	for (i = 0; i < dep_table->count; i++) {
-		if (i == 0 || dpm_table->dpm_levels[dpm_table->count - 1].value !=
+		if (i == 0 || dpm_table->dpm_levels[dpm_table->count - 1].value <=
 				dep_table->entries[i].clk) {
 			dpm_table->dpm_levels[dpm_table->count].value =
 					dep_table->entries[i].clk;
@@ -1274,7 +1274,7 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 	dpm_table = &(data->dpm_table.eclk_table);
 	for (i = 0; i < dep_mm_table->count; i++) {
 		if (i == 0 || dpm_table->dpm_levels
-				[dpm_table->count - 1].value !=
+				[dpm_table->count - 1].value <=
 						dep_mm_table->entries[i].eclk) {
 			dpm_table->dpm_levels[dpm_table->count].value =
 					dep_mm_table->entries[i].eclk;
@@ -1290,7 +1290,7 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 	dpm_table = &(data->dpm_table.vclk_table);
 	for (i = 0; i < dep_mm_table->count; i++) {
 		if (i == 0 || dpm_table->dpm_levels
-				[dpm_table->count - 1].value !=
+				[dpm_table->count - 1].value <=
 						dep_mm_table->entries[i].vclk) {
 			dpm_table->dpm_levels[dpm_table->count].value =
 					dep_mm_table->entries[i].vclk;
@@ -1304,7 +1304,7 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 	dpm_table = &(data->dpm_table.dclk_table);
 	for (i = 0; i < dep_mm_table->count; i++) {
 		if (i == 0 || dpm_table->dpm_levels
-				[dpm_table->count - 1].value !=
+				[dpm_table->count - 1].value <=
 						dep_mm_table->entries[i].dclk) {
 			dpm_table->dpm_levels[dpm_table->count].value =
 					dep_mm_table->entries[i].dclk;
-- 
cgit v1.2.3


From 0dca7047512cb31e7a08a677a69fa7d47959dbf8 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 28 Apr 2017 13:49:50 +0800
Subject: drm/amd/powerplay: correct LoadLineResistance value in pptable.

this value is used by avfs to adjust inversion voltage.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c       | 4 ++--
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
index 692f752fbb27..3f72268e99bb 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
@@ -48,8 +48,8 @@ void vega10_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr)
 	table->Tliquid1Limit = cpu_to_le16(tdp_table->usTemperatureLimitLiquid1);
 	table->Tliquid2Limit = cpu_to_le16(tdp_table->usTemperatureLimitLiquid2);
 	table->TplxLimit = cpu_to_le16(tdp_table->usTemperatureLimitPlx);
-	table->LoadLineResistance = cpu_to_le16(
-			hwmgr->platform_descriptor.LoadLineSlope);
+	table->LoadLineResistance =
+			hwmgr->platform_descriptor.LoadLineSlope * 256;
 	table->FitLimit = 0; /* Not used for Vega10 */
 
 	table->Liquid1_I2C_address = tdp_table->ucLiquid1_I2C_address;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
index 8b55ae01132d..00e95511e19a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
@@ -407,7 +407,7 @@ static int get_tdp_table(
 		tdp_table->ucPlx_I2C_address = power_tune_table->ucPlx_I2C_address;
 		tdp_table->ucPlx_I2C_Line = power_tune_table->ucPlx_I2C_LineSCL;
 		tdp_table->ucPlx_I2C_LineSDA = power_tune_table->ucPlx_I2C_LineSDA;
-		hwmgr->platform_descriptor.LoadLineSlope = power_tune_table->usLoadLineResistance;
+		hwmgr->platform_descriptor.LoadLineSlope = le16_to_cpu(power_tune_table->usLoadLineResistance);
 	} else {
 		power_tune_table_v2 = (ATOM_Vega10_PowerTune_Table_V2 *)table;
 		tdp_table->usMaximumPowerDeliveryLimit = le16_to_cpu(power_tune_table_v2->usSocketPowerLimit);
@@ -453,7 +453,7 @@ static int get_tdp_table(
 		tdp_table->ucPlx_I2C_LineSDA = sda;
 
 		hwmgr->platform_descriptor.LoadLineSlope =
-					power_tune_table_v2->usLoadLineResistance;
+					le16_to_cpu(power_tune_table_v2->usLoadLineResistance);
 	}
 
 	*info_tdp_table = tdp_table;
-- 
cgit v1.2.3


From 1d1a2cd58f05da1167c04394292aef9fbcf29bff Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Thu, 27 Apr 2017 17:14:57 +0800
Subject: drm/amdgpu:PTE flag should be 64 bit width
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

otherwise we'll lost the high 32 bit for pte, which lead
to incorrect MTYPE for vega10.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7bb1bf76319a..5db0230e45c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -765,7 +765,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
 {
 	struct amdgpu_ttm_tt *gtt, *tmp;
 	struct ttm_mem_reg bo_mem;
-	uint32_t flags;
+	uint64_t flags;
 	int r;
 
 	bo_mem.mem_type = TTM_PL_TT;
-- 
cgit v1.2.3


From 236763d340d6190c56554caee61c2bd5cfdf5217 Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Mon, 1 May 2017 16:15:31 +0800
Subject: drm/amdgpu:fix waiting on dirty fence

if bo->shadow is NULL (race issue:BO shadow was just released
and gpu-reset kick in but BO hasn't yet) recover_vram_from_shadow
won't set @next, so the following "fence=next"
will wrongly use a fence pointer which may already dirty.
fixing it by set next to NULL prior to recover_vram_from_shadow

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Chunming Zhou<david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3c1754df4c40..367811cd0763 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2522,6 +2522,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
 	ring = adev->mman.buffer_funcs_ring;
 	mutex_lock(&adev->shadow_list_lock);
 	list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+		next = NULL;
 		amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
 		if (fence) {
 			r = dma_fence_wait(fence, false);
@@ -2668,6 +2669,7 @@ retry:
 			DRM_INFO("recover vram bo from shadow\n");
 			mutex_lock(&adev->shadow_list_lock);
 			list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+				next = NULL;
 				amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
 				if (fence) {
 					r = dma_fence_wait(fence, false);
-- 
cgit v1.2.3


From b4a33e325c58579a2017d3b1c1a1227f78ae52cd Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 3 May 2017 12:48:52 +0800
Subject: drm/amd/powerplay: clean up code in vega10_smumgr.c

1. fix typo in print message info.
2. fix block comments's coding style.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c   | 101 +++++++++------------
 1 file changed, 45 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 05669cb5ab4e..115f0e4b1603 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -74,12 +74,12 @@ static bool vega10_is_smc_ram_running(struct pp_smumgr *smumgr)
 	return false;
 }
 
-/**
-* Check if SMC has responded to previous message.
-*
-* @param    smumgr  the address of the powerplay hardware manager.
-* @return   TRUE    SMC has responded, FALSE otherwise.
-*/
+/*
+ * Check if SMC has responded to previous message.
+ *
+ * @param    smumgr  the address of the powerplay hardware manager.
+ * @return   TRUE    SMC has responded, FALSE otherwise.
+ */
 static uint32_t vega10_wait_for_response(struct pp_smumgr *smumgr)
 {
 	uint32_t reg;
@@ -96,13 +96,12 @@ static uint32_t vega10_wait_for_response(struct pp_smumgr *smumgr)
 	return cgs_read_register(smumgr->device, reg);
 }
 
-/**
-* Send a message to the SMC, and do not wait for its response.
-*
-* @param    smumgr  the address of the powerplay hardware manager.
-* @param    msg the message to send.
-* @return   Always return 0.
-*/
+/*
+ * Send a message to the SMC, and do not wait for its response.
+ * @param    smumgr  the address of the powerplay hardware manager.
+ * @param    msg the message to send.
+ * @return   Always return 0.
+ */
 int vega10_send_msg_to_smc_without_waiting(struct pp_smumgr *smumgr,
 		uint16_t msg)
 {
@@ -118,13 +117,12 @@ int vega10_send_msg_to_smc_without_waiting(struct pp_smumgr *smumgr,
 	return 0;
 }
 
-/**
-* Send a message to the SMC, and wait for its response.
-*
-* @param    smumgr  the address of the powerplay hardware manager.
-* @param    msg the message to send.
-* @return   The response that came from the SMC.
-*/
+/*
+ * Send a message to the SMC, and wait for its response.
+ * @param    smumgr  the address of the powerplay hardware manager.
+ * @param    msg the message to send.
+ * @return   Always return 0.
+ */
 int vega10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg)
 {
 	uint32_t reg;
@@ -141,17 +139,17 @@ int vega10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg)
 	vega10_send_msg_to_smc_without_waiting(smumgr, msg);
 
 	if (vega10_wait_for_response(smumgr) != 1)
-		pr_err("Failed to send Message: 0x%x\n.", msg);
+		pr_err("Failed to send message: 0x%x\n", msg);
 
 	return 0;
 }
 
-/**
+/*
  * Send a message to the SMC with parameter
  * @param    smumgr:  the address of the powerplay hardware manager.
  * @param    msg: the message to send.
  * @param    parameter: the parameter to send
- * @return   The response that came from the SMC.
+ * @return   Always return 0.
  */
 int vega10_send_msg_to_smc_with_parameter(struct pp_smumgr *smumgr,
 		uint16_t msg, uint32_t parameter)
@@ -174,20 +172,19 @@ int vega10_send_msg_to_smc_with_parameter(struct pp_smumgr *smumgr,
 	vega10_send_msg_to_smc_without_waiting(smumgr, msg);
 
 	if (vega10_wait_for_response(smumgr) != 1)
-		pr_err("Failed to send Message: 0x%x\n.", msg);
+		pr_err("Failed to send message: 0x%x\n", msg);
 
 	return 0;
 }
 
 
-/**
-* Send a message to the SMC with parameter, do not wait for response
-*
-* @param    smumgr:  the address of the powerplay hardware manager.
-* @param    msg: the message to send.
-* @param    parameter: the parameter to send
-* @return   The response that came from the SMC.
-*/
+/*
+ * Send a message to the SMC with parameter, do not wait for response
+ * @param    smumgr:  the address of the powerplay hardware manager.
+ * @param    msg: the message to send.
+ * @param    parameter: the parameter to send
+ * @return   The response that came from the SMC.
+ */
 int vega10_send_msg_to_smc_with_parameter_without_waiting(
 		struct pp_smumgr *smumgr, uint16_t msg, uint32_t parameter)
 {
@@ -200,13 +197,12 @@ int vega10_send_msg_to_smc_with_parameter_without_waiting(
 	return vega10_send_msg_to_smc_without_waiting(smumgr, msg);
 }
 
-/**
-* Retrieve an argument from SMC.
-*
-* @param    smumgr  the address of the powerplay hardware manager.
-* @param    arg     pointer to store the argument from SMC.
-* @return   Always return 0.
-*/
+/*
+ * Retrieve an argument from SMC.
+ * @param    smumgr  the address of the powerplay hardware manager.
+ * @param    arg     pointer to store the argument from SMC.
+ * @return   Always return 0.
+ */
 int vega10_read_arg_from_smc(struct pp_smumgr *smumgr, uint32_t *arg)
 {
 	uint32_t reg;
@@ -219,11 +215,11 @@ int vega10_read_arg_from_smc(struct pp_smumgr *smumgr, uint32_t *arg)
 	return 0;
 }
 
-/**
-* Copy table from SMC into driver FB
-* @param   smumgr    the address of the SMC manager
-* @param   table_id    the driver's table ID to copy from
-*/
+/*
+ * Copy table from SMC into driver FB
+ * @param   smumgr    the address of the SMC manager
+ * @param   table_id    the driver's table ID to copy from
+ */
 int vega10_copy_table_from_smc(struct pp_smumgr *smumgr,
 		uint8_t *table, int16_t table_id)
 {
@@ -257,11 +253,11 @@ int vega10_copy_table_from_smc(struct pp_smumgr *smumgr,
 	return 0;
 }
 
-/**
-* Copy table from Driver FB into SMC
-* @param   smumgr    the address of the SMC manager
-* @param   table_id    the table to copy from
-*/
+/*
+ * Copy table from Driver FB into SMC
+ * @param   smumgr    the address of the SMC manager
+ * @param   table_id    the table to copy from
+ */
 int vega10_copy_table_to_smc(struct pp_smumgr *smumgr,
 		uint8_t *table, int16_t table_id)
 {
@@ -378,13 +374,6 @@ static int vega10_verify_smc_interface(struct pp_smumgr *smumgr)
 	return 0;
 }
 
-/**
-* Write a 32bit value to the SMC SRAM space.
-* ALL PARAMETERS ARE IN HOST BYTE ORDER.
-* @param    smumgr  the address of the powerplay hardware manager.
-* @param    smc_addr the address in the SMC RAM to access.
-* @param    value to write to the SMC SRAM.
-*/
 static int vega10_smu_init(struct pp_smumgr *smumgr)
 {
 	struct vega10_smumgr *priv;
-- 
cgit v1.2.3


From 327fce0c0d265b14be575bfe21a98c7c0dd932d8 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Thu, 4 May 2017 11:07:02 +0800
Subject: drm/amd/powerplay: disable engine spread spectrum feature on Vega10.

Vega10 atomfirmware do not have ASIC_InternalSS_Info table
so disable this feature by default in driver.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Ken Wang <Qingqing.wang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 8baa890579d8..e24e54c294bd 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -1535,7 +1535,11 @@ static int vega10_populate_single_gfx_level(struct pp_hwmgr *hwmgr,
 	current_gfxclk_level->FbMult =
 			cpu_to_le32(dividers.ulPll_fb_mult);
 	/* Spread FB Multiplier bit: bit 0:8 int, bit 31:16 frac */
-	current_gfxclk_level->SsOn = dividers.ucPll_ss_enable;
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+				PHM_PlatformCaps_EngineSpreadSpectrumSupport))
+		current_gfxclk_level->SsOn = dividers.ucPll_ss_enable;
+	else
+		current_gfxclk_level->SsOn = 0;
 	current_gfxclk_level->SsFbMult =
 			cpu_to_le32(dividers.ulPll_ss_fbsmult);
 	current_gfxclk_level->SsSlewFrac =
-- 
cgit v1.2.3


From f47b77b4e4baeb7922b5652ae7040d4d5de684a7 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 2 May 2017 15:49:36 -0400
Subject: drm/amdgpu/gfx: drop max_gs_waves_per_vgt

We already have this info: max_gs_threads.  Drop the duplicate.

Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c9f935710d40..8033001b913a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -969,7 +969,6 @@ struct amdgpu_gfx_config {
 	unsigned num_rbs;
 	unsigned gs_vgt_table_depth;
 	unsigned gs_prim_buffer_depth;
-	unsigned max_gs_waves_per_vgt;
 
 	uint32_t tile_mode_array[32];
 	uint32_t macrotile_mode_array[16];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 4f2b6679f72f..1a7830a291d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -560,7 +560,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches;
 		dev_info.gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth;
 		dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth;
-		dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_waves_per_vgt;
+		dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads;
 
 		return copy_to_user(out, &dev_info,
 				    min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5f2ae4cb510f..484ead2a20a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -786,7 +786,6 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
 		adev->gfx.config.gs_vgt_table_depth = 32;
 		adev->gfx.config.gs_prim_buffer_depth = 1792;
-		adev->gfx.config.max_gs_waves_per_vgt = 32;
 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	default:
-- 
cgit v1.2.3


From ad7d0ff3e79a100a24b66e8908a45402c20c3685 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 2 May 2017 16:15:06 -0400
Subject: drm/amdgpu/gfx9: derive tile pipes from golden settings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

rather than hardcoding it.

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 484ead2a20a4..741b56f996c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -771,7 +771,6 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		adev->gfx.config.max_shader_engines = 4;
-		adev->gfx.config.max_tile_pipes = 8; //??
 		adev->gfx.config.max_cu_per_sh = 16;
 		adev->gfx.config.max_sh_per_se = 1;
 		adev->gfx.config.max_backends_per_se = 4;
@@ -800,6 +799,10 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 					adev->gfx.config.gb_addr_config,
 					GB_ADDR_CONFIG,
 					NUM_PIPES);
+
+	adev->gfx.config.max_tile_pipes =
+		adev->gfx.config.gb_addr_config_fields.num_pipes;
+
 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
 			REG_GET_FIELD(
 					adev->gfx.config.gb_addr_config,
-- 
cgit v1.2.3


From af8baf1518d8b3d086ac6d11d8f6acd57e9cab99 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 3 May 2017 23:49:18 -0700
Subject: drm/amdgpu: Use less generic enum definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

alpha:allmodconfig fails to build as follows.

drivers/gpu/drm/amd/amdgpu/amdgpu.h:1006:2: error:
	expected identifier before '(' token
drivers/gpu/drm/amd/amdgpu/amdgpu.h:1011:28: error:
	'NGG_BUF_MAX' undeclared here

The problem is not really the enum definition of NGG_BUF_MAX but PARAM,
which happens to be defined differently for alpha and a couple of other
architectures.

Use less generic defines for NGG enums to solve the problem.

Fixes: bce23e00f3369 ("drm/amdgpu: add NGG parameters")
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 16 ++++++++--------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 28 ++++++++++++++--------------
 3 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8033001b913a..86923c57908b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1004,10 +1004,10 @@ struct amdgpu_ngg_buf {
 };
 
 enum {
-	PRIM = 0,
-	POS,
-	CNTL,
-	PARAM,
+	NGG_PRIM = 0,
+	NGG_POS,
+	NGG_CNTL,
+	NGG_PARAM,
 	NGG_BUF_MAX
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 1a7830a291d6..96c341670782 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -545,14 +545,14 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			adev->gfx.config.double_offchip_lds_buf;
 
 		if (amdgpu_ngg) {
-			dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr;
-			dev_info.prim_buf_size = adev->gfx.ngg.buf[PRIM].size;
-			dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr;
-			dev_info.pos_buf_size = adev->gfx.ngg.buf[POS].size;
-			dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr;
-			dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[CNTL].size;
-			dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr;
-			dev_info.param_buf_size = adev->gfx.ngg.buf[PARAM].size;
+			dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PRIM].gpu_addr;
+			dev_info.prim_buf_size = adev->gfx.ngg.buf[NGG_PRIM].size;
+			dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[NGG_POS].gpu_addr;
+			dev_info.pos_buf_size = adev->gfx.ngg.buf[NGG_POS].size;
+			dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[NGG_CNTL].gpu_addr;
+			dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[NGG_CNTL].size;
+			dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PARAM].gpu_addr;
+			dev_info.param_buf_size = adev->gfx.ngg.buf[NGG_PARAM].size;
 		}
 		dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size;
 		dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 741b56f996c4..0c16b7563b73 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -890,7 +890,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
 	adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size;
 
 	/* Primitive Buffer */
-	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PRIM],
+	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
 				    amdgpu_prim_buf_per_se,
 				    64 * 1024);
 	if (r) {
@@ -899,7 +899,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
 	}
 
 	/* Position Buffer */
-	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[POS],
+	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
 				    amdgpu_pos_buf_per_se,
 				    256 * 1024);
 	if (r) {
@@ -908,7 +908,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
 	}
 
 	/* Control Sideband */
-	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[CNTL],
+	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
 				    amdgpu_cntl_sb_buf_per_se,
 				    256);
 	if (r) {
@@ -920,7 +920,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
 	if (amdgpu_param_buf_per_se <= 0)
 		goto out;
 
-	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PARAM],
+	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
 				    amdgpu_param_buf_per_se,
 				    512 * 1024);
 	if (r) {
@@ -949,45 +949,45 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
 
 	/* Program buffer size */
 	data = 0;
-	size = adev->gfx.ngg.buf[PRIM].size / 256;
+	size = adev->gfx.ngg.buf[NGG_PRIM].size / 256;
 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size);
 
-	size = adev->gfx.ngg.buf[POS].size / 256;
+	size = adev->gfx.ngg.buf[NGG_POS].size / 256;
 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size);
 
 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
 
 	data = 0;
-	size = adev->gfx.ngg.buf[CNTL].size / 256;
+	size = adev->gfx.ngg.buf[NGG_CNTL].size / 256;
 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size);
 
-	size = adev->gfx.ngg.buf[PARAM].size / 1024;
+	size = adev->gfx.ngg.buf[NGG_PARAM].size / 1024;
 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size);
 
 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
 
 	/* Program buffer base address */
-	base = lower_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr);
+	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
 
-	base = upper_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr);
+	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
 
-	base = lower_32_bits(adev->gfx.ngg.buf[POS].gpu_addr);
+	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
 
-	base = upper_32_bits(adev->gfx.ngg.buf[POS].gpu_addr);
+	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
 
-	base = lower_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr);
+	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
 
-	base = upper_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr);
+	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
 
-- 
cgit v1.2.3


From 652bd0c3441d861402bf7347088c846b6799c5b8 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 3 May 2017 15:38:58 +0800
Subject: drm/amd/powerplay: delete dead code in powerplay.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 28 ------------------------
 drivers/gpu/drm/amd/powerplay/inc/hwmgr.h        |  2 --
 2 files changed, 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 4ad84530c88a..b3d42858f68b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -4522,32 +4522,6 @@ static int smu7_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type
 	return 0;
 }
 
-static int smu7_request_firmware(struct pp_hwmgr *hwmgr)
-{
-	int ret;
-	struct cgs_firmware_info info = {0};
-
-	ret = cgs_get_firmware_info(hwmgr->device,
-				    smu7_convert_fw_type_to_cgs(UCODE_ID_SMU),
-				    &info);
-	if (ret || !info.kptr)
-		return -EINVAL;
-
-	return 0;
-}
-
-static int smu7_release_firmware(struct pp_hwmgr *hwmgr)
-{
-	int ret;
-
-	ret = cgs_rel_firmware(hwmgr->device,
-			       smu7_convert_fw_type_to_cgs(UCODE_ID_SMU));
-	if (ret)
-		return -EINVAL;
-
-	return 0;
-}
-
 static void smu7_find_min_clock_masks(struct pp_hwmgr *hwmgr,
 		uint32_t *sclk_mask, uint32_t *mclk_mask,
 		uint32_t min_sclk, uint32_t min_mclk)
@@ -4691,8 +4665,6 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
 	.get_clock_by_type = smu7_get_clock_by_type,
 	.read_sensor = smu7_read_sensor,
 	.dynamic_state_management_disable = smu7_disable_dpm_tasks,
-	.request_firmware = smu7_request_firmware,
-	.release_firmware = smu7_release_firmware,
 	.set_power_profile_state = smu7_set_power_profile_state,
 	.avfs_control = smu7_avfs_control,
 	.disable_smc_firmware_ctf = smu7_thermal_disable_alert,
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 98d38c31f6a6..e4574c215c38 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -368,8 +368,6 @@ struct pp_hwmgr_func {
 	int (*get_mclk_od)(struct pp_hwmgr *hwmgr);
 	int (*set_mclk_od)(struct pp_hwmgr *hwmgr, uint32_t value);
 	int (*read_sensor)(struct pp_hwmgr *hwmgr, int idx, void *value, int *size);
-	int (*request_firmware)(struct pp_hwmgr *hwmgr);
-	int (*release_firmware)(struct pp_hwmgr *hwmgr);
 	int (*set_power_profile_state)(struct pp_hwmgr *hwmgr,
 			struct amd_pp_profile *request);
 	int (*avfs_control)(struct pp_hwmgr *hwmgr, bool enable);
-- 
cgit v1.2.3


From 5784d5cca66b362e3588c189eada757cf664ae6c Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Thu, 4 May 2017 14:51:31 +0800
Subject: drm/amd/powerplay: Setup sw CTF to allow graceful exit when
 temperature exceeds maximum.

cherry-pick from amd windows driver.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c   | 73 +++++++++++++---------
 1 file changed, 45 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
index 5da88ba4a53c..d5f53d04fa08 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
@@ -381,14 +381,10 @@ int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr)
 
 	temp = cgs_read_register(hwmgr->device, reg);
 
-	temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >>
-			CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT;
+	temp = (temp & CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP_MASK) >>
+			CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP__SHIFT;
 
-	/* Bit 9 means the reading is lower than the lowest usable value. */
-	if (temp & 0x200)
-		temp = VEGA10_THERMAL_MAXIMUM_TEMP_READING;
-	else
-		temp = temp & 0x1ff;
+	temp = temp & 0x1ff;
 
 	temp *= PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
@@ -424,23 +420,28 @@ static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr,
 			mmTHM_THERMAL_INT_CTRL_BASE_IDX, mmTHM_THERMAL_INT_CTRL);
 
 	val = cgs_read_register(hwmgr->device, reg);
-	val &= ~(THM_THERMAL_INT_CTRL__DIG_THERM_INTH_MASK);
-	val |= (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES) <<
-			THM_THERMAL_INT_CTRL__DIG_THERM_INTH__SHIFT;
-	val &= ~(THM_THERMAL_INT_CTRL__DIG_THERM_INTL_MASK);
-	val |= (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES) <<
-			THM_THERMAL_INT_CTRL__DIG_THERM_INTL__SHIFT;
+
+	val &= (~THM_THERMAL_INT_CTRL__MAX_IH_CREDIT_MASK);
+	val |=  (5 << THM_THERMAL_INT_CTRL__MAX_IH_CREDIT__SHIFT);
+
+	val &= (~THM_THERMAL_INT_CTRL__THERM_IH_HW_ENA_MASK);
+	val |= (1 << THM_THERMAL_INT_CTRL__THERM_IH_HW_ENA__SHIFT);
+
+	val &= (~THM_THERMAL_INT_CTRL__DIG_THERM_INTH_MASK);
+	val |= ((high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)
+			<< THM_THERMAL_INT_CTRL__DIG_THERM_INTH__SHIFT);
+
+	val &= (~THM_THERMAL_INT_CTRL__DIG_THERM_INTL_MASK);
+	val |= ((low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)
+			<< THM_THERMAL_INT_CTRL__DIG_THERM_INTL__SHIFT);
+
+	val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
+
 	cgs_write_register(hwmgr->device, reg, val);
 
 	reg = soc15_get_register_offset(THM_HWID, 0,
 			mmTHM_TCON_HTC_BASE_IDX, mmTHM_TCON_HTC);
 
-	val = cgs_read_register(hwmgr->device, reg);
-	val &= ~(THM_TCON_HTC__HTC_TMP_LMT_MASK);
-	val |= (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES) <<
-			THM_TCON_HTC__HTC_TMP_LMT__SHIFT;
-	cgs_write_register(hwmgr->device, reg, val);
-
 	return 0;
 }
 
@@ -482,18 +483,28 @@ static int vega10_thermal_initialize(struct pp_hwmgr *hwmgr)
 static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr)
 {
 	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
+	uint32_t val = 0;
+	uint32_t reg;
 
 	if (data->smu_features[GNLD_FW_CTF].supported) {
 		if (data->smu_features[GNLD_FW_CTF].enabled)
 			printk("[Thermal_EnableAlert] FW CTF Already Enabled!\n");
+
+		PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
+				true,
+				data->smu_features[GNLD_FW_CTF].smu_feature_bitmap),
+				"Attempt to Enable FW CTF feature Failed!",
+				return -1);
+		data->smu_features[GNLD_FW_CTF].enabled = true;
 	}
 
-	PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
-			true,
-			data->smu_features[GNLD_FW_CTF].smu_feature_bitmap),
-			"Attempt to Enable FW CTF feature Failed!",
-			return -1);
-	data->smu_features[GNLD_FW_CTF].enabled = true;
+	val |= (1 << THM_THERMAL_INT_ENA__THERM_INTH_CLR__SHIFT);
+	val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT);
+	val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT);
+
+	reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA);
+	cgs_write_register(hwmgr->device, reg, val);
+
 	return 0;
 }
 
@@ -504,18 +515,24 @@ static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr)
 int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr)
 {
 	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
+	uint32_t reg;
 
 	if (data->smu_features[GNLD_FW_CTF].supported) {
 		if (!data->smu_features[GNLD_FW_CTF].enabled)
 			printk("[Thermal_EnableAlert] FW CTF Already disabled!\n");
-	}
 
-	PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
+
+		PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
 			false,
 			data->smu_features[GNLD_FW_CTF].smu_feature_bitmap),
 			"Attempt to disable FW CTF feature Failed!",
 			return -1);
-	data->smu_features[GNLD_FW_CTF].enabled = false;
+		data->smu_features[GNLD_FW_CTF].enabled = false;
+	}
+
+	reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA);
+	cgs_write_register(hwmgr->device, reg, 0);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 7b52db39a4c27c0020ff953a4bb0aa8bbe55e4a2 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Thu, 4 May 2017 13:32:01 +0800
Subject: drm/amd/powerplay: fix bug sclk/mclk level can't be set on vega10.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 61 +++++++++++-----------
 1 file changed, 31 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index e24e54c294bd..85a6c12ad1d4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -4189,55 +4189,56 @@ static int vega10_force_clock_level(struct pp_hwmgr *hwmgr,
 
 	switch (type) {
 	case PP_SCLK:
-		if (data->registry_data.sclk_dpm_key_disabled)
-			break;
-
 		for (i = 0; i < 32; i++) {
 			if (mask & (1 << i))
 				break;
 		}
+		data->smc_state_table.gfx_boot_level = i;
 
-		PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc_with_parameter(
-				hwmgr->smumgr,
-				PPSMC_MSG_SetSoftMinGfxclkByIndex,
-				i),
-				"Failed to set soft min sclk index!",
-				return -1);
+		for (i = 31; i >= 0; i--) {
+			if (mask & (1 << i))
+				break;
+		}
+		data->smc_state_table.gfx_max_level = i;
+
+		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr),
+			"Failed to upload boot level to lowest!",
+			return -EINVAL);
+
+		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr),
+			"Failed to upload dpm max level to highest!",
+			return -EINVAL);
 		break;
 
 	case PP_MCLK:
-		if (data->registry_data.mclk_dpm_key_disabled)
-			break;
-
 		for (i = 0; i < 32; i++) {
 			if (mask & (1 << i))
 				break;
 		}
 
-		PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc_with_parameter(
-				hwmgr->smumgr,
-				PPSMC_MSG_SetSoftMinUclkByIndex,
-				i),
-				"Failed to set soft min mclk index!",
-				return -1);
-		break;
-
-	case PP_PCIE:
-		if (data->registry_data.pcie_dpm_key_disabled)
-			break;
-
 		for (i = 0; i < 32; i++) {
 			if (mask & (1 << i))
 				break;
 		}
+		data->smc_state_table.mem_boot_level = i;
+
+		for (i = 31; i >= 0; i--) {
+			if (mask & (1 << i))
+				break;
+		}
+		data->smc_state_table.mem_max_level = i;
+
+		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr),
+			"Failed to upload boot level to lowest!",
+			return -EINVAL);
+
+		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr),
+			"Failed to upload dpm max level to highest!",
+			return -EINVAL);
 
-		PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc_with_parameter(
-				hwmgr->smumgr,
-				PPSMC_MSG_SetMinLinkDpmByIndex,
-				i),
-				"Failed to set min pcie index!",
-				return -1);
 		break;
+
+	case PP_PCIE:
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From db2c2a9798d266fd1d7c950b3937793b05533bf4 Mon Sep 17 00:00:00 2001
From: Pixel Ding <Pixel.Ding@amd.com>
Date: Tue, 25 Apr 2017 16:47:42 +0800
Subject: drm/amdgpu: fix mutex list null pointer reference

Fix NULL pointer reference.

Signed-off-by: Pixel Ding <Pixel.Ding@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 367811cd0763..43ca16b6eee2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2065,7 +2065,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 
 	DRM_INFO("amdgpu: finishing device.\n");
 	adev->shutdown = true;
-	drm_crtc_force_disable_all(adev->ddev);
+	if (adev->mode_info.mode_config_initialized)
+		drm_crtc_force_disable_all(adev->ddev);
 	/* evict vram memory */
 	amdgpu_bo_evict_vram(adev);
 	amdgpu_ib_pool_fini(adev);
-- 
cgit v1.2.3


From 25e78531268e9240fc594ce76587601b873d37c9 Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Date: Fri, 5 May 2017 14:17:15 -0500
Subject: ibmvscsis: Do not send aborted task response

The driver is sending a response to the actual scsi op that was
aborted by an abort task TM, while LIO is sending a response to
the abort task TM.

ibmvscsis_tgt does not send the response to the client until
release_cmd time. The reason for this was because if we did it
at queue_status time, then the client would be free to reuse the
tag for that command, but we're still using the tag until the
command is released at release_cmd time, so we chose to delay
sending the response until then. That then caused this issue, because
release_cmd is always called, even if queue_status is not.

SCSI spec says that the initiator that sends the abort task
TM NEVER gets a response to the aborted op and with the current
code it will send a response. Thus this fix will remove that response
if the CMD_T_ABORTED && !CMD_T_TAS.

Another case with a small timing window is the case where if LIO sends a
TMR_DOES_NOT_EXIST, and the release_cmd callback is called for the TMR Abort
cmd before the release_cmd for the (attemped) aborted cmd, then we need to
ensure that we send the response for the (attempted) abort cmd to the client
before we send the response for the TMR Abort cmd.

Cc: <stable@vger.kernel.org> # v4.8+
Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Michael Cyr <mikecyr@linux.vnet.ibm.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 114 ++++++++++++++++++++++++-------
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h |   2 +
 2 files changed, 91 insertions(+), 25 deletions(-)

diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index 0f807798c624..d390325c99ec 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -1170,6 +1170,7 @@ static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi)
 		cmd = list_first_entry_or_null(&vscsi->free_cmd,
 					       struct ibmvscsis_cmd, list);
 		if (cmd) {
+			cmd->flags &= ~(DELAY_SEND);
 			list_del(&cmd->list);
 			cmd->iue = iue;
 			cmd->type = UNSET_TYPE;
@@ -1749,45 +1750,79 @@ static void srp_snd_msg_failed(struct scsi_info *vscsi, long rc)
 static void ibmvscsis_send_messages(struct scsi_info *vscsi)
 {
 	u64 msg_hi = 0;
-	/* note do not attmempt to access the IU_data_ptr with this pointer
+	/* note do not attempt to access the IU_data_ptr with this pointer
 	 * it is not valid
 	 */
 	struct viosrp_crq *crq = (struct viosrp_crq *)&msg_hi;
 	struct ibmvscsis_cmd *cmd, *nxt;
 	struct iu_entry *iue;
 	long rc = ADAPT_SUCCESS;
+	bool retry = false;
 
 	if (!(vscsi->flags & RESPONSE_Q_DOWN)) {
-		list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp, list) {
-			iue = cmd->iue;
+		do {
+			retry = false;
+			list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp,
+						 list) {
+				/*
+				 * Check to make sure abort cmd gets processed
+				 * prior to the abort tmr cmd
+				 */
+				if (cmd->flags & DELAY_SEND)
+					continue;
 
-			crq->valid = VALID_CMD_RESP_EL;
-			crq->format = cmd->rsp.format;
+				if (cmd->abort_cmd) {
+					retry = true;
+					cmd->abort_cmd->flags &= ~(DELAY_SEND);
+				}
 
-			if (cmd->flags & CMD_FAST_FAIL)
-				crq->status = VIOSRP_ADAPTER_FAIL;
+				/*
+				 * If CMD_T_ABORTED w/o CMD_T_TAS scenarios and
+				 * the case where LIO issued a
+				 * ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST
+				 * case then we dont send a response, since it
+				 * was already done.
+				 */
+				if (cmd->se_cmd.transport_state & CMD_T_ABORTED &&
+				    !(cmd->se_cmd.transport_state & CMD_T_TAS)) {
+					list_del(&cmd->list);
+					ibmvscsis_free_cmd_resources(vscsi,
+								     cmd);
+				} else {
+					iue = cmd->iue;
 
-			crq->IU_length = cpu_to_be16(cmd->rsp.len);
+					crq->valid = VALID_CMD_RESP_EL;
+					crq->format = cmd->rsp.format;
 
-			rc = h_send_crq(vscsi->dma_dev->unit_address,
-					be64_to_cpu(msg_hi),
-					be64_to_cpu(cmd->rsp.tag));
+					if (cmd->flags & CMD_FAST_FAIL)
+						crq->status = VIOSRP_ADAPTER_FAIL;
 
-			pr_debug("send_messages: cmd %p, tag 0x%llx, rc %ld\n",
-				 cmd, be64_to_cpu(cmd->rsp.tag), rc);
+					crq->IU_length = cpu_to_be16(cmd->rsp.len);
 
-			/* if all ok free up the command element resources */
-			if (rc == H_SUCCESS) {
-				/* some movement has occurred */
-				vscsi->rsp_q_timer.timer_pops = 0;
-				list_del(&cmd->list);
+					rc = h_send_crq(vscsi->dma_dev->unit_address,
+							be64_to_cpu(msg_hi),
+							be64_to_cpu(cmd->rsp.tag));
 
-				ibmvscsis_free_cmd_resources(vscsi, cmd);
-			} else {
-				srp_snd_msg_failed(vscsi, rc);
-				break;
+					pr_debug("send_messages: cmd %p, tag 0x%llx, rc %ld\n",
+						 cmd, be64_to_cpu(cmd->rsp.tag), rc);
+
+					/* if all ok free up the command
+					 * element resources
+					 */
+					if (rc == H_SUCCESS) {
+						/* some movement has occurred */
+						vscsi->rsp_q_timer.timer_pops = 0;
+						list_del(&cmd->list);
+
+						ibmvscsis_free_cmd_resources(vscsi,
+									     cmd);
+					} else {
+						srp_snd_msg_failed(vscsi, rc);
+						break;
+					}
+				}
 			}
-		}
+		} while (retry);
 
 		if (!rc) {
 			/*
@@ -2708,6 +2743,7 @@ static int ibmvscsis_alloc_cmds(struct scsi_info *vscsi, int num)
 
 	for (i = 0, cmd = (struct ibmvscsis_cmd *)vscsi->cmd_pool; i < num;
 	     i++, cmd++) {
+		cmd->abort_cmd = NULL;
 		cmd->adapter = vscsi;
 		INIT_WORK(&cmd->work, ibmvscsis_scheduler);
 		list_add_tail(&cmd->list, &vscsi->free_cmd);
@@ -3579,9 +3615,20 @@ static int ibmvscsis_write_pending(struct se_cmd *se_cmd)
 {
 	struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd,
 						 se_cmd);
+	struct scsi_info *vscsi = cmd->adapter;
 	struct iu_entry *iue = cmd->iue;
 	int rc;
 
+	/*
+	 * If CLIENT_FAILED OR RESPONSE_Q_DOWN, then just return success
+	 * since LIO can't do anything about it, and we dont want to
+	 * attempt an srp_transfer_data.
+	 */
+	if ((vscsi->flags & (CLIENT_FAILED | RESPONSE_Q_DOWN))) {
+		pr_err("write_pending failed since: %d\n", vscsi->flags);
+		return 0;
+	}
+
 	rc = srp_transfer_data(cmd, &vio_iu(iue)->srp.cmd, ibmvscsis_rdma,
 			       1, 1);
 	if (rc) {
@@ -3660,11 +3707,28 @@ static void ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd)
 	struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd,
 						 se_cmd);
 	struct scsi_info *vscsi = cmd->adapter;
+	struct ibmvscsis_cmd *cmd_itr;
+	struct iu_entry *iue = iue = cmd->iue;
+	struct srp_tsk_mgmt *srp_tsk = &vio_iu(iue)->srp.tsk_mgmt;
+	u64 tag_to_abort = be64_to_cpu(srp_tsk->task_tag);
 	uint len;
 
 	pr_debug("queue_tm_rsp %p, status %d\n",
 		 se_cmd, (int)se_cmd->se_tmr_req->response);
 
+	if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK &&
+	    cmd->se_cmd.se_tmr_req->response == TMR_TASK_DOES_NOT_EXIST) {
+		spin_lock_bh(&vscsi->intr_lock);
+		list_for_each_entry(cmd_itr, &vscsi->active_q, list) {
+			if (tag_to_abort == cmd_itr->se_cmd.tag) {
+				cmd_itr->abort_cmd = cmd;
+				cmd->flags |= DELAY_SEND;
+				break;
+			}
+		}
+		spin_unlock_bh(&vscsi->intr_lock);
+	}
+
 	srp_build_response(vscsi, cmd, &len);
 	cmd->rsp.format = SRP_FORMAT;
 	cmd->rsp.len = len;
@@ -3672,8 +3736,8 @@ static void ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd)
 
 static void ibmvscsis_aborted_task(struct se_cmd *se_cmd)
 {
-	/* TBD: What (if anything) should we do here? */
-	pr_debug("ibmvscsis_aborted_task %p\n", se_cmd);
+	pr_debug("ibmvscsis_aborted_task %p task_tag: %llu\n",
+		 se_cmd, se_cmd->tag);
 }
 
 static struct se_wwn *ibmvscsis_make_tport(struct target_fabric_configfs *tf,
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h
index 65c6189885ab..b4391a8de456 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h
@@ -168,10 +168,12 @@ struct ibmvscsis_cmd {
 	struct iu_rsp rsp;
 	struct work_struct work;
 	struct scsi_info *adapter;
+	struct ibmvscsis_cmd *abort_cmd;
 	/* Sense buffer that will be mapped into outgoing status */
 	unsigned char sense_buf[TRANSPORT_SENSE_BUFFER];
 	u64 init_time;
 #define CMD_FAST_FAIL	BIT(0)
+#define DELAY_SEND	BIT(1)
 	u32 flags;
 	char type;
 };
-- 
cgit v1.2.3


From 60854a12d281e2fa25662fa32ac8022bbff17432 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 May 2017 21:51:16 -0700
Subject: x86/boot: Declare error() as noreturn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The compressed boot function error() is used to halt execution, but it
wasn't marked with "noreturn". This fixes that in preparation for
supporting kernel FORTIFY_SOURCE, which uses the noreturn annotation
on panic, and calls error(). GCC would warn about a noreturn function
calling a non-noreturn function:

  arch/x86/boot/compressed/misc.c: In function ‘fortify_panic’:
  arch/x86/boot/compressed/misc.c:416:1: warning: ‘noreturn’ function does return
   }
 ^

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Link: http://lkml.kernel.org/r/20170506045116.GA2879@beast
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/boot/compressed/error.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h
index 2e59dac07f9e..d732e608e3af 100644
--- a/arch/x86/boot/compressed/error.h
+++ b/arch/x86/boot/compressed/error.h
@@ -1,7 +1,9 @@
 #ifndef BOOT_COMPRESSED_ERROR_H
 #define BOOT_COMPRESSED_ERROR_H
 
+#include <linux/compiler.h>
+
 void warn(char *m);
-void error(char *m);
+void error(char *m) __noreturn;
 
 #endif /* BOOT_COMPRESSED_ERROR_H */
-- 
cgit v1.2.3


From 873cb582738fde338ecaeaca594560cde2ba42c3 Mon Sep 17 00:00:00 2001
From: Benjamin Valentin <benpicco@googlemail.com>
Date: Sun, 7 May 2017 14:45:08 -0700
Subject: Input: xpad - sort supported devices by USB ID

Some entries in the table of supported devices are out of order.
To not create a mess when adding new ones using a script, sort them first.

Signed-off-by: Benjamin Valentin <benpicco@googlemail.com>
Reviewed-by: Cameron Gutman <aicommander@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joystick/xpad.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index df83fdc6c0e7..f600f6662063 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -126,19 +126,19 @@ static const struct xpad_device {
 	u8 mapping;
 	u8 xtype;
 } xpad_device[] = {
+	{ 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
+	{ 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 },
 	{ 0x045e, 0x0202, "Microsoft X-Box pad v1 (US)", 0, XTYPE_XBOX },
 	{ 0x045e, 0x0285, "Microsoft X-Box pad (Japan)", 0, XTYPE_XBOX },
 	{ 0x045e, 0x0287, "Microsoft Xbox Controller S", 0, XTYPE_XBOX },
 	{ 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX },
 	{ 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 },
+	{ 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
 	{ 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE },
 	{ 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE },
 	{ 0x045e, 0x02e3, "Microsoft X-Box One Elite pad", 0, XTYPE_XBOXONE },
 	{ 0x045e, 0x02ea, "Microsoft X-Box One S pad", 0, XTYPE_XBOXONE },
-	{ 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
 	{ 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
-	{ 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
-	{ 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 },
 	{ 0x046d, 0xc21d, "Logitech Gamepad F310", 0, XTYPE_XBOX360 },
 	{ 0x046d, 0xc21e, "Logitech Gamepad F510", 0, XTYPE_XBOX360 },
 	{ 0x046d, 0xc21f, "Logitech Gamepad F710", 0, XTYPE_XBOX360 },
@@ -180,10 +180,10 @@ static const struct xpad_device {
 	{ 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0113, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0139, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE },
+	{ 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE },
 	{ 0x0e6f, 0x0201, "Pelican PL-3601 'TSZ' Wired Xbox 360 Controller", 0, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0213, "Afterglow Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
-	{ 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE },
 	{ 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 },
 	{ 0x0e8f, 0x0201, "SmartJoy Frag Xpad/PS2 adaptor", 0, XTYPE_XBOX },
@@ -209,8 +209,6 @@ static const struct xpad_device {
 	{ 0x162e, 0xbeef, "Joytech Neo-Se Take2", 0, XTYPE_XBOX360 },
 	{ 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 },
 	{ 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 },
-	{ 0x24c6, 0x542a, "Xbox ONE spectra", 0, XTYPE_XBOXONE },
-	{ 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf016, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 },
@@ -224,12 +222,14 @@ static const struct xpad_device {
 	{ 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5303, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x541a, "PowerA Xbox One Mini Wired Controller", 0, XTYPE_XBOXONE },
+	{ 0x24c6, 0x542a, "Xbox ONE spectra", 0, XTYPE_XBOXONE },
 	{ 0x24c6, 0x543a, "PowerA Xbox One wired controller", 0, XTYPE_XBOXONE },
 	{ 0x24c6, 0x5500, "Hori XBOX 360 EX 2 with Turbo", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5501, "Hori Real Arcade Pro VX-SA", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5506, "Hori SOULCALIBUR V Stick", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5b02, "Thrustmaster, Inc. GPX Controller", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 },
+	{ 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 },
 	{ 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX },
 	{ 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN }
 };
-- 
cgit v1.2.3


From 44bc722593201da43862b7200ee0b98155410b07 Mon Sep 17 00:00:00 2001
From: Benjamin Valentin <benpicco@googlemail.com>
Date: Sun, 7 May 2017 14:48:14 -0700
Subject: Input: xpad - sync supported devices with xboxdrv

The userspace xboxdrv driver [0] contains some USB IDs unknown to the
kernel driver. I have created a simple script [1] to extract the missing
devices and add them to xpad.

A quick google search confirmed that all the new devices called
Fightstick/pad are Arcade-type devices [2] where the
MAP_TRIGGERS_TO_BUTTONS option should apply.

There are some similar devices in the existing device table where this
flag is not set, but I did refrain from changing those.

[0] https://github.com/xboxdrv/xboxdrv/blob/stable/src/xpad_device.cpp
[1] http://codepad.org/CHV98BNH
[2] https://www.google.com/search?q=SFxT+Fightstick+Pro&tbm=isch

Signed-off-by: Benjamin Valentin <benpicco@googlemail.com>
Reviewed-by: Cameron Gutman <aicommander@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joystick/xpad.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index f600f6662063..16338f0b004f 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -145,6 +145,7 @@ static const struct xpad_device {
 	{ 0x046d, 0xc242, "Logitech Chillstream Controller", 0, XTYPE_XBOX360 },
 	{ 0x046d, 0xca84, "Logitech Xbox Cordless Controller", 0, XTYPE_XBOX },
 	{ 0x046d, 0xca88, "Logitech Compact Controller for Xbox", 0, XTYPE_XBOX },
+	{ 0x056e, 0x2004, "Elecom JC-U3613M", 0, XTYPE_XBOX360 },
 	{ 0x05fd, 0x1007, "Mad Catz Controller (unverified)", 0, XTYPE_XBOX },
 	{ 0x05fd, 0x107a, "InterAct 'PowerPad Pro' X-Box pad (Germany)", 0, XTYPE_XBOX },
 	{ 0x0738, 0x4516, "Mad Catz Control Pad", 0, XTYPE_XBOX },
@@ -179,6 +180,7 @@ static const struct xpad_device {
 	{ 0x0e6f, 0x0006, "Edge wireless Controller", 0, XTYPE_XBOX },
 	{ 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0113, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
+	{ 0x0e6f, 0x011f, "Rock Candy Gamepad Wired Controller", 0, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0139, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE },
 	{ 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE },
 	{ 0x0e6f, 0x0201, "Pelican PL-3601 'TSZ' Wired Xbox 360 Controller", 0, XTYPE_XBOX360 },
@@ -186,6 +188,7 @@ static const struct xpad_device {
 	{ 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 },
 	{ 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 },
+	{ 0x0e6f, 0x0413, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
 	{ 0x0e8f, 0x0201, "SmartJoy Frag Xpad/PS2 adaptor", 0, XTYPE_XBOX },
 	{ 0x0e8f, 0x3008, "Generic xbox control (dealextreme)", 0, XTYPE_XBOX },
 	{ 0x0f0d, 0x000a, "Hori Co. DOA4 FightStick", 0, XTYPE_XBOX360 },
@@ -212,21 +215,30 @@ static const struct xpad_device {
 	{ 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf016, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf018, "Mad Catz Street Fighter IV SE Fighting Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf021, "Mad Cats Ghost Recon FS GamePad", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf023, "MLG Pro Circuit Controller (Xbox)", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf028, "Street Fighter IV FightPad", 0, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf02e, "Mad Catz Fightpad", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf038, "Street Fighter IV FightStick TE", 0, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf03a, "Mad Catz SFxT Fightstick Pro", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf900, "Harmonix Xbox 360 Controller", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf901, "Gamestop Xbox 360 Controller", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf903, "Tron Xbox 360 controller", 0, XTYPE_XBOX360 },
+	{ 0x1bad, 0xfa01, "MadCatz GamePad", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5303, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 },
+	{ 0x24c6, 0x531a, "PowerA Pro Ex", 0, XTYPE_XBOX360 },
+	{ 0x24c6, 0x5397, "FUS1ON Tournament Controller", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x541a, "PowerA Xbox One Mini Wired Controller", 0, XTYPE_XBOXONE },
 	{ 0x24c6, 0x542a, "Xbox ONE spectra", 0, XTYPE_XBOXONE },
 	{ 0x24c6, 0x543a, "PowerA Xbox One wired controller", 0, XTYPE_XBOXONE },
 	{ 0x24c6, 0x5500, "Hori XBOX 360 EX 2 with Turbo", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5501, "Hori Real Arcade Pro VX-SA", 0, XTYPE_XBOX360 },
+	{ 0x24c6, 0x5503, "Hori Fighting Edge", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5506, "Hori SOULCALIBUR V Stick", 0, XTYPE_XBOX360 },
+	{ 0x24c6, 0x550d, "Hori GEM Xbox controller", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5b02, "Thrustmaster, Inc. GPX Controller", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 },
 	{ 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 },
@@ -316,6 +328,7 @@ static struct usb_device_id xpad_table[] = {
 	XPAD_XBOX360_VENDOR(0x045e),		/* Microsoft X-Box 360 controllers */
 	XPAD_XBOXONE_VENDOR(0x045e),		/* Microsoft X-Box One controllers */
 	XPAD_XBOX360_VENDOR(0x046d),		/* Logitech X-Box 360 style controllers */
+	XPAD_XBOX360_VENDOR(0x056e),		/* Elecom JC-U3613M */
 	XPAD_XBOX360_VENDOR(0x0738),		/* Mad Catz X-Box 360 controllers */
 	{ USB_DEVICE(0x0738, 0x4540) },		/* Mad Catz Beat Pad */
 	XPAD_XBOXONE_VENDOR(0x0738),		/* Mad Catz FightStick TE 2 */
-- 
cgit v1.2.3


From 4706aa075662fe3cad29c3f49b50878de53f4f3b Mon Sep 17 00:00:00 2001
From: Benjamin Valentin <benpicco@googlemail.com>
Date: Sun, 7 May 2017 14:49:09 -0700
Subject: Input: xpad - add USB IDs for Mad Catz Brawlstick and Razer
 Sabertooth

Add USB IDs for two more Xbox 360 controllers.

I found them in the pull requests for the xboxdrv userspace driver, which
seems abandoned.

Thanks to psychogony and mkaito for reporting the IDs there!

Signed-off-by: Benjamin Valentin <benpicco@googlemail.com>
Reviewed-by: Cameron Gutman <aicommander@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joystick/xpad.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 16338f0b004f..def96cd2479b 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -212,10 +212,12 @@ static const struct xpad_device {
 	{ 0x162e, 0xbeef, "Joytech Neo-Se Take2", 0, XTYPE_XBOX360 },
 	{ 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 },
 	{ 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 },
+	{ 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf016, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf018, "Mad Catz Street Fighter IV SE Fighting Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
+	{ 0x1bad, 0xf019, "Mad Catz Brawlstick for Xbox 360", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf021, "Mad Cats Ghost Recon FS GamePad", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf023, "MLG Pro Circuit Controller (Xbox)", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0xf028, "Street Fighter IV FightPad", 0, XTYPE_XBOX360 },
-- 
cgit v1.2.3


From f55813b4d8bfc9f35fda87bc1e21b7f26835fc5c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 4 May 2017 10:14:57 +0900
Subject: kbuild: dtbinst: remove unnecessary __dtbs_install_prep target

Since commit 5399eb9b3908 ("dtbsinstall: don't move target directory
out of the way"), the target __dtbs_install_prep is invoked just for
creating the install directory, but all the necessary directories
are automatically created by:
   cmd_dtb_install = mkdir -p $(2); cp $< $(2)

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/Makefile.dtbinst | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst
index a1be75d0a5fd..34614a48b717 100644
--- a/scripts/Makefile.dtbinst
+++ b/scripts/Makefile.dtbinst
@@ -20,12 +20,6 @@ include include/config/auto.conf
 include scripts/Kbuild.include
 include $(src)/Makefile
 
-PHONY += __dtbs_install_prep
-__dtbs_install_prep:
-ifeq ("$(dtbinst-root)", "$(obj)")
-	$(Q)mkdir -p $(INSTALL_DTBS_PATH)
-endif
-
 dtbinst-files	:= $(dtb-y)
 dtbinst-dirs	:= $(dts-dirs)
 
@@ -35,8 +29,6 @@ quiet_cmd_dtb_install =	INSTALL $<
 
 install-dir = $(patsubst $(dtbinst-root)%,$(INSTALL_DTBS_PATH)%,$(obj))
 
-$(dtbinst-files) $(dtbinst-dirs): | __dtbs_install_prep
-
 $(dtbinst-files): %.dtb: $(obj)/%.dtb
 	$(call cmd,dtb_install,$(install-dir))
 
-- 
cgit v1.2.3


From 59ac9c078141b8fd0186c0b18660a1b2c24e724e Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Thu, 4 May 2017 15:50:47 -0700
Subject: target/fileio: Fix zero-length READ and WRITE handling

This patch fixes zero-length READ and WRITE handling in target/FILEIO,
which was broken a long time back by:

Since:

  commit d81cb44726f050d7cf1be4afd9cb45d153b52066
  Author: Paolo Bonzini <pbonzini@redhat.com>
  Date:   Mon Sep 17 16:36:11 2012 -0700

      target: go through normal processing for all zero-length commands

which moved zero-length READ and WRITE completion out of target-core,
to doing submission into backend driver code.

To address this, go ahead and invoke target_complete_cmd() for any
non negative return value in fd_do_rw().

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Andy Grover <agrover@redhat.com>
Cc: David Disseldorp <ddiss@suse.de>
Cc: <stable@vger.kernel.org> # v3.7+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_file.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 1bf6c31e4c21..73b8f93a5fef 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -608,8 +608,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	if (ret < 0)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-	if (ret)
-		target_complete_cmd(cmd, SAM_STAT_GOOD);
+	target_complete_cmd(cmd, SAM_STAT_GOOD);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 55d694275f41a1c0eef4ef49044ff29bc3999490 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Thu, 4 May 2017 15:50:53 -0700
Subject: IB/srpt: Fix abort handling

Let the target core check the CMD_T_ABORTED flag instead of the SRP
target driver. Hence remove the transport_check_aborted_status()
call. Since state == SRPT_STATE_CMD_RSP_SENT is something that really
should not happen, do not try to recover if srpt_queue_response() is
called for an I/O context that is in that state. This patch is a bug
fix because the srpt_abort_cmd() call is misplaced - if that function
is called from srpt_queue_response() it should either be called
before the command state is changed or after the response has been
sent.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Andy Grover <agrover@redhat.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/infiniband/ulp/srpt/ib_srpt.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 7e314c2f2071..36d15da7a395 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -2302,12 +2302,8 @@ static void srpt_queue_response(struct se_cmd *cmd)
 	}
 	spin_unlock_irqrestore(&ioctx->spinlock, flags);
 
-	if (unlikely(transport_check_aborted_status(&ioctx->cmd, false)
-		     || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) {
-		atomic_inc(&ch->req_lim_delta);
-		srpt_abort_cmd(ioctx);
+	if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT)))
 		return;
-	}
 
 	/* For read commands, transfer the data to the initiator. */
 	if (ioctx->cmd.data_direction == DMA_FROM_DEVICE &&
-- 
cgit v1.2.3


From bd2c52d733f126ff75f99c537a27655b2db07e28 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Thu, 4 May 2017 15:50:54 -0700
Subject: IB/srpt: Avoid that aborting a command triggers a kernel warning

Avoid that the following warning is triggered:

WARNING: CPU: 10 PID: 166 at ../drivers/infiniband/ulp/srpt/ib_srpt.c:2674 srpt_release_cmd+0x139/0x140 [ib_srpt]
CPU: 10 PID: 166 Comm: kworker/u24:8 Not tainted 4.9.4-1-default #1
Workqueue: tmr-fileio target_tmr_work [target_core_mod]
Call Trace:
 [<ffffffffaa3c4f70>] dump_stack+0x63/0x83
 [<ffffffffaa0844eb>] __warn+0xcb/0xf0
 [<ffffffffaa0845dd>] warn_slowpath_null+0x1d/0x20
 [<ffffffffc06ba429>] srpt_release_cmd+0x139/0x140 [ib_srpt]
 [<ffffffffc06e4377>] target_release_cmd_kref+0xb7/0x120 [target_core_mod]
 [<ffffffffc06e4d7f>] target_put_sess_cmd+0x2f/0x60 [target_core_mod]
 [<ffffffffc06e15e0>] core_tmr_lun_reset+0x340/0x790 [target_core_mod]
 [<ffffffffc06e4816>] target_tmr_work+0xe6/0x140 [target_core_mod]
 [<ffffffffaa09e4d3>] process_one_work+0x1f3/0x4d0
 [<ffffffffaa09e7f8>] worker_thread+0x48/0x4e0
 [<ffffffffaa09e7b0>] ? process_one_work+0x4d0/0x4d0
 [<ffffffffaa0a46da>] kthread+0xca/0xe0
 [<ffffffffaa0a4610>] ? kthread_park+0x60/0x60
 [<ffffffffaa71b775>] ret_from_fork+0x25/0x30

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/infiniband/ulp/srpt/ib_srpt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 36d15da7a395..ee026b6b4f0d 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -2685,7 +2685,8 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
 	struct srpt_rdma_ch *ch = ioctx->ch;
 	unsigned long flags;
 
-	WARN_ON(ioctx->state != SRPT_STATE_DONE);
+	WARN_ON_ONCE(ioctx->state != SRPT_STATE_DONE &&
+		     !(ioctx->cmd.transport_state & CMD_T_ABORTED));
 
 	if (ioctx->n_rw_ctx) {
 		srpt_free_rw_ctxs(ch, ioctx);
-- 
cgit v1.2.3


From 51d638b1f56a0bfd9219800620994794a1a2b219 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Sun, 7 May 2017 00:14:22 -0700
Subject: block/mq: fix potential deadlock during cpu hotplug

This can be triggered by hot-unplug one cpu.

======================================================
 [ INFO: possible circular locking dependency detected ]
 4.11.0+ #17 Not tainted
 -------------------------------------------------------
 step_after_susp/2640 is trying to acquire lock:
  (all_q_mutex){+.+...}, at: [<ffffffffb33f95b8>] blk_mq_queue_reinit_work+0x18/0x110

 but task is already holding lock:
  (cpu_hotplug.lock){+.+.+.}, at: [<ffffffffb306d04f>] cpu_hotplug_begin+0x7f/0xe0

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #1 (cpu_hotplug.lock){+.+.+.}:
        lock_acquire+0x11c/0x230
        __mutex_lock+0x92/0x990
        mutex_lock_nested+0x1b/0x20
        get_online_cpus+0x64/0x80
        blk_mq_init_allocated_queue+0x3a0/0x4e0
        blk_mq_init_queue+0x3a/0x60
        loop_add+0xe5/0x280
        loop_init+0x124/0x177
        do_one_initcall+0x53/0x1c0
        kernel_init_freeable+0x1e3/0x27f
        kernel_init+0xe/0x100
        ret_from_fork+0x31/0x40

 -> #0 (all_q_mutex){+.+...}:
        __lock_acquire+0x189a/0x18a0
        lock_acquire+0x11c/0x230
        __mutex_lock+0x92/0x990
        mutex_lock_nested+0x1b/0x20
        blk_mq_queue_reinit_work+0x18/0x110
        blk_mq_queue_reinit_dead+0x1c/0x20
        cpuhp_invoke_callback+0x1f2/0x810
        cpuhp_down_callbacks+0x42/0x80
        _cpu_down+0xb2/0xe0
        freeze_secondary_cpus+0xb6/0x390
        suspend_devices_and_enter+0x3b3/0xa40
        pm_suspend+0x129/0x490
        state_store+0x82/0xf0
        kobj_attr_store+0xf/0x20
        sysfs_kf_write+0x45/0x60
        kernfs_fop_write+0x135/0x1c0
        __vfs_write+0x37/0x160
        vfs_write+0xcd/0x1d0
        SyS_write+0x58/0xc0
        do_syscall_64+0x8f/0x710
        return_from_SYSCALL_64+0x0/0x7a

 other info that might help us debug this:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(cpu_hotplug.lock);
                                lock(all_q_mutex);
                                lock(cpu_hotplug.lock);
   lock(all_q_mutex);

  *** DEADLOCK ***

 8 locks held by step_after_susp/2640:
  #0:  (sb_writers#6){.+.+.+}, at: [<ffffffffb3244aed>] vfs_write+0x1ad/0x1d0
  #1:  (&of->mutex){+.+.+.}, at: [<ffffffffb32d3a51>] kernfs_fop_write+0x101/0x1c0
  #2:  (s_active#166){.+.+.+}, at: [<ffffffffb32d3a59>] kernfs_fop_write+0x109/0x1c0
  #3:  (pm_mutex){+.+...}, at: [<ffffffffb30d2ecd>] pm_suspend+0x21d/0x490
  #4:  (acpi_scan_lock){+.+.+.}, at: [<ffffffffb34dc3d7>] acpi_scan_lock_acquire+0x17/0x20
  #5:  (cpu_add_remove_lock){+.+.+.}, at: [<ffffffffb306d6d7>] freeze_secondary_cpus+0x27/0x390
  #6:  (cpu_hotplug.dep_map){++++++}, at: [<ffffffffb306cfd5>] cpu_hotplug_begin+0x5/0xe0
  #7:  (cpu_hotplug.lock){+.+.+.}, at: [<ffffffffb306d04f>] cpu_hotplug_begin+0x7f/0xe0

 stack backtrace:
 CPU: 3 PID: 2640 Comm: step_after_susp Not tainted 4.11.0+ #17
 Hardware name: Dell Inc. OptiPlex 7040/0JCTF8, BIOS 1.4.9 09/12/2016
 Call Trace:
  dump_stack+0x99/0xce
  print_circular_bug+0x1fa/0x270
  __lock_acquire+0x189a/0x18a0
  lock_acquire+0x11c/0x230
  ? lock_acquire+0x11c/0x230
  ? blk_mq_queue_reinit_work+0x18/0x110
  ? blk_mq_queue_reinit_work+0x18/0x110
  __mutex_lock+0x92/0x990
  ? blk_mq_queue_reinit_work+0x18/0x110
  ? kmem_cache_free+0x2cb/0x330
  ? anon_transport_class_unregister+0x20/0x20
  ? blk_mq_queue_reinit_work+0x110/0x110
  mutex_lock_nested+0x1b/0x20
  ? mutex_lock_nested+0x1b/0x20
  blk_mq_queue_reinit_work+0x18/0x110
  blk_mq_queue_reinit_dead+0x1c/0x20
  cpuhp_invoke_callback+0x1f2/0x810
  ? __flow_cache_shrink+0x160/0x160
  cpuhp_down_callbacks+0x42/0x80
  _cpu_down+0xb2/0xe0
  freeze_secondary_cpus+0xb6/0x390
  suspend_devices_and_enter+0x3b3/0xa40
  ? rcu_read_lock_sched_held+0x79/0x80
  pm_suspend+0x129/0x490
  state_store+0x82/0xf0
  kobj_attr_store+0xf/0x20
  sysfs_kf_write+0x45/0x60
  kernfs_fop_write+0x135/0x1c0
  __vfs_write+0x37/0x160
  ? rcu_read_lock_sched_held+0x79/0x80
  ? rcu_sync_lockdep_assert+0x2f/0x60
  ? __sb_start_write+0xd9/0x1c0
  ? vfs_write+0x1ad/0x1d0
  vfs_write+0xcd/0x1d0
  SyS_write+0x58/0xc0
  ? rcu_read_lock_sched_held+0x79/0x80
  do_syscall_64+0x8f/0x710
  ? trace_hardirqs_on_thunk+0x1a/0x1c
  entry_SYSCALL64_slow_path+0x25/0x25

The cpu hotplug path will hold cpu_hotplug.lock and then reinit all exiting
queues for blk mq w/ all_q_mutex, however, blk_mq_init_allocated_queue() will
contend these two locks in the inversion order. This is due to commit eabe06595d62
(blk/mq: Cure cpu hotplug lock inversion), it fixes a cpu hotplug lock inversion
issue because of hotplug rework, however the hotplug rework is still work-in-progress
and lives in a -tip branch and mainline cannot yet trigger that splat. The commit
breaks the linus's tree in the merge window, so this patch reverts the lock order
and avoids to splat linus's tree.

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5d4ce7eb8dbf..a7e6b353e4e9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2341,15 +2341,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
-	mutex_lock(&all_q_mutex);
 	get_online_cpus();
+	mutex_lock(&all_q_mutex);
 
 	list_add_tail(&q->all_q_node, &all_q_list);
 	blk_mq_add_queue_tag_set(set, q);
 	blk_mq_map_swqueue(q, cpu_online_mask);
 
-	put_online_cpus();
 	mutex_unlock(&all_q_mutex);
+	put_online_cpus();
 
 	if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
 		int ret;
-- 
cgit v1.2.3


From 629b1b2e0e6c8285fdc21624af60e8190fa4417e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 7 May 2017 16:14:44 +0200
Subject: lightnvm: remove unused rq parameter of nvme_nvm_rqtocmd() to kill
 warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With gcc 4.1.2:

    drivers/nvme/host/lightnvm.c: In function ‘nvme_nvm_submit_io’:
    drivers/nvme/host/lightnvm.c:498: warning: ‘rq’ is used uninitialized in this function

Indeed, since commit 2e13f33a2464fc3a ("lightnvm: create cmd before
allocating request"), the request is passed to nvme_nvm_rqtocmd() before
it is allocated.

Fortunately, as of commit 91276162de9476b8 ("lightnvm: refactor end_io
functions for sync"), nvme_nvm_rqtocmd () no longer uses the passed
request, so this warning is a false positive.

Drop the unused parameter to clean up the code and kill the warning.

Fixes: 2e13f33a2464fc3a ("lightnvm: create cmd before allocating request")
Fixes: 91276162de9476b8 ("lightnvm: refactor end_io functions for sync")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/lightnvm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 8c4adac6fafc..206bfdbc1c98 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -464,8 +464,8 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
 	return ret;
 }
 
-static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
-				struct nvme_ns *ns, struct nvme_nvm_command *c)
+static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
+				    struct nvme_nvm_command *c)
 {
 	c->ph_rw.opcode = rqd->opcode;
 	c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
@@ -503,7 +503,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	if (!cmd)
 		return -ENOMEM;
 
-	nvme_nvm_rqtocmd(rq, rqd, ns, cmd);
+	nvme_nvm_rqtocmd(rqd, ns, cmd);
 
 	rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
 	if (IS_ERR(rq)) {
-- 
cgit v1.2.3


From 66aad4fdf2bf0af29c7decb4433dc5ec6c7c5451 Mon Sep 17 00:00:00 2001
From: Xunlei Pang <xlpang@redhat.com>
Date: Thu, 4 May 2017 09:42:50 +0800
Subject: x86/mm: Add support for gbpages to kernel_ident_mapping_init()

Kernel identity mappings on x86-64 kernels are created in two
ways: by the early x86 boot code, or by kernel_ident_mapping_init().

Native kernels (which is the dominant usecase) use the former,
but the kexec and the hibernation code uses kernel_ident_mapping_init().

There's a subtle difference between these two ways of how identity
mappings are created, the current kernel_ident_mapping_init() code
creates identity mappings always using 2MB page(PMD level) - while
the native kernel boot path also utilizes gbpages where available.

This difference is suboptimal both for performance and for memory
usage: kernel_ident_mapping_init() needs to allocate pages for the
page tables when creating the new identity mappings.

This patch adds 1GB page(PUD level) support to kernel_ident_mapping_init()
to address these concerns.

The primary advantage would be better TLB coverage/performance,
because we'd utilize 1GB TLBs instead of 2MB ones.

It is also useful for machines with large number of memory to
save paging structure allocations(around 4MB/TB using 2MB page)
when setting identity mappings for all the memory, after using
1GB page it will consume only 8KB/TB.

( Note that this change alone does not activate gbpages in kexec,
  we are doing that in a separate patch. )

Signed-off-by: Xunlei Pang <xlpang@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: akpm@linux-foundation.org
Cc: kexec@lists.infradead.org
Link: http://lkml.kernel.org/r/1493862171-8799-1-git-send-email-xlpang@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/boot/compressed/pagetable.c |  2 +-
 arch/x86/include/asm/init.h          |  3 ++-
 arch/x86/kernel/machine_kexec_64.c   |  2 +-
 arch/x86/mm/ident_map.c              | 14 +++++++++++++-
 arch/x86/power/hibernate_64.c        |  2 +-
 5 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index 56589d0a804b..1d78f1739087 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -70,7 +70,7 @@ static unsigned long level4p;
  * Due to relocation, pointers must be assigned at run time not build time.
  */
 static struct x86_mapping_info mapping_info = {
-	.pmd_flag       = __PAGE_KERNEL_LARGE_EXEC,
+	.page_flag       = __PAGE_KERNEL_LARGE_EXEC,
 };
 
 /* Locates and clears a region for a new top level page table. */
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 737da62bfeb0..474eb8c66fee 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -4,8 +4,9 @@
 struct x86_mapping_info {
 	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
 	void *context;			 /* context for alloc_pgt_page */
-	unsigned long pmd_flag;		 /* page flag for PMD entry */
+	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
 	unsigned long offset;		 /* ident mapping offset */
+	bool direct_gbpages;		 /* PUD level 1GB page support */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 085c3b300d32..1d4f2b076545 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -113,7 +113,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 	struct x86_mapping_info info = {
 		.alloc_pgt_page	= alloc_pgt_page,
 		.context	= image,
-		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
 	};
 	unsigned long mstart, mend;
 	pgd_t *level4p;
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 04210a29dd60..adab1595f4bd 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -13,7 +13,7 @@ static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
 		if (pmd_present(*pmd))
 			continue;
 
-		set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
+		set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag));
 	}
 }
 
@@ -30,6 +30,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 		if (next > end)
 			next = end;
 
+		if (info->direct_gbpages) {
+			pud_t pudval;
+
+			if (pud_present(*pud))
+				continue;
+
+			addr &= PUD_MASK;
+			pudval = __pud((addr - info->offset) | info->page_flag);
+			set_pud(pud, pudval);
+			continue;
+		}
+
 		if (pud_present(*pud)) {
 			pmd = pmd_offset(pud, 0);
 			ident_pmd_init(info, pmd, addr, next);
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 6a61194ffd58..a6e21fee22ea 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -104,7 +104,7 @@ static int set_up_temporary_mappings(void)
 {
 	struct x86_mapping_info info = {
 		.alloc_pgt_page	= alloc_pgt_page,
-		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
 		.offset		= __PAGE_OFFSET,
 	};
 	unsigned long mstart, mend;
-- 
cgit v1.2.3


From 8638100c52bb7782462b14aad102a4aaf0c7094c Mon Sep 17 00:00:00 2001
From: Xunlei Pang <xlpang@redhat.com>
Date: Thu, 4 May 2017 09:42:51 +0800
Subject: x86/kexec/64: Use gbpages for identity mappings if available

Kexec sets up all identity mappings before booting into the new
kernel, and this will cause extra memory consumption for paging
structures which is quite considerable on modern machines with
huge memory sizes.

E.g. on a 32TB machine that is kdumping, it could waste around
128MB (around 4MB/TB) from the reserved memory after kexec sets
all the identity mappings using the current 2MB page.

Add to that the memory needed for the loaded kdump kernel, initramfs,
etc., and it causes a kexec syscall -NOMEM failure.

As a result, we had to enlarge reserved memory via "crashkernel=X"
to work around this problem.

This causes some trouble for distributions that use policies
to evaluate the proper "crashkernel=X" value for users.

So enable gbpages for kexec mappings.

Signed-off-by: Xunlei Pang <xlpang@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: akpm@linux-foundation.org
Cc: kexec@lists.infradead.org
Link: http://lkml.kernel.org/r/1493862171-8799-2-git-send-email-xlpang@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/machine_kexec_64.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 1d4f2b076545..c25d277d7d7e 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -122,6 +122,10 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 
 	level4p = (pgd_t *)__va(start_pgtable);
 	clear_page(level4p);
+
+	if (direct_gbpages)
+		info.direct_gbpages = true;
+
 	for (i = 0; i < nr_pfn_mapped; i++) {
 		mstart = pfn_mapped[i].start << PAGE_SHIFT;
 		mend   = pfn_mapped[i].end << PAGE_SHIFT;
-- 
cgit v1.2.3


From c3123d6adb715de12fb366a55c1dedc6aabd94b0 Mon Sep 17 00:00:00 2001
From: Florin Tudorache <florin_tudorache@live.com>
Date: Fri, 5 May 2017 19:40:06 +0000
Subject: ALSA: hda - Add mute led support for HP EliteBook 840 G3

Signed-off-by: Florin Tudorache <florin_tudorache@live.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_conexant.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index e8253737c47a..63bc894ddf5e 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -860,7 +860,9 @@ static const struct hda_fixup cxt_fixups[] = {
 			{ 0x16, 0x21011020 }, /* line-out */
 			{ 0x18, 0x2181103f }, /* line-in */
 			{ }
-		}
+		},
+		.chained = true,
+		.chain_id = CXT_FIXUP_MUTE_LED_GPIO,
 	},
 	[CXT_FIXUP_HP_SPECTRE] = {
 		.type = HDA_FIXUP_PINS,
-- 
cgit v1.2.3


From 6852aa0263cdc4d43ab8e23735e6e0973cadeb63 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Sun, 2 Apr 2017 20:08:29 -0700
Subject: nios2: add .gitignore entries for auto-generated files

Add .gitignore entries for nios2 specific files generated during the
build process.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Ley Foon Tan <ley.foon.tan@intel.com>
---
 arch/nios2/boot/.gitignore   | 2 ++
 arch/nios2/kernel/.gitignore | 1 +
 2 files changed, 3 insertions(+)
 create mode 100644 arch/nios2/boot/.gitignore
 create mode 100644 arch/nios2/kernel/.gitignore

diff --git a/arch/nios2/boot/.gitignore b/arch/nios2/boot/.gitignore
new file mode 100644
index 000000000000..109279ca5a4d
--- /dev/null
+++ b/arch/nios2/boot/.gitignore
@@ -0,0 +1,2 @@
+*.dtb
+vmImage
diff --git a/arch/nios2/kernel/.gitignore b/arch/nios2/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/nios2/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
-- 
cgit v1.2.3


From f679b311f0683be352cb73b28d7108eed79ef795 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Sun, 2 Apr 2017 20:08:40 -0700
Subject: nios2: remove wrapper header for cmpxchg.h

Since commit 713e9b802e21 ("nios2: Switch to generic __xchg()")
asm/cmpxchg.h for nios2 is merely including asm-generic/cmpxchg.h. Thus,
the wrapper can be omitted and the generic header can be used directly.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Ley Foon Tan <ley.foon.tan@intel.com>
---
 arch/nios2/include/asm/Kbuild    |  1 +
 arch/nios2/include/asm/cmpxchg.h | 14 --------------
 2 files changed, 1 insertion(+), 14 deletions(-)
 delete mode 100644 arch/nios2/include/asm/cmpxchg.h

diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 87e70f2b463f..727dbb333f60 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += bitsperlong.h
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += clkdev.h
+generic-y += cmpxchg.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
diff --git a/arch/nios2/include/asm/cmpxchg.h b/arch/nios2/include/asm/cmpxchg.h
deleted file mode 100644
index a7978f14d157..000000000000
--- a/arch/nios2/include/asm/cmpxchg.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (C) 2004 Microtronix Datacom Ltd.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_NIOS2_CMPXCHG_H
-#define _ASM_NIOS2_CMPXCHG_H
-
-#include <asm-generic/cmpxchg.h>
-
-#endif /* _ASM_NIOS2_CMPXCHG_H */
-- 
cgit v1.2.3


From 57ac76ed6c0daf02c520e09e0af421f02ee2de8b Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Sun, 2 Apr 2017 20:08:52 -0700
Subject: nios2: constify irq_domain_ops

struct irq_domain_ops is not modified, so it can be made const.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Ley Foon Tan <ley.foon.tan@intel.com>
---
 arch/nios2/kernel/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/nios2/kernel/irq.c b/arch/nios2/kernel/irq.c
index f5b74ae69b5b..6c833a9d4eab 100644
--- a/arch/nios2/kernel/irq.c
+++ b/arch/nios2/kernel/irq.c
@@ -67,7 +67,7 @@ static int irq_map(struct irq_domain *h, unsigned int virq,
 	return 0;
 }
 
-static struct irq_domain_ops irq_ops = {
+static const struct irq_domain_ops irq_ops = {
 	.map	= irq_map,
 	.xlate	= irq_domain_xlate_onecell,
 };
-- 
cgit v1.2.3


From 44a4ed42576da5f3387005a1667cb3e7aedfd687 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Sun, 2 Apr 2017 20:09:04 -0700
Subject: nios2: enable earlycon support

Enable generic earlycon support for nios2. This e.g. allows to use a
8250/16650 UART as earlycon.

In order to get the earlycon, we just need to call parse_early_param()
in early_init_devtree() as soon as the device tree is initially scanned.
By adding an stdout-path property to the dts (done in this patch for
10m50_devboard), the earlycon can be used.

In order to provide early printk support, we need to provide a dummy
implementation of early_console_write(), so that
arch/nios2/kernel/early_printk.c can still be compiled if neither
SERIAL_ALTERA_JTAGUART_CONSOLE nor SERIAL_ALTERA_UART_CONSOLE is
selected. As soon as the altera_uart and altera_jtaguart support
earlycon, the entire file can be removed.

Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Ley Foon Tan <ley.foon.tan@intel.com>
---
 arch/nios2/Kconfig.debug               | 1 -
 arch/nios2/boot/dts/10m50_devboard.dts | 3 ++-
 arch/nios2/kernel/early_printk.c       | 7 +++++--
 arch/nios2/kernel/setup.c              | 2 ++
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/nios2/Kconfig.debug b/arch/nios2/Kconfig.debug
index 2fd08cbfdddb..55105220370c 100644
--- a/arch/nios2/Kconfig.debug
+++ b/arch/nios2/Kconfig.debug
@@ -18,7 +18,6 @@ config EARLY_PRINTK
 	bool "Activate early kernel debugging"
 	default y
 	select SERIAL_CORE_CONSOLE
-	depends on SERIAL_ALTERA_JTAGUART_CONSOLE || SERIAL_ALTERA_UART_CONSOLE
 	help
 	  Enable early printk on console
 	  This is useful for kernel debugging when your machine crashes very
diff --git a/arch/nios2/boot/dts/10m50_devboard.dts b/arch/nios2/boot/dts/10m50_devboard.dts
index f362b2224ee7..4bb4dc1b52e9 100644
--- a/arch/nios2/boot/dts/10m50_devboard.dts
+++ b/arch/nios2/boot/dts/10m50_devboard.dts
@@ -244,6 +244,7 @@
 	};
 
 	chosen {
-		bootargs = "debug console=ttyS0,115200";
+		bootargs = "debug earlycon console=ttyS0,115200";
+		stdout-path = &a_16550_uart_0;
 	};
 };
diff --git a/arch/nios2/kernel/early_printk.c b/arch/nios2/kernel/early_printk.c
index c08e4c1486fc..4a7bb98f744c 100644
--- a/arch/nios2/kernel/early_printk.c
+++ b/arch/nios2/kernel/early_printk.c
@@ -81,8 +81,11 @@ static void early_console_write(struct console *con, const char *s, unsigned n)
 }
 
 #else
-# error Neither SERIAL_ALTERA_JTAGUART_CONSOLE nor SERIAL_ALTERA_UART_CONSOLE \
-selected
+
+static void early_console_write(struct console *con, const char *s, unsigned n)
+{
+}
+
 #endif
 
 static struct console early_console_prom = {
diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c
index 6044d9be28b4..8b5146082e3e 100644
--- a/arch/nios2/kernel/setup.c
+++ b/arch/nios2/kernel/setup.c
@@ -137,6 +137,8 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6,
 		strncpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
 #endif
 #endif
+
+	parse_early_param();
 }
 
 void __init setup_arch(char **cmdline_p)
-- 
cgit v1.2.3


From 4b40c1c092a932b5b5ab8154235080f02ae19a34 Mon Sep 17 00:00:00 2001
From: Julien Beraud <julien.beraud@spectracom.orolia.com>
Date: Wed, 5 Apr 2017 15:03:44 +0800
Subject: nios2: implement flush_dcache_mmap_lock/unlock

Use spin_lock/unlock_irq instead of doing nothing. This fixes corruptions
of the vma_interval_tree causing the kernel to be stuck in an
infinite loop in vma_interval_tree_foreach.

Signed-off-by: Julien Beraud <julien.beraud@spectracom.orolia.com>
Acked-by: Ley Foon Tan <ley.foon.tan@intel.com>
---
 arch/nios2/include/asm/cacheflush.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h
index 52abba973dc2..55e383c173f7 100644
--- a/arch/nios2/include/asm/cacheflush.h
+++ b/arch/nios2/include/asm/cacheflush.h
@@ -46,7 +46,9 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
 extern void flush_dcache_range(unsigned long start, unsigned long end);
 extern void invalidate_dcache_range(unsigned long start, unsigned long end);
 
-#define flush_dcache_mmap_lock(mapping)		do { } while (0)
-#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+#define flush_dcache_mmap_lock(mapping) \
+	spin_lock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_unlock(mapping) \
+	spin_unlock_irq(&(mapping)->tree_lock)
 
 #endif /* _ASM_NIOS2_CACHEFLUSH_H */
-- 
cgit v1.2.3


From a89988a6e00c5a099ee23619cd91dc8dc7ec9328 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Wed, 19 Apr 2017 13:19:00 +0200
Subject: nios2: Add NIOS2_ARCH_REVISION to select between R1 and R2

Allow user to select between Nios2 R1 and R2. Since R1 and R2 are
not binary compatible, we cannot have a single kernel binary and
there is no point in having DT property for discerning these two.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Ley Foon Tan <lftan@altera.com>
---
 arch/nios2/Makefile                  | 3 +++
 arch/nios2/kernel/cpuinfo.c          | 2 ++
 arch/nios2/platform/Kconfig.platform | 8 ++++++++
 3 files changed, 13 insertions(+)

diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile
index e74afc12d516..38da17e4c908 100644
--- a/arch/nios2/Makefile
+++ b/arch/nios2/Makefile
@@ -22,7 +22,10 @@ export MMU
 
 LIBGCC         := $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
 
+KBUILD_AFLAGS += -march=r$(CONFIG_NIOS2_ARCH_REVISION)
+
 KBUILD_CFLAGS += -pipe -D__linux__ -D__ELF__
+KBUILD_CFLAGS += -march=r$(CONFIG_NIOS2_ARCH_REVISION)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_HW_MUL_SUPPORT),-mhw-mul,-mno-hw-mul)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_HW_MULX_SUPPORT),-mhw-mulx,-mno-hw-mulx)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_HW_DIV_SUPPORT),-mhw-div,-mno-hw-div)
diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c
index 1cccc36877bc..b4a6242b0f1c 100644
--- a/arch/nios2/kernel/cpuinfo.c
+++ b/arch/nios2/kernel/cpuinfo.c
@@ -125,12 +125,14 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 	seq_printf(m,
 		   "CPU:\t\tNios II/%s\n"
+		   "REV:\t\t%i\n"
 		   "MMU:\t\t%s\n"
 		   "FPU:\t\tnone\n"
 		   "Clocking:\t%u.%02u MHz\n"
 		   "BogoMips:\t%lu.%02lu\n"
 		   "Calibration:\t%lu loops\n",
 		   cpuinfo.cpu_impl,
+		   CONFIG_NIOS2_ARCH_REVISION,
 		   cpuinfo.mmu ? "present" : "none",
 		   clockfreq / 1000000, (clockfreq / 100000) % 10,
 		   (loops_per_jiffy * HZ) / 500000,
diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform
index d3e5df9fb36b..b7ac5b90c399 100644
--- a/arch/nios2/platform/Kconfig.platform
+++ b/arch/nios2/platform/Kconfig.platform
@@ -52,6 +52,14 @@ config NIOS2_DTB_SOURCE
 
 comment "Nios II instructions"
 
+config NIOS2_ARCH_REVISION
+	int "Select Nios II architecture revision"
+	range 1 2
+	default 1
+	help
+	  Select between Nios II R1 and Nios II R2 . The architectures
+	  are binary incompatible. Default is R1 .
+
 config NIOS2_HW_MUL_SUPPORT
 	bool "Enable MUL instruction"
 	default n
-- 
cgit v1.2.3


From 23460839b983d5d8d47fe90f341599f66523dd81 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Wed, 19 Apr 2017 13:19:01 +0200
Subject: nios2: Add BMX support

Add support for the BMX Bit Manipulation Extensions present in
Nios II R2 . This introduces three new instructions, EXTRACT,
INSERT and MERGE.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Ley Foon Tan <lftan@altera.com>
---
 arch/nios2/Makefile                  |  1 +
 arch/nios2/include/asm/cpuinfo.h     |  1 +
 arch/nios2/kernel/cpuinfo.c          | 10 ++++++++--
 arch/nios2/platform/Kconfig.platform |  9 +++++++++
 4 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile
index 38da17e4c908..74109c01eaea 100644
--- a/arch/nios2/Makefile
+++ b/arch/nios2/Makefile
@@ -29,6 +29,7 @@ KBUILD_CFLAGS += -march=r$(CONFIG_NIOS2_ARCH_REVISION)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_HW_MUL_SUPPORT),-mhw-mul,-mno-hw-mul)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_HW_MULX_SUPPORT),-mhw-mulx,-mno-hw-mulx)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_HW_DIV_SUPPORT),-mhw-div,-mno-hw-div)
+KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_BMX_SUPPORT),-mbmx,-mno-bmx)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_FPU_SUPPORT),-mcustom-fpu-cfg=60-1,)
 
 KBUILD_CFLAGS += -fno-optimize-sibling-calls
diff --git a/arch/nios2/include/asm/cpuinfo.h b/arch/nios2/include/asm/cpuinfo.h
index 348bb228fec9..79d376435d7d 100644
--- a/arch/nios2/include/asm/cpuinfo.h
+++ b/arch/nios2/include/asm/cpuinfo.h
@@ -29,6 +29,7 @@ struct cpuinfo {
 	bool has_div;
 	bool has_mul;
 	bool has_mulx;
+	bool has_bmx;
 
 	/* CPU caches */
 	u32 icache_line_size;
diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c
index b4a6242b0f1c..4c84fec34882 100644
--- a/arch/nios2/kernel/cpuinfo.c
+++ b/arch/nios2/kernel/cpuinfo.c
@@ -67,6 +67,7 @@ void __init setup_cpuinfo(void)
 	cpuinfo.has_div = of_property_read_bool(cpu, "altr,has-div");
 	cpuinfo.has_mul = of_property_read_bool(cpu, "altr,has-mul");
 	cpuinfo.has_mulx = of_property_read_bool(cpu, "altr,has-mulx");
+	cpuinfo.has_bmx = of_property_read_bool(cpu, "altr,has-bmx");
 	cpuinfo.mmu = of_property_read_bool(cpu, "altr,has-mmu");
 
 	if (IS_ENABLED(CONFIG_NIOS2_HW_DIV_SUPPORT) && !cpuinfo.has_div)
@@ -78,6 +79,9 @@ void __init setup_cpuinfo(void)
 	if (IS_ENABLED(CONFIG_NIOS2_HW_MULX_SUPPORT) && !cpuinfo.has_mulx)
 		err_cpu("MULX");
 
+	if (IS_ENABLED(CONFIG_NIOS2_BMX_SUPPORT) && !cpuinfo.has_bmx)
+		err_cpu("BMX");
+
 	cpuinfo.tlb_num_ways = fcpu(cpu, "altr,tlb-num-ways");
 	if (!cpuinfo.tlb_num_ways)
 		panic("altr,tlb-num-ways can't be 0. Please check your hardware "
@@ -143,10 +147,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		   "HW:\n"
 		   " MUL:\t\t%s\n"
 		   " MULX:\t\t%s\n"
-		   " DIV:\t\t%s\n",
+		   " DIV:\t\t%s\n"
+		   " BMX:\t\t%s\n",
 		   cpuinfo.has_mul ? "yes" : "no",
 		   cpuinfo.has_mulx ? "yes" : "no",
-		   cpuinfo.has_div ? "yes" : "no");
+		   cpuinfo.has_div ? "yes" : "no",
+		   cpuinfo.has_bmx ? "yes" : "no");
 
 	seq_printf(m,
 		   "Icache:\t\t%ukB, line length: %u\n",
diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform
index b7ac5b90c399..b6a8bfd59491 100644
--- a/arch/nios2/platform/Kconfig.platform
+++ b/arch/nios2/platform/Kconfig.platform
@@ -81,6 +81,15 @@ config NIOS2_HW_DIV_SUPPORT
 	  Set to true if you configured the Nios II to include the DIV
 	  instruction.  Enables the -mhw-div compiler flag.
 
+config NIOS2_BMX_SUPPORT
+	bool "Enable BMX instructions"
+	depends on NIOS2_ARCH_REVISION = 2
+	default n
+	help
+	  Set to true if you configured the Nios II R2 to include
+	  the BMX Bit Manipulation Extension instructions. Enables
+	  the -mbmx compiler flag.
+
 config NIOS2_FPU_SUPPORT
 	bool "Custom floating point instr support"
 	default n
-- 
cgit v1.2.3


From edebea98777d7090ea14bdce2e38e6798557729d Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Wed, 19 Apr 2017 13:19:02 +0200
Subject: nios2: Add CDX support

Add support for the CDX Code Density Extensions present in
Nios II R2 . This introduces new 16bit instruction set to
improve code density while retaining support for the 32bit
Nios II R2 instructions.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Ley Foon Tan <lftan@altera.com>
---
 arch/nios2/Makefile                  |  1 +
 arch/nios2/include/asm/cpuinfo.h     |  1 +
 arch/nios2/kernel/cpuinfo.c          | 10 ++++++++--
 arch/nios2/platform/Kconfig.platform |  9 +++++++++
 4 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile
index 74109c01eaea..8673a79dca9c 100644
--- a/arch/nios2/Makefile
+++ b/arch/nios2/Makefile
@@ -30,6 +30,7 @@ KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_HW_MUL_SUPPORT),-mhw-mul,-mno-hw-mul)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_HW_MULX_SUPPORT),-mhw-mulx,-mno-hw-mulx)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_HW_DIV_SUPPORT),-mhw-div,-mno-hw-div)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_BMX_SUPPORT),-mbmx,-mno-bmx)
+KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_CDX_SUPPORT),-mcdx,-mno-cdx)
 KBUILD_CFLAGS += $(if $(CONFIG_NIOS2_FPU_SUPPORT),-mcustom-fpu-cfg=60-1,)
 
 KBUILD_CFLAGS += -fno-optimize-sibling-calls
diff --git a/arch/nios2/include/asm/cpuinfo.h b/arch/nios2/include/asm/cpuinfo.h
index 79d376435d7d..dbdaf96f28d4 100644
--- a/arch/nios2/include/asm/cpuinfo.h
+++ b/arch/nios2/include/asm/cpuinfo.h
@@ -30,6 +30,7 @@ struct cpuinfo {
 	bool has_mul;
 	bool has_mulx;
 	bool has_bmx;
+	bool has_cdx;
 
 	/* CPU caches */
 	u32 icache_line_size;
diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c
index 4c84fec34882..93207718bb22 100644
--- a/arch/nios2/kernel/cpuinfo.c
+++ b/arch/nios2/kernel/cpuinfo.c
@@ -68,6 +68,7 @@ void __init setup_cpuinfo(void)
 	cpuinfo.has_mul = of_property_read_bool(cpu, "altr,has-mul");
 	cpuinfo.has_mulx = of_property_read_bool(cpu, "altr,has-mulx");
 	cpuinfo.has_bmx = of_property_read_bool(cpu, "altr,has-bmx");
+	cpuinfo.has_cdx = of_property_read_bool(cpu, "altr,has-cdx");
 	cpuinfo.mmu = of_property_read_bool(cpu, "altr,has-mmu");
 
 	if (IS_ENABLED(CONFIG_NIOS2_HW_DIV_SUPPORT) && !cpuinfo.has_div)
@@ -82,6 +83,9 @@ void __init setup_cpuinfo(void)
 	if (IS_ENABLED(CONFIG_NIOS2_BMX_SUPPORT) && !cpuinfo.has_bmx)
 		err_cpu("BMX");
 
+	if (IS_ENABLED(CONFIG_NIOS2_CDX_SUPPORT) && !cpuinfo.has_cdx)
+		err_cpu("CDX");
+
 	cpuinfo.tlb_num_ways = fcpu(cpu, "altr,tlb-num-ways");
 	if (!cpuinfo.tlb_num_ways)
 		panic("altr,tlb-num-ways can't be 0. Please check your hardware "
@@ -148,11 +152,13 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		   " MUL:\t\t%s\n"
 		   " MULX:\t\t%s\n"
 		   " DIV:\t\t%s\n"
-		   " BMX:\t\t%s\n",
+		   " BMX:\t\t%s\n"
+		   " CDX:\t\t%s\n",
 		   cpuinfo.has_mul ? "yes" : "no",
 		   cpuinfo.has_mulx ? "yes" : "no",
 		   cpuinfo.has_div ? "yes" : "no",
-		   cpuinfo.has_bmx ? "yes" : "no");
+		   cpuinfo.has_bmx ? "yes" : "no",
+		   cpuinfo.has_cdx ? "yes" : "no");
 
 	seq_printf(m,
 		   "Icache:\t\t%ukB, line length: %u\n",
diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform
index b6a8bfd59491..74c1aaf588b8 100644
--- a/arch/nios2/platform/Kconfig.platform
+++ b/arch/nios2/platform/Kconfig.platform
@@ -90,6 +90,15 @@ config NIOS2_BMX_SUPPORT
 	  the BMX Bit Manipulation Extension instructions. Enables
 	  the -mbmx compiler flag.
 
+config NIOS2_CDX_SUPPORT
+	bool "Enable CDX instructions"
+	depends on NIOS2_ARCH_REVISION = 2
+	default n
+	help
+	  Set to true if you configured the Nios II R2 to include
+	  the CDX Bit Manipulation Extension instructions. Enables
+	  the -mcdx compiler flag.
+
 config NIOS2_FPU_SUPPORT
 	bool "Custom floating point instr support"
 	default n
-- 
cgit v1.2.3


From 2f242bf45370b8ea44f209b22c3c90984655a102 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 5 May 2017 11:53:19 +0200
Subject: mac80211: properly remove RX_ENC_FLAG_40MHZ

Somehow I missed this in my RX rate cleanup series, causing some
drivers to not report correct bandwidth since this flag isn't
used by mac80211 anymore. Fix this, and make hwsim also report
higher bandwidths appropriately.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath9k/ar9003_mac.c    | 2 +-
 drivers/net/wireless/ath/ath9k/mac.c           | 4 ++--
 drivers/net/wireless/intel/iwlegacy/4965-mac.c | 4 +++-
 drivers/net/wireless/intel/iwlwifi/dvm/rx.c    | 4 +++-
 drivers/net/wireless/mac80211_hwsim.c          | 8 +++++++-
 include/net/mac80211.h                         | 2 --
 6 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mac.c b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
index 68fcbe03bce2..b3f20b3c0210 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_mac.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
@@ -522,7 +522,7 @@ int ath9k_hw_process_rxdesc_edma(struct ath_hw *ah, struct ath_rx_status *rxs,
 	rxs->rs_moreaggr = (rxsp->status11 & AR_RxMoreAggr) ? 1 : 0;
 	rxs->rs_antenna = (MS(rxsp->status4, AR_RxAntenna) & 0x7);
 	rxs->enc_flags |= (rxsp->status4 & AR_GI) ? RX_ENC_FLAG_SHORT_GI : 0;
-	rxs->enc_flags |= (rxsp->status4 & AR_2040) ? RX_ENC_FLAG_40MHZ : 0;
+	rxs->bw = (rxsp->status4 & AR_2040) ? RATE_INFO_BW_40 : RATE_INFO_BW_20;
 
 	rxs->evm0 = rxsp->status6;
 	rxs->evm1 = rxsp->status7;
diff --git a/drivers/net/wireless/ath/ath9k/mac.c b/drivers/net/wireless/ath/ath9k/mac.c
index 6128c2bb23d8..77c94f9e7b61 100644
--- a/drivers/net/wireless/ath/ath9k/mac.c
+++ b/drivers/net/wireless/ath/ath9k/mac.c
@@ -580,8 +580,8 @@ int ath9k_hw_rxprocdesc(struct ath_hw *ah, struct ath_desc *ds,
 	/* directly mapped flags for ieee80211_rx_status */
 	rs->enc_flags |=
 		(ads.ds_rxstatus3 & AR_GI) ? RX_ENC_FLAG_SHORT_GI : 0;
-	rs->enc_flags |=
-		(ads.ds_rxstatus3 & AR_2040) ? RX_ENC_FLAG_40MHZ : 0;
+	rs->bw = (ads.ds_rxstatus3 & AR_2040) ? RATE_INFO_BW_40 :
+						RATE_INFO_BW_20;
 	if (AR_SREV_9280_20_OR_LATER(ah))
 		rs->enc_flags |=
 			(ads.ds_rxstatus3 & AR_STBC) ?
diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
index 5d5faa3cad24..49a2ff15ddae 100644
--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
@@ -734,7 +734,9 @@ il4965_hdl_rx(struct il_priv *il, struct il_rx_buf *rxb)
 	if (rate_n_flags & RATE_MCS_HT_MSK)
 		rx_status.encoding = RX_ENC_HT;
 	if (rate_n_flags & RATE_MCS_HT40_MSK)
-		rx_status.enc_flags |= RX_ENC_FLAG_40MHZ;
+		rx_status.bw = RATE_INFO_BW_40;
+	else
+		rx_status.bw = RATE_INFO_BW_20;
 	if (rate_n_flags & RATE_MCS_SGI_MSK)
 		rx_status.enc_flags |= RX_ENC_FLAG_SHORT_GI;
 
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/rx.c b/drivers/net/wireless/intel/iwlwifi/dvm/rx.c
index 1ee1ba9931a7..adfd6307edca 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/rx.c
@@ -889,7 +889,9 @@ static void iwlagn_rx_reply_rx(struct iwl_priv *priv,
 	if (rate_n_flags & RATE_MCS_HT_MSK)
 		rx_status.encoding = RX_ENC_HT;
 	if (rate_n_flags & RATE_MCS_HT40_MSK)
-		rx_status.enc_flags |= RX_ENC_FLAG_40MHZ;
+		rx_status.bw = RATE_INFO_BW_40;
+	else
+		rx_status.bw = RATE_INFO_BW_20;
 	if (rate_n_flags & RATE_MCS_SGI_MSK)
 		rx_status.enc_flags |= RX_ENC_FLAG_SHORT_GI;
 	if (rate_n_flags & RATE_MCS_GF_MSK)
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 87444af20fc5..002b25cff5b6 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1201,7 +1201,13 @@ static bool mac80211_hwsim_tx_frame_no_nl(struct ieee80211_hw *hw,
 			rx_status.encoding = RX_ENC_HT;
 	}
 	if (info->control.rates[0].flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
-		rx_status.enc_flags |= RX_ENC_FLAG_40MHZ;
+		rx_status.bw = RATE_INFO_BW_40;
+	else if (info->control.rates[0].flags & IEEE80211_TX_RC_80_MHZ_WIDTH)
+		rx_status.bw = RATE_INFO_BW_80;
+	else if (info->control.rates[0].flags & IEEE80211_TX_RC_160_MHZ_WIDTH)
+		rx_status.bw = RATE_INFO_BW_160;
+	else
+		rx_status.bw = RATE_INFO_BW_20;
 	if (info->control.rates[0].flags & IEEE80211_TX_RC_SHORT_GI)
 		rx_status.enc_flags |= RX_ENC_FLAG_SHORT_GI;
 	/* TODO: simulate real signal strength (and optional packet loss) */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 4d05a9443344..76ed24a201eb 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1141,7 +1141,6 @@ enum mac80211_rx_flags {
  * enum mac80211_rx_encoding_flags - MCS & bandwidth flags
  *
  * @RX_ENC_FLAG_SHORTPRE: Short preamble was used for this frame
- * @RX_ENC_FLAG_40MHZ: HT40 (40 MHz) was used
  * @RX_ENC_FLAG_SHORT_GI: Short guard interval was used
  * @RX_ENC_FLAG_HT_GF: This frame was received in a HT-greenfield transmission,
  *	if the driver fills this value it should add
@@ -1153,7 +1152,6 @@ enum mac80211_rx_flags {
  */
 enum mac80211_rx_encoding_flags {
 	RX_ENC_FLAG_SHORTPRE		= BIT(0),
-	RX_ENC_FLAG_40MHZ		= BIT(1),
 	RX_ENC_FLAG_SHORT_GI		= BIT(2),
 	RX_ENC_FLAG_HT_GF		= BIT(3),
 	RX_ENC_FLAG_STBC_MASK		= BIT(4) | BIT(5),
-- 
cgit v1.2.3


From 7f1e614113ffe75f5ea7c99f641bf1b56f85be03 Mon Sep 17 00:00:00 2001
From: Ley Foon Tan <ley.foon.tan@intel.com>
Date: Mon, 8 May 2017 17:14:14 +0800
Subject: nios2: use generic strncpy_from_user() and strnlen_user()

This change enables the generic strncpy_from_user() and strnlen_user()

Signed-off-by: Ley Foon Tan <ley.foon.tan@intel.com>
---
 arch/nios2/Kconfig               |  2 ++
 arch/nios2/include/asm/uaccess.h |  7 +++++--
 arch/nios2/mm/uaccess.c          | 33 ---------------------------------
 3 files changed, 7 insertions(+), 35 deletions(-)

diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 51a56c8b04b4..a72d5f0de692 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -6,6 +6,8 @@ config NIOS2
 	select GENERIC_CPU_DEVICES
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
+	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_KGDB
 	select IRQ_DOMAIN
diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h
index 727bd9504899..dfa3c7cb30b4 100644
--- a/arch/nios2/include/asm/uaccess.h
+++ b/arch/nios2/include/asm/uaccess.h
@@ -42,6 +42,8 @@
 
 # define __EX_TABLE_SECTION	".section __ex_table,\"a\"\n"
 
+#define user_addr_max() (uaccess_kernel() ? ~0UL : TASK_SIZE)
+
 /*
  * Zero Userspace
  */
@@ -81,8 +83,9 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
 #define INLINE_COPY_TO_USER
 
 extern long strncpy_from_user(char *__to, const char __user *__from,
-				long __len);
-extern long strnlen_user(const char __user *s, long n);
+			      long __len);
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *s, long n);
 
 /* Optimized macros */
 #define __get_user_asm(val, insn, addr, err)				\
diff --git a/arch/nios2/mm/uaccess.c b/arch/nios2/mm/uaccess.c
index 804983317766..34f10af8ea40 100644
--- a/arch/nios2/mm/uaccess.c
+++ b/arch/nios2/mm/uaccess.c
@@ -128,36 +128,3 @@ asm(
 	".word 12b,13b\n"
 	".previous\n");
 EXPORT_SYMBOL(raw_copy_to_user);
-
-long strncpy_from_user(char *__to, const char __user *__from, long __len)
-{
-	int l = strnlen_user(__from, __len);
-	int is_zt = 1;
-
-	if (l > __len) {
-		is_zt = 0;
-		l = __len;
-	}
-
-	if (l == 0 || copy_from_user(__to, __from, l))
-		return -EFAULT;
-
-	if (is_zt)
-		l--;
-	return l;
-}
-
-long strnlen_user(const char __user *s, long n)
-{
-	long i;
-
-	for (i = 0; i < n; i++) {
-		char c;
-
-		if (get_user(c, s + i) == -EFAULT)
-			return 0;
-		if (c == 0)
-			return i + 1;
-	}
-	return n + 1;
-}
-- 
cgit v1.2.3


From f8860ce836f2d502b07ef99559707fe55d90f5bc Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Tue, 2 May 2017 17:56:21 +0300
Subject: mac80211: bail out from prep_connection() if a reconfig is ongoing

If ieee80211_hw_restart() is called during authentication, the
authentication process will continue, causing the driver to be called
in a wrong state.  This ultimately causes an oops in the iwlwifi
driver (at least).

This fixes bugzilla 195299 partly.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195299
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 89dff563b1ec..0ea9712bd99e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4382,6 +4382,10 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
 	if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data))
 		return -EINVAL;
 
+	/* If a reconfig is happening, bail out */
+	if (local->in_reconfig)
+		return -EBUSY;
+
 	if (assoc) {
 		rcu_read_lock();
 		have_sta = sta_info_get(sdata, cbss->bssid);
-- 
cgit v1.2.3


From 4954601f821bb5afd4dd59b57bf801adf4924bbd Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 27 Apr 2017 09:13:38 +0200
Subject: nl80211: correctly validate MU-MIMO groups

Since groups 0 and 63 are invalid, we should check for those bits.
Note that the 802.11 spec specifies the *bit* order, but the CPU
doesn't care about bit order since it can't address bits, so it's
always treating BIT(0) as the lowest bit within a byte.

Reported-by: Jan Fuchs <jan.fuchs@lancom.de>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 570fc95dc507..c3bc9da30cff 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2764,8 +2764,8 @@ static int nl80211_parse_mon_options(struct cfg80211_registered_device *rdev,
 			nla_data(info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]);
 
 		/* bits 0 and 63 are reserved and must be zero */
-		if ((mumimo_groups[0] & BIT(7)) ||
-		    (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(0)))
+		if ((mumimo_groups[0] & BIT(0)) ||
+		    (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(7)))
 			return -EINVAL;
 
 		params->vht_mumimo_groups = mumimo_groups;
-- 
cgit v1.2.3


From f1f3e9e2a50a70de908f9dfe0d870e9cdc67e042 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 27 Apr 2017 13:19:04 +0200
Subject: mac80211: fix IBSS presp allocation size

When VHT IBSS support was added, the size of the extra elements
wasn't considered in ieee80211_ibss_build_presp(), which makes
it possible that it would overrun the allocated buffer. Fix it
by allocating the necessary space.

Fixes: abcff6ef01f9 ("mac80211: add VHT support for IBSS")
Reported-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ibss.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 6db09fa18269..364d4e137649 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -66,6 +66,8 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
 		    2 + (IEEE80211_MAX_SUPP_RATES - 8) +
 		    2 + sizeof(struct ieee80211_ht_cap) +
 		    2 + sizeof(struct ieee80211_ht_operation) +
+		    2 + sizeof(struct ieee80211_vht_cap) +
+		    2 + sizeof(struct ieee80211_vht_operation) +
 		    ifibss->ie_len;
 	presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL);
 	if (!presp)
-- 
cgit v1.2.3


From 6406c91943a0f29b6e8786921aaa038663e08055 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 2 May 2017 09:33:40 +0200
Subject: cfg80211: fix multi scheduled scan kernel-doc

Replace @results_wk with @report_results, which was missed
in an earlier patch between revisions thereof.

Fixes: b34939b98369 ("cfg80211: add request id to cfg80211_sched_scan_*() api")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 15d6599b8bc6..b083e6cbae8c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1666,7 +1666,7 @@ struct cfg80211_bss_select_adjust {
  * 	(others are filtered out).
  *	If ommited, all results are passed.
  * @n_match_sets: number of match sets
- * @results_wk: worker for processing results notification.
+ * @report_results: indicates that results were reported for this request
  * @wiphy: the wiphy this was for
  * @dev: the interface
  * @scan_start: start time of the scheduled scan
-- 
cgit v1.2.3


From de2a0910795d44d6534e0fe83c571c1bbbe05808 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Mon, 9 Jan 2017 14:59:24 +0100
Subject: KVM: arm/arm64: Add ITS save/restore API documentation

Add description for how to access ITS registers and how to save/restore
ITS tables into/from memory.

Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
 Documentation/virtual/kvm/devices/arm-vgic-its.txt | 120 +++++++++++++++++++++
 1 file changed, 120 insertions(+)

diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
index 6081a5b7fc1e..ba132e9885b3 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
@@ -32,7 +32,127 @@ Groups:
     KVM_DEV_ARM_VGIC_CTRL_INIT
       request the initialization of the ITS, no additional parameter in
       kvm_device_attr.addr.
+
+    KVM_DEV_ARM_ITS_SAVE_TABLES
+      save the ITS table data into guest RAM, at the location provisioned
+      by the guest in corresponding registers/table entries.
+
+      The layout of the tables in guest memory defines an ABI. The entries
+      are laid out in little endian format as described in the last paragraph.
+
+    KVM_DEV_ARM_ITS_RESTORE_TABLES
+      restore the ITS tables from guest RAM to ITS internal structures.
+
+      The GICV3 must be restored before the ITS and all ITS registers but
+      the GITS_CTLR must be restored before restoring the ITS tables.
+
+      The GITS_IIDR read-only register must also be restored before
+      calling KVM_DEV_ARM_ITS_RESTORE_TABLES as the IIDR revision field
+      encodes the ABI revision.
+
+      The expected ordering when restoring the GICv3/ITS is described in section
+      "ITS Restore Sequence".
+
   Errors:
     -ENXIO:  ITS not properly configured as required prior to setting
              this attribute
     -ENOMEM: Memory shortage when allocating ITS internal data
+    -EINVAL: Inconsistent restored data
+    -EFAULT: Invalid guest ram access
+    -EBUSY:  One or more VCPUS are running
+
+  KVM_DEV_ARM_VGIC_GRP_ITS_REGS
+  Attributes:
+      The attr field of kvm_device_attr encodes the offset of the
+      ITS register, relative to the ITS control frame base address
+      (ITS_base).
+
+      kvm_device_attr.addr points to a __u64 value whatever the width
+      of the addressed register (32/64 bits). 64 bit registers can only
+      be accessed with full length.
+
+      Writes to read-only registers are ignored by the kernel except for:
+      - GITS_CREADR. It must be restored otherwise commands in the queue
+        will be re-executed after restoring CWRITER. GITS_CREADR must be
+        restored before restoring the GITS_CTLR which is likely to enable the
+        ITS. Also it must be restored after GITS_CBASER since a write to
+        GITS_CBASER resets GITS_CREADR.
+      - GITS_IIDR. The Revision field encodes the table layout ABI revision.
+        In the future we might implement direct injection of virtual LPIs.
+        This will require an upgrade of the table layout and an evolution of
+        the ABI. GITS_IIDR must be restored before calling
+        KVM_DEV_ARM_ITS_RESTORE_TABLES.
+
+      For other registers, getting or setting a register has the same
+      effect as reading/writing the register on real hardware.
+  Errors:
+    -ENXIO: Offset does not correspond to any supported register
+    -EFAULT: Invalid user pointer for attr->addr
+    -EINVAL: Offset is not 64-bit aligned
+    -EBUSY: one or more VCPUS are running
+
+ ITS Restore Sequence:
+ -------------------------
+
+The following ordering must be followed when restoring the GIC and the ITS:
+a) restore all guest memory and create vcpus
+b) restore all redistributors
+c) initialize the ITS and then provide its base address
+   (KVM_DEV_ARM_VGIC_CTRL_INIT, KVM_DEV_ARM_VGIC_GRP_ADDR)
+d) restore the ITS in the following order:
+   1. Restore GITS_CBASER
+   2. Restore all other GITS_ registers, except GITS_CTLR!
+   3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
+   4. Restore GITS_CTLR
+
+Then vcpus can be started.
+
+ ITS Table ABI REV0:
+ -------------------
+
+ Revision 0 of the ABI only supports physical LPIs.
+
+ The device table and ITT are indexed by the deviceid and eventid,
+ respectively. The collection table is not indexed by collectionid:
+ CTEs are written in the table in the order of collection creation. All
+ entries are 8 bytes.
+
+ Device Table Entry (DTE):
+
+ bits:     | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
+ values:   | V |   next    | ITT_addr |  Size   |
+
+ where;
+ - V indicates whether the entry is valid. If not, other fields
+   are not meaningful.
+ - next: equals to 0 if this entry is the last one; otherwise it
+   corresponds to the deviceid offset to the next DTE, capped by
+   2^14 -1.
+ - ITT_addr matches bits [51:8] of the ITT address (256 Byte aligned).
+ - Size specifies the supported number of bits for the eventid,
+   minus one
+
+ Collection Table Entry (CTE):
+
+ bits:     | 63| 62 ..  52  | 51 ... 16 | 15  ...   0 |
+ values:   | V |    RES0    |  RDBase   |    ICID     |
+
+ where:
+ - V indicates whether the entry is valid. If not, other fields are
+   not meaningful.
+ - RES0: reserved field with Should-Be-Zero-or-Preserved behavior.
+ - RDBase is the PE number (GICR_TYPER.Processor_Number semantic),
+ - ICID is the collection ID
+
+ Interrupt Translation Entry (ITE):
+
+ bits:     | 63 ... 48 | 47 ... 16 | 15 ... 0 |
+ values:   |    next   |   pINTID  |  ICID    |
+
+ where:
+ - next: equals to 0 if this entry is the last one; otherwise it corresponds
+   to the eventid offset to the next ITE capped by 2^16 -1.
+ - pINTID is the physical LPI ID; if zero, it means the entry is not valid
+   and other fields are not meaningful.
+ - ICID is the collection ID
+
-- 
cgit v1.2.3


From 100e62983ef3a012e62e0e7116035529a286c1ec Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 14 Apr 2017 10:18:12 +0200
Subject: KVM: arm/arm64: Add GICV3 pending table save API documentation

Add description for how to save GICV3 LPI pending bit into
guest RAM pending tables.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
---
 Documentation/virtual/kvm/devices/arm-vgic-v3.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
index c1a24612c198..9293b45abdb9 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
@@ -167,11 +167,17 @@ Groups:
     KVM_DEV_ARM_VGIC_CTRL_INIT
       request the initialization of the VGIC, no additional parameter in
       kvm_device_attr.addr.
+    KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES
+      save all LPI pending bits into guest RAM pending tables.
+
+      The first kB of the pending table is not altered by this operation.
   Errors:
     -ENXIO: VGIC not properly configured as required prior to calling
      this attribute
     -ENODEV: no online VCPU
     -ENOMEM: memory shortage when allocating vgic internal data
+    -EFAULT: Invalid guest ram access
+    -EBUSY:  One or more VCPUS are running
 
 
   KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
-- 
cgit v1.2.3


From 9ce91c7234ff477c805c3dec7cf54716193c7a48 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Wed, 8 Feb 2017 06:09:29 +0100
Subject: KVM: arm/arm64: vgic-its: rename itte into ite

The actual abbreviation for the interrupt translation table entry
is ITE. Let's rename all itte instances by ite.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-its.c | 148 +++++++++++++++++++++----------------------
 1 file changed, 74 insertions(+), 74 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 8d1da1af4b09..3ffcbbe97523 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -114,8 +114,8 @@ struct its_collection {
 #define its_is_collection_mapped(coll) ((coll) && \
 				((coll)->target_addr != COLLECTION_NOT_MAPPED))
 
-struct its_itte {
-	struct list_head itte_list;
+struct its_ite {
+	struct list_head ite_list;
 
 	struct vgic_irq *irq;
 	struct its_collection *collection;
@@ -143,27 +143,27 @@ static struct its_device *find_its_device(struct vgic_its *its, u32 device_id)
  * Device ID/Event ID pair on an ITS.
  * Must be called with the its_lock mutex held.
  */
-static struct its_itte *find_itte(struct vgic_its *its, u32 device_id,
+static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
 				  u32 event_id)
 {
 	struct its_device *device;
-	struct its_itte *itte;
+	struct its_ite *ite;
 
 	device = find_its_device(its, device_id);
 	if (device == NULL)
 		return NULL;
 
-	list_for_each_entry(itte, &device->itt_head, itte_list)
-		if (itte->event_id == event_id)
-			return itte;
+	list_for_each_entry(ite, &device->itt_head, ite_list)
+		if (ite->event_id == event_id)
+			return ite;
 
 	return NULL;
 }
 
 /* To be used as an iterator this macro misses the enclosing parentheses */
-#define for_each_lpi_its(dev, itte, its) \
+#define for_each_lpi_its(dev, ite, its) \
 	list_for_each_entry(dev, &(its)->device_list, dev_list) \
-		list_for_each_entry(itte, &(dev)->itt_head, itte_list)
+		list_for_each_entry(ite, &(dev)->itt_head, ite_list)
 
 /*
  * We only implement 48 bits of PA at the moment, although the ITS
@@ -270,18 +270,18 @@ static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr)
  * Needs to be called whenever either the collection for a LPIs has
  * changed or the collection itself got retargeted.
  */
-static void update_affinity_itte(struct kvm *kvm, struct its_itte *itte)
+static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite)
 {
 	struct kvm_vcpu *vcpu;
 
-	if (!its_is_collection_mapped(itte->collection))
+	if (!its_is_collection_mapped(ite->collection))
 		return;
 
-	vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
+	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
 
-	spin_lock(&itte->irq->irq_lock);
-	itte->irq->target_vcpu = vcpu;
-	spin_unlock(&itte->irq->irq_lock);
+	spin_lock(&ite->irq->irq_lock);
+	ite->irq->target_vcpu = vcpu;
+	spin_unlock(&ite->irq->irq_lock);
 }
 
 /*
@@ -292,13 +292,13 @@ static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its,
 				       struct its_collection *coll)
 {
 	struct its_device *device;
-	struct its_itte *itte;
+	struct its_ite *ite;
 
-	for_each_lpi_its(device, itte, its) {
-		if (!itte->collection || coll != itte->collection)
+	for_each_lpi_its(device, ite, its) {
+		if (!ite->collection || coll != ite->collection)
 			continue;
 
-		update_affinity_itte(kvm, itte);
+		update_affinity_ite(kvm, ite);
 	}
 }
 
@@ -425,25 +425,25 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
 				u32 devid, u32 eventid)
 {
 	struct kvm_vcpu *vcpu;
-	struct its_itte *itte;
+	struct its_ite *ite;
 
 	if (!its->enabled)
 		return -EBUSY;
 
-	itte = find_itte(its, devid, eventid);
-	if (!itte || !its_is_collection_mapped(itte->collection))
+	ite = find_ite(its, devid, eventid);
+	if (!ite || !its_is_collection_mapped(ite->collection))
 		return E_ITS_INT_UNMAPPED_INTERRUPT;
 
-	vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
+	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
 	if (!vcpu)
 		return E_ITS_INT_UNMAPPED_INTERRUPT;
 
 	if (!vcpu->arch.vgic_cpu.lpis_enabled)
 		return -EBUSY;
 
-	spin_lock(&itte->irq->irq_lock);
-	itte->irq->pending_latch = true;
-	vgic_queue_irq_unlock(kvm, itte->irq);
+	spin_lock(&ite->irq->irq_lock);
+	ite->irq->pending_latch = true;
+	vgic_queue_irq_unlock(kvm, ite->irq);
 
 	return 0;
 }
@@ -511,15 +511,15 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
 }
 
 /* Requires the its_lock to be held. */
-static void its_free_itte(struct kvm *kvm, struct its_itte *itte)
+static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
 {
-	list_del(&itte->itte_list);
+	list_del(&ite->ite_list);
 
 	/* This put matches the get in vgic_add_lpi. */
-	if (itte->irq)
-		vgic_put_irq(kvm, itte->irq);
+	if (ite->irq)
+		vgic_put_irq(kvm, ite->irq);
 
-	kfree(itte);
+	kfree(ite);
 }
 
 static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size)
@@ -544,17 +544,17 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
 {
 	u32 device_id = its_cmd_get_deviceid(its_cmd);
 	u32 event_id = its_cmd_get_id(its_cmd);
-	struct its_itte *itte;
+	struct its_ite *ite;
 
 
-	itte = find_itte(its, device_id, event_id);
-	if (itte && itte->collection) {
+	ite = find_ite(its, device_id, event_id);
+	if (ite && ite->collection) {
 		/*
 		 * Though the spec talks about removing the pending state, we
 		 * don't bother here since we clear the ITTE anyway and the
 		 * pending state is a property of the ITTE struct.
 		 */
-		its_free_itte(kvm, itte);
+		its_free_ite(kvm, ite);
 		return 0;
 	}
 
@@ -572,26 +572,26 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
 	u32 event_id = its_cmd_get_id(its_cmd);
 	u32 coll_id = its_cmd_get_collection(its_cmd);
 	struct kvm_vcpu *vcpu;
-	struct its_itte *itte;
+	struct its_ite *ite;
 	struct its_collection *collection;
 
-	itte = find_itte(its, device_id, event_id);
-	if (!itte)
+	ite = find_ite(its, device_id, event_id);
+	if (!ite)
 		return E_ITS_MOVI_UNMAPPED_INTERRUPT;
 
-	if (!its_is_collection_mapped(itte->collection))
+	if (!its_is_collection_mapped(ite->collection))
 		return E_ITS_MOVI_UNMAPPED_COLLECTION;
 
 	collection = find_collection(its, coll_id);
 	if (!its_is_collection_mapped(collection))
 		return E_ITS_MOVI_UNMAPPED_COLLECTION;
 
-	itte->collection = collection;
+	ite->collection = collection;
 	vcpu = kvm_get_vcpu(kvm, collection->target_addr);
 
-	spin_lock(&itte->irq->irq_lock);
-	itte->irq->target_vcpu = vcpu;
-	spin_unlock(&itte->irq->irq_lock);
+	spin_lock(&ite->irq->irq_lock);
+	ite->irq->target_vcpu = vcpu;
+	spin_unlock(&ite->irq->irq_lock);
 
 	return 0;
 }
@@ -679,7 +679,7 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id)
 {
 	struct its_collection *collection;
 	struct its_device *device;
-	struct its_itte *itte;
+	struct its_ite *ite;
 
 	/*
 	 * Clearing the mapping for that collection ID removes the
@@ -690,10 +690,10 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id)
 	if (!collection)
 		return;
 
-	for_each_lpi_its(device, itte, its)
-		if (itte->collection &&
-		    itte->collection->collection_id == coll_id)
-			itte->collection = NULL;
+	for_each_lpi_its(device, ite, its)
+		if (ite->collection &&
+		    ite->collection->collection_id == coll_id)
+			ite->collection = NULL;
 
 	list_del(&collection->coll_list);
 	kfree(collection);
@@ -709,7 +709,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 	u32 device_id = its_cmd_get_deviceid(its_cmd);
 	u32 event_id = its_cmd_get_id(its_cmd);
 	u32 coll_id = its_cmd_get_collection(its_cmd);
-	struct its_itte *itte;
+	struct its_ite *ite;
 	struct its_device *device;
 	struct its_collection *collection, *new_coll = NULL;
 	int lpi_nr;
@@ -728,7 +728,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 		return E_ITS_MAPTI_PHYSICALID_OOR;
 
 	/* If there is an existing mapping, behavior is UNPREDICTABLE. */
-	if (find_itte(its, device_id, event_id))
+	if (find_ite(its, device_id, event_id))
 		return 0;
 
 	collection = find_collection(its, coll_id);
@@ -739,36 +739,36 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 		new_coll = collection;
 	}
 
-	itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
-	if (!itte) {
+	ite = kzalloc(sizeof(struct its_ite), GFP_KERNEL);
+	if (!ite) {
 		if (new_coll)
 			vgic_its_free_collection(its, coll_id);
 		return -ENOMEM;
 	}
 
-	itte->event_id	= event_id;
-	list_add_tail(&itte->itte_list, &device->itt_head);
+	ite->event_id	= event_id;
+	list_add_tail(&ite->ite_list, &device->itt_head);
 
-	itte->collection = collection;
-	itte->lpi = lpi_nr;
+	ite->collection = collection;
+	ite->lpi = lpi_nr;
 
 	irq = vgic_add_lpi(kvm, lpi_nr);
 	if (IS_ERR(irq)) {
 		if (new_coll)
 			vgic_its_free_collection(its, coll_id);
-		its_free_itte(kvm, itte);
+		its_free_ite(kvm, ite);
 		return PTR_ERR(irq);
 	}
-	itte->irq = irq;
+	ite->irq = irq;
 
-	update_affinity_itte(kvm, itte);
+	update_affinity_ite(kvm, ite);
 
 	/*
 	 * We "cache" the configuration table entries in out struct vgic_irq's.
 	 * However we only have those structs for mapped IRQs, so we read in
 	 * the respective config data from memory here upon mapping the LPI.
 	 */
-	update_lpi_config(kvm, itte->irq, NULL);
+	update_lpi_config(kvm, ite->irq, NULL);
 
 	return 0;
 }
@@ -776,15 +776,15 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 /* Requires the its_lock to be held. */
 static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device)
 {
-	struct its_itte *itte, *temp;
+	struct its_ite *ite, *temp;
 
 	/*
 	 * The spec says that unmapping a device with still valid
 	 * ITTEs associated is UNPREDICTABLE. We remove all ITTEs,
 	 * since we cannot leave the memory unreferenced.
 	 */
-	list_for_each_entry_safe(itte, temp, &device->itt_head, itte_list)
-		its_free_itte(kvm, itte);
+	list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list)
+		its_free_ite(kvm, ite);
 
 	list_del(&device->dev_list);
 	kfree(device);
@@ -883,14 +883,14 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
 {
 	u32 device_id = its_cmd_get_deviceid(its_cmd);
 	u32 event_id = its_cmd_get_id(its_cmd);
-	struct its_itte *itte;
+	struct its_ite *ite;
 
 
-	itte = find_itte(its, device_id, event_id);
-	if (!itte)
+	ite = find_ite(its, device_id, event_id);
+	if (!ite)
 		return E_ITS_CLEAR_UNMAPPED_INTERRUPT;
 
-	itte->irq->pending_latch = false;
+	ite->irq->pending_latch = false;
 
 	return 0;
 }
@@ -904,14 +904,14 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
 {
 	u32 device_id = its_cmd_get_deviceid(its_cmd);
 	u32 event_id = its_cmd_get_id(its_cmd);
-	struct its_itte *itte;
+	struct its_ite *ite;
 
 
-	itte = find_itte(its, device_id, event_id);
-	if (!itte)
+	ite = find_ite(its, device_id, event_id);
+	if (!ite)
 		return E_ITS_INV_UNMAPPED_INTERRUPT;
 
-	return update_lpi_config(kvm, itte->irq, NULL);
+	return update_lpi_config(kvm, ite->irq, NULL);
 }
 
 /*
@@ -1435,7 +1435,7 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
 	struct kvm *kvm = kvm_dev->kvm;
 	struct vgic_its *its = kvm_dev->private;
 	struct its_device *dev;
-	struct its_itte *itte;
+	struct its_ite *ite;
 	struct list_head *dev_cur, *dev_temp;
 	struct list_head *cur, *temp;
 
@@ -1450,8 +1450,8 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
 	list_for_each_safe(dev_cur, dev_temp, &its->device_list) {
 		dev = container_of(dev_cur, struct its_device, dev_list);
 		list_for_each_safe(cur, temp, &dev->itt_head) {
-			itte = (container_of(cur, struct its_itte, itte_list));
-			its_free_itte(kvm, itte);
+			ite = (container_of(cur, struct its_ite, ite_list));
+			its_free_ite(kvm, ite);
 		}
 		list_del(dev_cur);
 		kfree(dev);
-- 
cgit v1.2.3


From 4b7171ac564786247a49eedc59d3060bddf2377c Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 20 Dec 2016 09:20:00 +0100
Subject: arm/arm64: vgic: turn vgic_find_mmio_region into public

We plan to use vgic_find_mmio_region in vgic-its.c so let's
turn it into a public function.

Also let's take the opportunity to rename the region parameter
into regions to emphasize this latter is an array of regions.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-mmio.c | 11 +++++------
 virt/kvm/arm/vgic/vgic-mmio.h |  5 +++++
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 2a5db1352722..1c17b2a2f105 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -446,13 +446,12 @@ static int match_region(const void *key, const void *elt)
 	return 0;
 }
 
-/* Find the proper register handler entry given a certain address offset. */
-static const struct vgic_register_region *
-vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
-		      unsigned int offset)
+const struct vgic_register_region *
+vgic_find_mmio_region(const struct vgic_register_region *regions,
+		      int nr_regions, unsigned int offset)
 {
-	return bsearch((void *)(uintptr_t)offset, region, nr_regions,
-		       sizeof(region[0]), match_region);
+	return bsearch((void *)(uintptr_t)offset, regions, nr_regions,
+		       sizeof(regions[0]), match_region);
 }
 
 void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index 98bb566b660a..6eec91bb1657 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -192,4 +192,9 @@ u64 vgic_sanitise_shareability(u64 reg);
 u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift,
 			u64 (*sanitise_fn)(u64));
 
+/* Find the proper register handler entry given a certain address offset */
+const struct vgic_register_region *
+vgic_find_mmio_region(const struct vgic_register_region *regions,
+		      int nr_regions, unsigned int offset);
+
 #endif
-- 
cgit v1.2.3


From 876ae234cb5e908c12c60562295cd633eac687bb Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 20 Dec 2016 01:36:35 -0500
Subject: KVM: arm64: vgic-its: KVM_DEV_ARM_VGIC_GRP_ITS_REGS group

The ITS KVM device exposes a new KVM_DEV_ARM_VGIC_GRP_ITS_REGS
group which allows the userspace to save/restore ITS registers.

At this stage the get/set/has operations are not yet implemented.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/uapi/asm/kvm.h   |  1 +
 arch/arm64/include/uapi/asm/kvm.h |  1 +
 virt/kvm/arm/vgic/vgic-its.c      | 36 +++++++++++++++++++++++++++++++++++-
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index a5838d605e7b..ee183fcd1c82 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -194,6 +194,7 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS	8
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index cd6bea495e63..ee4a0ad44359 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -214,6 +214,7 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 3ffcbbe97523..f687e91741f0 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1466,6 +1466,19 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
 	kfree(its);
 }
 
+int vgic_its_has_attr_regs(struct kvm_device *dev,
+			   struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+
+int vgic_its_attr_regs_access(struct kvm_device *dev,
+			      struct kvm_device_attr *attr,
+			      u64 *reg, bool is_write)
+{
+	return -ENXIO;
+}
+
 static int vgic_its_has_attr(struct kvm_device *dev,
 			     struct kvm_device_attr *attr)
 {
@@ -1482,6 +1495,8 @@ static int vgic_its_has_attr(struct kvm_device *dev,
 			return 0;
 		}
 		break;
+	case KVM_DEV_ARM_VGIC_GRP_ITS_REGS:
+		return vgic_its_has_attr_regs(dev, attr);
 	}
 	return -ENXIO;
 }
@@ -1521,6 +1536,15 @@ static int vgic_its_set_attr(struct kvm_device *dev,
 			return 0;
 		}
 		break;
+	case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
+		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+		u64 reg;
+
+		if (get_user(reg, uaddr))
+			return -EFAULT;
+
+		return vgic_its_attr_regs_access(dev, attr, &reg, true);
+	}
 	}
 	return -ENXIO;
 }
@@ -1541,10 +1565,20 @@ static int vgic_its_get_attr(struct kvm_device *dev,
 		if (copy_to_user(uaddr, &addr, sizeof(addr)))
 			return -EFAULT;
 		break;
+	}
+	case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
+		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+		u64 reg;
+		int ret;
+
+		ret = vgic_its_attr_regs_access(dev, attr, &reg, false);
+		if (ret)
+			return ret;
+		return put_user(reg, uaddr);
+	}
 	default:
 		return -ENXIO;
 	}
-	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From dfc99f85c0a554b2885824654b1fc235bc4f9be9 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 23 Mar 2017 11:51:52 +0100
Subject: KVM: arm/arm64: vgic: expose (un)lock_all_vcpus

We need to use those helpers in vgic-its.c so let's
expose them in the private vgic header.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-kvm-device.c | 4 ++--
 virt/kvm/arm/vgic/vgic.h            | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index d181d2baee9c..859bfa871b30 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -259,13 +259,13 @@ static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
 	}
 }
 
-static void unlock_all_vcpus(struct kvm *kvm)
+void unlock_all_vcpus(struct kvm *kvm)
 {
 	unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
 }
 
 /* Returns true if all vcpus were locked, false otherwise */
-static bool lock_all_vcpus(struct kvm *kvm)
+bool lock_all_vcpus(struct kvm *kvm)
 {
 	struct kvm_vcpu *tmp_vcpu;
 	int c;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 44445dac0835..a48f33b04b2d 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -179,4 +179,7 @@ int vgic_init(struct kvm *kvm);
 int vgic_debug_init(struct kvm *kvm);
 int vgic_debug_destroy(struct kvm *kvm);
 
+bool lock_all_vcpus(struct kvm *kvm);
+void unlock_all_vcpus(struct kvm *kvm);
+
 #endif
-- 
cgit v1.2.3


From 8331c23c28e8b7f70cb5c25366160d900ab127d7 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 20 Dec 2016 09:33:13 +0100
Subject: KVM: arm64: vgic-its: Implement vgic_its_has_attr_regs and
 attr_regs_access

This patch implements vgic_its_has_attr_regs and vgic_its_attr_regs_access
upon the MMIO framework. VGIC ITS KVM device KVM_DEV_ARM_VGIC_GRP_ITS_REGS
group becomes functional.

At least GITS_CREADR and GITS_IIDR require to differentiate a guest write
action from a user access. As such let's introduce a new uaccess_its_write
vgic_register_region callback.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/vgic/vgic-its.c  | 79 +++++++++++++++++++++++++++++++++++++++++--
 virt/kvm/arm/vgic/vgic-mmio.h |  9 +++--
 2 files changed, 84 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index f687e91741f0..fd2660d0d694 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1469,14 +1469,89 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
 int vgic_its_has_attr_regs(struct kvm_device *dev,
 			   struct kvm_device_attr *attr)
 {
-	return -ENXIO;
+	const struct vgic_register_region *region;
+	gpa_t offset = attr->attr;
+	int align;
+
+	align = (offset < GITS_TYPER) || (offset >= GITS_PIDR4) ? 0x3 : 0x7;
+
+	if (offset & align)
+		return -EINVAL;
+
+	region = vgic_find_mmio_region(its_registers,
+				       ARRAY_SIZE(its_registers),
+				       offset);
+	if (!region)
+		return -ENXIO;
+
+	return 0;
 }
 
 int vgic_its_attr_regs_access(struct kvm_device *dev,
 			      struct kvm_device_attr *attr,
 			      u64 *reg, bool is_write)
 {
-	return -ENXIO;
+	const struct vgic_register_region *region;
+	struct vgic_its *its;
+	gpa_t addr, offset;
+	unsigned int len;
+	int align, ret = 0;
+
+	its = dev->private;
+	offset = attr->attr;
+
+	/*
+	 * Although the spec supports upper/lower 32-bit accesses to
+	 * 64-bit ITS registers, the userspace ABI requires 64-bit
+	 * accesses to all 64-bit wide registers. We therefore only
+	 * support 32-bit accesses to GITS_CTLR, GITS_IIDR and GITS ID
+	 * registers
+	 */
+	if ((offset < GITS_TYPER) || (offset >= GITS_PIDR4))
+		align = 0x3;
+	else
+		align = 0x7;
+
+	if (offset & align)
+		return -EINVAL;
+
+	mutex_lock(&dev->kvm->lock);
+
+	if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	region = vgic_find_mmio_region(its_registers,
+				       ARRAY_SIZE(its_registers),
+				       offset);
+	if (!region) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	if (!lock_all_vcpus(dev->kvm)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	addr = its->vgic_its_base + offset;
+
+	len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4;
+
+	if (is_write) {
+		if (region->uaccess_its_write)
+			ret = region->uaccess_its_write(dev->kvm, its, addr,
+							len, *reg);
+		else
+			region->its_write(dev->kvm, its, addr, len, *reg);
+	} else {
+		*reg = region->its_read(dev->kvm, its, addr, len);
+	}
+	unlock_all_vcpus(dev->kvm);
+out:
+	mutex_unlock(&dev->kvm->lock);
+	return ret;
 }
 
 static int vgic_its_has_attr(struct kvm_device *dev,
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index 6eec91bb1657..ea4171acdef3 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -36,8 +36,13 @@ struct vgic_register_region {
 	};
 	unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr,
 				      unsigned int len);
-	void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
-			      unsigned int len, unsigned long val);
+	union {
+		void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
+				      unsigned int len, unsigned long val);
+		int (*uaccess_its_write)(struct kvm *kvm, struct vgic_its *its,
+					 gpa_t addr, unsigned int len,
+					 unsigned long val);
+	};
 };
 
 extern struct kvm_io_device_ops kvm_io_gic_ops;
-- 
cgit v1.2.3


From 0979bfa69421279459b4516542503f1d577d2ec5 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Wed, 4 Jan 2017 11:58:41 +0100
Subject: KVM: arm64: vgic-its: Implement vgic_mmio_uaccess_write_its_creadr

GITS_CREADR needs to be restored so let's implement the associated
uaccess_write_its callback. The write only is allowed if the its
is disabled.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-its.c | 42 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index fd2660d0d694..bf3ff0931572 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1213,6 +1213,33 @@ static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm,
 	return extract_bytes(its->creadr, addr & 0x7, len);
 }
 
+static int vgic_mmio_uaccess_write_its_creadr(struct kvm *kvm,
+					      struct vgic_its *its,
+					      gpa_t addr, unsigned int len,
+					      unsigned long val)
+{
+	u32 cmd_offset;
+	int ret = 0;
+
+	mutex_lock(&its->cmd_lock);
+
+	if (its->enabled) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	cmd_offset = ITS_CMD_OFFSET(val);
+	if (cmd_offset >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	its->creadr = cmd_offset;
+out:
+	mutex_unlock(&its->cmd_lock);
+	return ret;
+}
+
 #define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7)
 static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm,
 					      struct vgic_its *its,
@@ -1317,6 +1344,16 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
 	.its_write = wr,					\
 }
 
+#define REGISTER_ITS_DESC_UACCESS(off, rd, wr, uwr, length, acc)\
+{								\
+	.reg_offset = off,					\
+	.len = length,						\
+	.access_flags = acc,					\
+	.its_read = rd,						\
+	.its_write = wr,					\
+	.uaccess_its_write = uwr,				\
+}
+
 static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its,
 			      gpa_t addr, unsigned int len, unsigned long val)
 {
@@ -1339,8 +1376,9 @@ static struct vgic_register_region its_registers[] = {
 	REGISTER_ITS_DESC(GITS_CWRITER,
 		vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8,
 		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
-	REGISTER_ITS_DESC(GITS_CREADR,
-		vgic_mmio_read_its_creadr, its_mmio_write_wi, 8,
+	REGISTER_ITS_DESC_UACCESS(GITS_CREADR,
+		vgic_mmio_read_its_creadr, its_mmio_write_wi,
+		vgic_mmio_uaccess_write_its_creadr, 8,
 		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
 	REGISTER_ITS_DESC(GITS_BASER,
 		vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40,
-- 
cgit v1.2.3


From 71afe470e20db133b30730cfa856e5d6854312e9 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 13 Apr 2017 09:06:20 +0200
Subject: KVM: arm64: vgic-its: Introduce migration ABI infrastructure

We plan to support different migration ABIs, ie. characterizing
the ITS table layout format in guest RAM. For example, a new ABI
will be needed if vLPIs get supported for nested use case.

So let's introduce an array of supported ABIs (at the moment a single
ABI is supported though). The following characteristics are foreseen
to vary with the ABI: size of table entries, save/restore operation,
the way abi settings are applied.

By default the MAX_ABI_REV is applied on its creation. In subsequent
patches we will introduce a way for the userspace to change the ABI
in use.

The entry sizes now are set according to the ABI version and not
hardcoded anymore.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
---
 include/kvm/arm_vgic.h             |  3 ++
 include/linux/irqchip/arm-gic-v3.h |  5 ++
 virt/kvm/arm/vgic/vgic-its.c       | 93 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 26ed4fb896bb..fabcc649e2ce 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -162,6 +162,9 @@ struct vgic_its {
 	u32			creadr;
 	u32			cwriter;
 
+	/* migration ABI revision in use */
+	u32			abi_rev;
+
 	/* Protects the device and collection lists */
 	struct mutex		its_lock;
 	struct list_head	device_list;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 97cbca19430d..81ebe437ccc3 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -132,6 +132,9 @@
 #define GIC_BASER_SHAREABILITY(reg, type)				\
 	(GIC_BASER_##type << reg##_SHAREABILITY_SHIFT)
 
+/* encode a size field of width @w containing @n - 1 units */
+#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0))
+
 #define GICR_PROPBASER_SHAREABILITY_SHIFT		(10)
 #define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT		(7)
 #define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT		(56)
@@ -232,6 +235,7 @@
 #define GITS_CTLR_QUIESCENT		(1U << 31)
 
 #define GITS_TYPER_PLPIS		(1UL << 0)
+#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT	4
 #define GITS_TYPER_IDBITS_SHIFT		8
 #define GITS_TYPER_DEVBITS_SHIFT	13
 #define GITS_TYPER_DEVBITS(r)		((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
@@ -290,6 +294,7 @@
 #define GITS_BASER_TYPE(r)		(((r) >> GITS_BASER_TYPE_SHIFT) & 7)
 #define GITS_BASER_ENTRY_SIZE_SHIFT		(48)
 #define GITS_BASER_ENTRY_SIZE(r)	((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
+#define GITS_BASER_ENTRY_SIZE_MASK	GENMASK_ULL(52, 48)
 #define GITS_BASER_SHAREABILITY_SHIFT	(10)
 #define GITS_BASER_InnerShareable					\
 	GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index bf3ff0931572..4f6ea46c496c 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -33,6 +33,10 @@
 #include "vgic.h"
 #include "vgic-mmio.h"
 
+static int vgic_its_save_tables_v0(struct vgic_its *its);
+static int vgic_its_restore_tables_v0(struct vgic_its *its);
+static int vgic_its_commit_v0(struct vgic_its *its);
+
 /*
  * Creates a new (reference to a) struct vgic_irq for a given LPI.
  * If this LPI is already mapped on another ITS, we increase its refcount
@@ -123,6 +127,50 @@ struct its_ite {
 	u32 event_id;
 };
 
+/**
+ * struct vgic_its_abi - ITS abi ops and settings
+ * @cte_esz: collection table entry size
+ * @dte_esz: device table entry size
+ * @ite_esz: interrupt translation table entry size
+ * @save tables: save the ITS tables into guest RAM
+ * @restore_tables: restore the ITS internal structs from tables
+ *  stored in guest RAM
+ * @commit: initialize the registers which expose the ABI settings,
+ *  especially the entry sizes
+ */
+struct vgic_its_abi {
+	int cte_esz;
+	int dte_esz;
+	int ite_esz;
+	int (*save_tables)(struct vgic_its *its);
+	int (*restore_tables)(struct vgic_its *its);
+	int (*commit)(struct vgic_its *its);
+};
+
+static const struct vgic_its_abi its_table_abi_versions[] = {
+	[0] = {.cte_esz = 8, .dte_esz = 8, .ite_esz = 8,
+	 .save_tables = vgic_its_save_tables_v0,
+	 .restore_tables = vgic_its_restore_tables_v0,
+	 .commit = vgic_its_commit_v0,
+	},
+};
+
+#define NR_ITS_ABIS	ARRAY_SIZE(its_table_abi_versions)
+
+inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its)
+{
+	return &its_table_abi_versions[its->abi_rev];
+}
+
+int vgic_its_set_abi(struct vgic_its *its, int rev)
+{
+	const struct vgic_its_abi *abi;
+
+	its->abi_rev = rev;
+	abi = vgic_its_get_abi(its);
+	return abi->commit(its);
+}
+
 /*
  * Find and returns a device in the device table for an ITS.
  * Must be called with the its_lock mutex held.
@@ -364,6 +412,7 @@ static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
 					      struct vgic_its *its,
 					      gpa_t addr, unsigned int len)
 {
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
 	u64 reg = GITS_TYPER_PLPIS;
 
 	/*
@@ -376,6 +425,7 @@ static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
 	 */
 	reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT;
 	reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT;
+	reg |= GIC_ENCODE_SZ(abi->ite_esz, 4) << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT;
 
 	return extract_bytes(reg, addr & 7, len);
 }
@@ -1268,6 +1318,7 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
 				      gpa_t addr, unsigned int len,
 				      unsigned long val)
 {
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
 	u64 entry_size, device_type;
 	u64 reg, *regptr, clearbits = 0;
 
@@ -1278,12 +1329,12 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
 	switch (BASER_INDEX(addr)) {
 	case 0:
 		regptr = &its->baser_device_table;
-		entry_size = 8;
+		entry_size = abi->dte_esz;
 		device_type = GITS_BASER_TYPE_DEVICE;
 		break;
 	case 1:
 		regptr = &its->baser_coll_table;
-		entry_size = 8;
+		entry_size = abi->cte_esz;
 		device_type = GITS_BASER_TYPE_COLLECTION;
 		clearbits = GITS_BASER_INDIRECT;
 		break;
@@ -1425,7 +1476,6 @@ static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
 	(GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)		| \
 	 GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner)		| \
 	 GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)		| \
-	 ((8ULL - 1) << GITS_BASER_ENTRY_SIZE_SHIFT)			| \
 	 GITS_BASER_PAGE_SIZE_64K)
 
 #define INITIAL_PROPBASER_VALUE						  \
@@ -1465,7 +1515,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
 
 	dev->private = its;
 
-	return 0;
+	return vgic_its_set_abi(its, NR_ITS_ABIS - 1);
 }
 
 static void vgic_its_destroy(struct kvm_device *kvm_dev)
@@ -1592,6 +1642,41 @@ out:
 	return ret;
 }
 
+/**
+ * vgic_its_save_tables_v0 - Save the ITS tables into guest ARM
+ * according to v0 ABI
+ */
+static int vgic_its_save_tables_v0(struct vgic_its *its)
+{
+	return -ENXIO;
+}
+
+/**
+ * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM
+ * to internal data structs according to V0 ABI
+ *
+ */
+static int vgic_its_restore_tables_v0(struct vgic_its *its)
+{
+	return -ENXIO;
+}
+
+static int vgic_its_commit_v0(struct vgic_its *its)
+{
+	const struct vgic_its_abi *abi;
+
+	abi = vgic_its_get_abi(its);
+	its->baser_coll_table &= ~GITS_BASER_ENTRY_SIZE_MASK;
+	its->baser_device_table &= ~GITS_BASER_ENTRY_SIZE_MASK;
+
+	its->baser_coll_table |= (GIC_ENCODE_SZ(abi->cte_esz, 5)
+					<< GITS_BASER_ENTRY_SIZE_SHIFT);
+
+	its->baser_device_table |= (GIC_ENCODE_SZ(abi->dte_esz, 5)
+					<< GITS_BASER_ENTRY_SIZE_SHIFT);
+	return 0;
+}
+
 static int vgic_its_has_attr(struct kvm_device *dev,
 			     struct kvm_device_attr *attr)
 {
-- 
cgit v1.2.3


From ab01c6bdacc43c41c6b326889f4358f5afc38bf9 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 23 Mar 2017 15:14:00 +0100
Subject: KVM: arm64: vgic-its: Implement vgic_mmio_uaccess_write_its_iidr

The GITS_IIDR revision field is used to encode the migration ABI
revision. So we need to restore it to check the table layout is
readable by the destination.

By writing the IIDR, userspace thus forces the ABI revision to be
used and this must be less than or equal to the max revision KVM
supports.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
---
 include/linux/irqchip/arm-gic-v3.h |  5 +++++
 virt/kvm/arm/vgic/vgic-its.c       | 23 ++++++++++++++++++++---
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 81ebe437ccc3..2eaea308f003 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -242,6 +242,11 @@
 #define GITS_TYPER_PTA			(1UL << 19)
 #define GITS_TYPER_HWCOLLCNT_SHIFT	24
 
+#define GITS_IIDR_REV_SHIFT		12
+#define GITS_IIDR_REV_MASK		(0xf << GITS_IIDR_REV_SHIFT)
+#define GITS_IIDR_REV(r)		(((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
+#define GITS_IIDR_PRODUCTID_SHIFT	24
+
 #define GITS_CBASER_VALID			(1ULL << 63)
 #define GITS_CBASER_SHAREABILITY_SHIFT		(10)
 #define GITS_CBASER_INNER_CACHEABILITY_SHIFT	(59)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 4f6ea46c496c..9338efb79a54 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -434,7 +434,23 @@ static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm,
 					     struct vgic_its *its,
 					     gpa_t addr, unsigned int len)
 {
-	return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+	u32 val;
+
+	val = (its->abi_rev << GITS_IIDR_REV_SHIFT) & GITS_IIDR_REV_MASK;
+	val |= (PRODUCT_ID_KVM << GITS_IIDR_PRODUCTID_SHIFT) | IMPLEMENTER_ARM;
+	return val;
+}
+
+static int vgic_mmio_uaccess_write_its_iidr(struct kvm *kvm,
+					    struct vgic_its *its,
+					    gpa_t addr, unsigned int len,
+					    unsigned long val)
+{
+	u32 rev = GITS_IIDR_REV(val);
+
+	if (rev >= NR_ITS_ABIS)
+		return -EINVAL;
+	return vgic_its_set_abi(its, rev);
 }
 
 static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
@@ -1415,8 +1431,9 @@ static struct vgic_register_region its_registers[] = {
 	REGISTER_ITS_DESC(GITS_CTLR,
 		vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_ITS_DESC(GITS_IIDR,
-		vgic_mmio_read_its_iidr, its_mmio_write_wi, 4,
+	REGISTER_ITS_DESC_UACCESS(GITS_IIDR,
+		vgic_mmio_read_its_iidr, its_mmio_write_wi,
+		vgic_mmio_uaccess_write_its_iidr, 4,
 		VGIC_ACCESS_32bit),
 	REGISTER_ITS_DESC(GITS_TYPER,
 		vgic_mmio_read_its_typer, its_mmio_write_wi, 8,
-- 
cgit v1.2.3


From 0d44cdb631ef53ea75be056886cf0541311e48df Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 22 Dec 2016 18:14:14 +0100
Subject: KVM: arm64: vgic-its: Interpret MAPD Size field and check related
 errors

Up to now the MAPD's ITT size field has been ignored. It encodes
the number of eventid bit minus 1. It should be used to check
the eventid when a MAPTI command is issued on a device. Let's
store the number of eventid bits in the its_device and do the
check on MAPTI. Also make sure the ITT size field does
not exceed the GITS_TYPER IDBITS field.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h |  2 ++
 virt/kvm/arm/vgic/vgic-its.c       | 15 ++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 2eaea308f003..be8bad00c419 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -347,9 +347,11 @@
 #define E_ITS_INT_UNMAPPED_INTERRUPT		0x010307
 #define E_ITS_CLEAR_UNMAPPED_INTERRUPT		0x010507
 #define E_ITS_MAPD_DEVICE_OOR			0x010801
+#define E_ITS_MAPD_ITTSIZE_OOR			0x010802
 #define E_ITS_MAPC_PROCNUM_OOR			0x010902
 #define E_ITS_MAPC_COLLECTION_OOR		0x010903
 #define E_ITS_MAPTI_UNMAPPED_DEVICE		0x010a04
+#define E_ITS_MAPTI_ID_OOR			0x010a05
 #define E_ITS_MAPTI_PHYSICALID_OOR		0x010a06
 #define E_ITS_INV_UNMAPPED_INTERRUPT		0x010c07
 #define E_ITS_INVALL_UNMAPPED_COLLECTION	0x010d09
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 9338efb79a54..031f6abd50fd 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -103,6 +103,7 @@ struct its_device {
 
 	/* the head for the list of ITTEs */
 	struct list_head itt_head;
+	u32 num_eventid_bits;
 	u32 device_id;
 };
 
@@ -224,6 +225,8 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
 
 #define GIC_LPI_OFFSET 8192
 
+#define VITS_TYPER_IDBITS 16
+
 /*
  * Finds and returns a collection in the ITS collection table.
  * Must be called with the its_lock mutex held.
@@ -424,7 +427,7 @@ static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
 	 * DevBits low - as least for the time being.
 	 */
 	reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT;
-	reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT;
+	reg |= GIC_ENCODE_SZ(VITS_TYPER_IDBITS, 5) << GITS_TYPER_IDBITS_SHIFT;
 	reg |= GIC_ENCODE_SZ(abi->ite_esz, 4) << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT;
 
 	return extract_bytes(reg, addr & 7, len);
@@ -595,6 +598,7 @@ static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size)
 
 #define its_cmd_get_command(cmd)	its_cmd_mask_field(cmd, 0,  0,  8)
 #define its_cmd_get_deviceid(cmd)	its_cmd_mask_field(cmd, 0, 32, 32)
+#define its_cmd_get_size(cmd)		(its_cmd_mask_field(cmd, 1,  0,  5) + 1)
 #define its_cmd_get_id(cmd)		its_cmd_mask_field(cmd, 1,  0, 32)
 #define its_cmd_get_physical_id(cmd)	its_cmd_mask_field(cmd, 1, 32, 32)
 #define its_cmd_get_collection(cmd)	its_cmd_mask_field(cmd, 2,  0, 16)
@@ -785,6 +789,9 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 	if (!device)
 		return E_ITS_MAPTI_UNMAPPED_DEVICE;
 
+	if (event_id >= BIT_ULL(device->num_eventid_bits))
+		return E_ITS_MAPTI_ID_OOR;
+
 	if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI)
 		lpi_nr = its_cmd_get_physical_id(its_cmd);
 	else
@@ -865,11 +872,15 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
 {
 	u32 device_id = its_cmd_get_deviceid(its_cmd);
 	bool valid = its_cmd_get_validbit(its_cmd);
+	u8 num_eventid_bits = its_cmd_get_size(its_cmd);
 	struct its_device *device;
 
 	if (!vgic_its_check_id(its, its->baser_device_table, device_id))
 		return E_ITS_MAPD_DEVICE_OOR;
 
+	if (valid && num_eventid_bits > VITS_TYPER_IDBITS)
+		return E_ITS_MAPD_ITTSIZE_OOR;
+
 	device = find_its_device(its, device_id);
 
 	/*
@@ -892,6 +903,8 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
 		return -ENOMEM;
 
 	device->device_id = device_id;
+	device->num_eventid_bits = num_eventid_bits;
+
 	INIT_LIST_HEAD(&device->itt_head);
 
 	list_add_tail(&device->dev_list, &its->device_list);
-- 
cgit v1.2.3


From 7333cefe7b147320debcc64ea41220bdd1dca723 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 2 Feb 2017 13:45:45 +0100
Subject: KVM: arm64: vgic-its: Interpret MAPD ITT_addr field

Up to now the MAPD ITT_addr had been ignored. We will need it
for save/restore. Let's record it in the its_device struct.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/vgic/vgic-its.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 031f6abd50fd..7b95b73061c7 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -104,6 +104,7 @@ struct its_device {
 	/* the head for the list of ITTEs */
 	struct list_head itt_head;
 	u32 num_eventid_bits;
+	gpa_t itt_addr;
 	u32 device_id;
 };
 
@@ -602,6 +603,7 @@ static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size)
 #define its_cmd_get_id(cmd)		its_cmd_mask_field(cmd, 1,  0, 32)
 #define its_cmd_get_physical_id(cmd)	its_cmd_mask_field(cmd, 1, 32, 32)
 #define its_cmd_get_collection(cmd)	its_cmd_mask_field(cmd, 2,  0, 16)
+#define its_cmd_get_ittaddr(cmd)	(its_cmd_mask_field(cmd, 2,  8, 44) << 8)
 #define its_cmd_get_target_addr(cmd)	its_cmd_mask_field(cmd, 2, 16, 32)
 #define its_cmd_get_validbit(cmd)	its_cmd_mask_field(cmd, 2, 63,  1)
 
@@ -873,6 +875,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
 	u32 device_id = its_cmd_get_deviceid(its_cmd);
 	bool valid = its_cmd_get_validbit(its_cmd);
 	u8 num_eventid_bits = its_cmd_get_size(its_cmd);
+	gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd);
 	struct its_device *device;
 
 	if (!vgic_its_check_id(its, its->baser_device_table, device_id))
@@ -904,6 +907,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
 
 	device->device_id = device_id;
 	device->num_eventid_bits = num_eventid_bits;
+	device->itt_addr = itt_addr;
 
 	INIT_LIST_HEAD(&device->itt_head);
 
-- 
cgit v1.2.3


From 07a3e9a7b8d2c5b0fa59b281f4480711bea8fa2b Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 2 Feb 2017 14:37:33 +0100
Subject: KVM: arm64: vgic-its: Check the device id matches TYPER DEVBITS range

On MAPD we currently check the device id can be stored in the device table.
Let's first check it can be encoded within the range defined by TYPER
DEVBITS.

Also check the collection ID belongs to the 16 bit range as GITS_TYPER
CIL field equals to 0.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/vgic/vgic-its.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 7b95b73061c7..bd1362e9c214 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -227,6 +227,7 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
 #define GIC_LPI_OFFSET 8192
 
 #define VITS_TYPER_IDBITS 16
+#define VITS_TYPER_DEVBITS 16
 
 /*
  * Finds and returns a collection in the ITS collection table.
@@ -427,7 +428,7 @@ static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
 	 * To avoid memory waste in the guest, we keep the number of IDBits and
 	 * DevBits low - as least for the time being.
 	 */
-	reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT;
+	reg |= GIC_ENCODE_SZ(VITS_TYPER_DEVBITS, 5) << GITS_TYPER_DEVBITS_SHIFT;
 	reg |= GIC_ENCODE_SZ(VITS_TYPER_IDBITS, 5) << GITS_TYPER_IDBITS_SHIFT;
 	reg |= GIC_ENCODE_SZ(abi->ite_esz, 4) << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT;
 
@@ -672,16 +673,30 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
  * Check whether an ID can be stored into the corresponding guest table.
  * For a direct table this is pretty easy, but gets a bit nasty for
  * indirect tables. We check whether the resulting guest physical address
- * is actually valid (covered by a memslot and guest accessbible).
+ * is actually valid (covered by a memslot and guest accessible).
  * For this we have to read the respective first level entry.
  */
-static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
+static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id)
 {
 	int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
+	u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
+	int esz = GITS_BASER_ENTRY_SIZE(baser);
 	int index;
-	u64 indirect_ptr;
 	gfn_t gfn;
-	int esz = GITS_BASER_ENTRY_SIZE(baser);
+
+	switch (type) {
+	case GITS_BASER_TYPE_DEVICE:
+		if (id >= BIT_ULL(VITS_TYPER_DEVBITS))
+			return false;
+		break;
+	case GITS_BASER_TYPE_COLLECTION:
+		/* as GITS_TYPER.CIL == 0, ITS supports 16-bit collection ID */
+		if (id >= BIT_ULL(16))
+			return false;
+		break;
+	default:
+		return false;
+	}
 
 	if (!(baser & GITS_BASER_INDIRECT)) {
 		phys_addr_t addr;
-- 
cgit v1.2.3


From 44de9d683847ba6dbac290bb8c9f1b773cbda746 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 4 May 2017 11:19:52 +0200
Subject: KVM: arm64: vgic-v3: vgic_v3_lpi_sync_pending_status

this new helper synchronizes the irq pending_latch
with the LPI pending bit status found in rdist pending table.
As the status is consumed, we reset the bit in pending table.

As we need the PENDBASER_ADDRESS() in vgic-v3, let's move its
definition in the irqchip header. We restore the full length
of the field, ie [51:16]. Same for PROPBASER_ADDRESS with full
field length of [51:12].

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
---
 include/linux/irqchip/arm-gic-v3.h |  2 ++
 virt/kvm/arm/vgic/vgic-its.c       |  6 ++----
 virt/kvm/arm/vgic/vgic-v3.c        | 44 ++++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/vgic/vgic.h           |  1 +
 4 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index be8bad00c419..fffb91202bc9 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -159,6 +159,8 @@
 #define GICR_PROPBASER_RaWaWb	GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb)
 
 #define GICR_PROPBASER_IDBITS_MASK			(0x1f)
+#define GICR_PROPBASER_ADDRESS(x)	((x) & GENMASK_ULL(51, 12))
+#define GICR_PENDBASER_ADDRESS(x)	((x) & GENMASK_ULL(51, 16))
 
 #define GICR_PENDBASER_SHAREABILITY_SHIFT		(10)
 #define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT		(7)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index bd1362e9c214..3601790d841e 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -221,8 +221,6 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
  */
 #define BASER_ADDRESS(x)	((x) & GENMASK_ULL(47, 16))
 #define CBASER_ADDRESS(x)	((x) & GENMASK_ULL(47, 12))
-#define PENDBASER_ADDRESS(x)	((x) & GENMASK_ULL(47, 16))
-#define PROPBASER_ADDRESS(x)	((x) & GENMASK_ULL(47, 12))
 
 #define GIC_LPI_OFFSET 8192
 
@@ -257,7 +255,7 @@ static struct its_collection *find_collection(struct vgic_its *its, int coll_id)
 static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
 			     struct kvm_vcpu *filter_vcpu)
 {
-	u64 propbase = PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
+	u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
 	u8 prop;
 	int ret;
 
@@ -369,7 +367,7 @@ static u32 max_lpis_propbaser(u64 propbaser)
  */
 static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 {
-	gpa_t pendbase = PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+	gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
 	struct vgic_irq *irq;
 	int last_byte_offset = -1;
 	int ret = 0;
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index df1503650300..9ae8adddff69 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -234,6 +234,50 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
 	vgic_v3->vgic_hcr = ICH_HCR_EN;
 }
 
+int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
+{
+	struct kvm_vcpu *vcpu;
+	int byte_offset, bit_nr;
+	gpa_t pendbase, ptr;
+	bool status;
+	u8 val;
+	int ret;
+
+retry:
+	vcpu = irq->target_vcpu;
+	if (!vcpu)
+		return 0;
+
+	pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+
+	byte_offset = irq->intid / BITS_PER_BYTE;
+	bit_nr = irq->intid % BITS_PER_BYTE;
+	ptr = pendbase + byte_offset;
+
+	ret = kvm_read_guest(kvm, ptr, &val, 1);
+	if (ret)
+		return ret;
+
+	status = val & (1 << bit_nr);
+
+	spin_lock(&irq->irq_lock);
+	if (irq->target_vcpu != vcpu) {
+		spin_unlock(&irq->irq_lock);
+		goto retry;
+	}
+	irq->pending_latch = status;
+	vgic_queue_irq_unlock(vcpu->kvm, irq);
+
+	if (status) {
+		/* clear consumed data */
+		val &= ~(1 << bit_nr);
+		ret = kvm_write_guest(kvm, ptr, &val, 1);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
 /* check for overlapping regions and for regions crossing the end of memory */
 static bool vgic_v3_check_base(struct kvm *kvm)
 {
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index a48f33b04b2d..79768c801863 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -149,6 +149,7 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_enable(struct kvm_vcpu *vcpu);
 int vgic_v3_probe(const struct gic_kvm_info *info);
 int vgic_v3_map_resources(struct kvm *kvm);
+int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
 int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
 
 void vgic_v3_load(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From 06bd5359549d7a0a2759a08f07e3d13a7acc9ecc Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 4 May 2017 11:36:32 +0200
Subject: KVM: arm64: vgic-its: Read config and pending bit in add_lpi()

When creating the lpi we now ask the redistributor what is the state
of the LPI (priority, enabled, pending).

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-its.c | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 3601790d841e..ffd0a801aeb5 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -36,6 +36,8 @@
 static int vgic_its_save_tables_v0(struct vgic_its *its);
 static int vgic_its_restore_tables_v0(struct vgic_its *its);
 static int vgic_its_commit_v0(struct vgic_its *its);
+static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
+			     struct kvm_vcpu *filter_vcpu);
 
 /*
  * Creates a new (reference to a) struct vgic_irq for a given LPI.
@@ -44,10 +46,12 @@ static int vgic_its_commit_v0(struct vgic_its *its);
  * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq.
  * This function returns a pointer to the _unlocked_ structure.
  */
-static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
+static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
+				     struct kvm_vcpu *vcpu)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq;
+	int ret;
 
 	/* In this case there is no put, since we keep the reference. */
 	if (irq)
@@ -64,6 +68,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
 	irq->config = VGIC_CONFIG_EDGE;
 	kref_init(&irq->refcount);
 	irq->intid = intid;
+	irq->target_vcpu = vcpu;
 
 	spin_lock(&dist->lpi_list_lock);
 
@@ -95,6 +100,19 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
 out_unlock:
 	spin_unlock(&dist->lpi_list_lock);
 
+	/*
+	 * We "cache" the configuration table entries in our struct vgic_irq's.
+	 * However we only have those structs for mapped IRQs, so we read in
+	 * the respective config data from memory here upon mapping the LPI.
+	 */
+	ret = update_lpi_config(kvm, irq, NULL);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = vgic_v3_lpi_sync_pending_status(kvm, irq);
+	if (ret)
+		return ERR_PTR(ret);
+
 	return irq;
 }
 
@@ -795,6 +813,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 	u32 event_id = its_cmd_get_id(its_cmd);
 	u32 coll_id = its_cmd_get_collection(its_cmd);
 	struct its_ite *ite;
+	struct kvm_vcpu *vcpu = NULL;
 	struct its_device *device;
 	struct its_collection *collection, *new_coll = NULL;
 	int lpi_nr;
@@ -840,7 +859,10 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 	ite->collection = collection;
 	ite->lpi = lpi_nr;
 
-	irq = vgic_add_lpi(kvm, lpi_nr);
+	if (its_is_collection_mapped(collection))
+		vcpu = kvm_get_vcpu(kvm, collection->target_addr);
+
+	irq = vgic_add_lpi(kvm, lpi_nr, vcpu);
 	if (IS_ERR(irq)) {
 		if (new_coll)
 			vgic_its_free_collection(its, coll_id);
@@ -849,15 +871,6 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 	}
 	ite->irq = irq;
 
-	update_affinity_ite(kvm, ite);
-
-	/*
-	 * We "cache" the configuration table entries in out struct vgic_irq's.
-	 * However we only have those structs for mapped IRQs, so we read in
-	 * the respective config data from memory here upon mapping the LPI.
-	 */
-	update_lpi_config(kvm, ite->irq, NULL);
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 3b65808f4b2914db175a048097956d59ec609e04 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Sat, 24 Dec 2016 18:48:04 +0100
Subject: KVM: arm64: vgic-its: KVM_DEV_ARM_ITS_SAVE/RESTORE_TABLES

Introduce new attributes in KVM_DEV_ARM_VGIC_GRP_CTRL group:
- KVM_DEV_ARM_ITS_SAVE_TABLES: saves the ITS tables into guest RAM
- KVM_DEV_ARM_ITS_RESTORE_TABLES: restores them into VGIC internal
  structures.

We hold the vcpus lock during the save and restore to make
sure no vcpu is running.

At this stage the functionality is not yet implemented. Only
the skeleton is put in place.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
[Given we will move the iodev register until setting the base addr]
Reviewed-by: Christoffer Dall <cdall@linaro.org>
---
 arch/arm/include/uapi/asm/kvm.h   |   4 +-
 arch/arm64/include/uapi/asm/kvm.h |   4 +-
 virt/kvm/arm/vgic/vgic-its.c      | 107 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index ee183fcd1c82..0f7ee15f9fcf 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -201,7 +201,9 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
 #define VGIC_LEVEL_INFO_LINE_LEVEL	0
 
-#define   KVM_DEV_ARM_VGIC_CTRL_INIT    0
+#define   KVM_DEV_ARM_VGIC_CTRL_INIT		0
+#define   KVM_DEV_ARM_ITS_SAVE_TABLES		1
+#define   KVM_DEV_ARM_ITS_RESTORE_TABLES	2
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index ee4a0ad44359..5dd7be049934 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -221,7 +221,9 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK	0x3ff
 #define VGIC_LEVEL_INFO_LINE_LEVEL	0
 
-#define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
+#define   KVM_DEV_ARM_VGIC_CTRL_INIT		0
+#define   KVM_DEV_ARM_ITS_SAVE_TABLES           1
+#define   KVM_DEV_ARM_ITS_RESTORE_TABLES        2
 
 /* Device Control API on vcpu fd */
 #define KVM_ARM_VCPU_PMU_V3_CTRL	0
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index ffd0a801aeb5..cb7ae4c53ad9 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1702,13 +1702,72 @@ out:
 	return ret;
 }
 
+/**
+ * vgic_its_save_device_tables - Save the device table and all ITT
+ * into guest RAM
+ */
+static int vgic_its_save_device_tables(struct vgic_its *its)
+{
+	return -ENXIO;
+}
+
+/**
+ * vgic_its_restore_device_tables - Restore the device table and all ITT
+ * from guest RAM to internal data structs
+ */
+static int vgic_its_restore_device_tables(struct vgic_its *its)
+{
+	return -ENXIO;
+}
+
+/**
+ * vgic_its_save_collection_table - Save the collection table into
+ * guest RAM
+ */
+static int vgic_its_save_collection_table(struct vgic_its *its)
+{
+	return -ENXIO;
+}
+
+/**
+ * vgic_its_restore_collection_table - reads the collection table
+ * in guest memory and restores the ITS internal state. Requires the
+ * BASER registers to be restored before.
+ */
+static int vgic_its_restore_collection_table(struct vgic_its *its)
+{
+	return -ENXIO;
+}
+
 /**
  * vgic_its_save_tables_v0 - Save the ITS tables into guest ARM
  * according to v0 ABI
  */
 static int vgic_its_save_tables_v0(struct vgic_its *its)
 {
-	return -ENXIO;
+	struct kvm *kvm = its->dev->kvm;
+	int ret;
+
+	mutex_lock(&kvm->lock);
+	mutex_lock(&its->its_lock);
+
+	if (!lock_all_vcpus(kvm)) {
+		mutex_unlock(&its->its_lock);
+		mutex_unlock(&kvm->lock);
+		return -EBUSY;
+	}
+
+	ret = vgic_its_save_device_tables(its);
+	if (ret)
+		goto out;
+
+	ret = vgic_its_save_collection_table(its);
+
+out:
+	unlock_all_vcpus(kvm);
+	mutex_unlock(&its->its_lock);
+	mutex_unlock(&kvm->lock);
+	return ret;
 }
 
 /**
@@ -1718,7 +1777,37 @@ static int vgic_its_save_tables_v0(struct vgic_its *its)
  */
 static int vgic_its_restore_tables_v0(struct vgic_its *its)
 {
-	return -ENXIO;
+	struct kvm *kvm = its->dev->kvm;
+	int ret;
+
+	mutex_lock(&kvm->lock);
+	mutex_lock(&its->its_lock);
+
+	if (!lock_all_vcpus(kvm)) {
+		mutex_unlock(&its->its_lock);
+		mutex_unlock(&kvm->lock);
+		return -EBUSY;
+	}
+
+	ret = vgic_its_restore_collection_table(its);
+	if (ret)
+		goto out;
+
+	ret = vgic_its_restore_device_tables(its);
+
+out:
+	unlock_all_vcpus(kvm);
+	mutex_unlock(&its->its_lock);
+	mutex_unlock(&kvm->lock);
+
+	if (ret)
+		return ret;
+
+	/*
+	 * On restore path, MSI injections can happen before the
+	 * first VCPU run so let's complete the GIC init here.
+	 */
+	return kvm_vgic_map_resources(its->dev->kvm);
 }
 
 static int vgic_its_commit_v0(struct vgic_its *its)
@@ -1751,6 +1840,10 @@ static int vgic_its_has_attr(struct kvm_device *dev,
 		switch (attr->attr) {
 		case KVM_DEV_ARM_VGIC_CTRL_INIT:
 			return 0;
+		case KVM_DEV_ARM_ITS_SAVE_TABLES:
+			return 0;
+		case KVM_DEV_ARM_ITS_RESTORE_TABLES:
+			return 0;
 		}
 		break;
 	case KVM_DEV_ARM_VGIC_GRP_ITS_REGS:
@@ -1786,14 +1879,20 @@ static int vgic_its_set_attr(struct kvm_device *dev,
 
 		return 0;
 	}
-	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+	case KVM_DEV_ARM_VGIC_GRP_CTRL: {
+		const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+
 		switch (attr->attr) {
 		case KVM_DEV_ARM_VGIC_CTRL_INIT:
 			its->initialized = true;
 
 			return 0;
+		case KVM_DEV_ARM_ITS_SAVE_TABLES:
+			return abi->save_tables(its);
+		case KVM_DEV_ARM_ITS_RESTORE_TABLES:
+			return abi->restore_tables(its);
 		}
-		break;
+	}
 	case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
 		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
 		u64 reg;
-- 
cgit v1.2.3


From 528297f560add8b7cac2f401d6aeb5bb3d1ef345 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Sun, 25 Dec 2016 18:57:54 +0100
Subject: KVM: arm64: vgic-its: vgic_its_alloc_ite/device

Add two new helpers to allocate an its ite and an its device.
This will avoid duplication on restore path.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/vgic/vgic-its.c | 68 ++++++++++++++++++++++++++++++--------------
 1 file changed, 47 insertions(+), 21 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index cb7ae4c53ad9..737ba3c9e9be 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -802,6 +802,25 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id)
 	kfree(collection);
 }
 
+/* Must be called with its_lock mutex held */
+static struct its_ite *vgic_its_alloc_ite(struct its_device *device,
+					  struct its_collection *collection,
+					  u32 lpi_id, u32 event_id)
+{
+	struct its_ite *ite;
+
+	ite = kzalloc(sizeof(*ite), GFP_KERNEL);
+	if (!ite)
+		return ERR_PTR(-ENOMEM);
+
+	ite->event_id	= event_id;
+	ite->collection = collection;
+	ite->lpi = lpi_id;
+
+	list_add_tail(&ite->ite_list, &device->itt_head);
+	return ite;
+}
+
 /*
  * The MAPTI and MAPI commands map LPIs to ITTEs.
  * Must be called with its_lock mutex held.
@@ -816,8 +835,8 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 	struct kvm_vcpu *vcpu = NULL;
 	struct its_device *device;
 	struct its_collection *collection, *new_coll = NULL;
-	int lpi_nr;
 	struct vgic_irq *irq;
+	int lpi_nr;
 
 	device = find_its_device(its, device_id);
 	if (!device)
@@ -846,19 +865,13 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 		new_coll = collection;
 	}
 
-	ite = kzalloc(sizeof(struct its_ite), GFP_KERNEL);
-	if (!ite) {
+	ite = vgic_its_alloc_ite(device, collection, lpi_nr, event_id);
+	if (IS_ERR(ite)) {
 		if (new_coll)
 			vgic_its_free_collection(its, coll_id);
-		return -ENOMEM;
+		return PTR_ERR(ite);
 	}
 
-	ite->event_id	= event_id;
-	list_add_tail(&ite->ite_list, &device->itt_head);
-
-	ite->collection = collection;
-	ite->lpi = lpi_nr;
-
 	if (its_is_collection_mapped(collection))
 		vcpu = kvm_get_vcpu(kvm, collection->target_addr);
 
@@ -891,6 +904,26 @@ static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device)
 	kfree(device);
 }
 
+/* Must be called with its_lock mutex held */
+static struct its_device *vgic_its_alloc_device(struct vgic_its *its,
+						u32 device_id, gpa_t itt_addr,
+						u8 num_eventid_bits)
+{
+	struct its_device *device;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (!device)
+		return ERR_PTR(-ENOMEM);
+
+	device->device_id = device_id;
+	device->itt_addr = itt_addr;
+	device->num_eventid_bits = num_eventid_bits;
+	INIT_LIST_HEAD(&device->itt_head);
+
+	list_add_tail(&device->dev_list, &its->device_list);
+	return device;
+}
+
 /*
  * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs).
  * Must be called with the its_lock mutex held.
@@ -927,17 +960,10 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
 	if (!valid)
 		return 0;
 
-	device = kzalloc(sizeof(struct its_device), GFP_KERNEL);
-	if (!device)
-		return -ENOMEM;
-
-	device->device_id = device_id;
-	device->num_eventid_bits = num_eventid_bits;
-	device->itt_addr = itt_addr;
-
-	INIT_LIST_HEAD(&device->itt_head);
-
-	list_add_tail(&device->dev_list, &its->device_list);
+	device = vgic_its_alloc_device(its, device_id, itt_addr,
+				       num_eventid_bits);
+	if (IS_ERR(device))
+		return PTR_ERR(device);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 920a7a8fa92ae0ec73c4f6d6f15c01f86017f20d Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Wed, 8 Feb 2017 05:20:04 +0100
Subject: KVM: arm64: vgic-its: Add infrastructure for table lookup

Add a generic scan_its_table() helper whose role consists in
scanning a contiguous table located in guest RAM and applying
a callback on each entry. Entries can be handled as linked lists
since the callback may return an id offset to the next entry and
also indicate whether the entry is the last one.

Helper functions also are added to compute the device/event ID
offset to the next DTE/ITE.

compute_next_devid_offset, compute_next_eventid_offset and
scan_table will become static in subsequent patches

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/vgic/vgic-its.c | 92 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 737ba3c9e9be..d5c0057196ae 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -244,6 +244,8 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
 
 #define VITS_TYPER_IDBITS 16
 #define VITS_TYPER_DEVBITS 16
+#define VITS_DTE_MAX_DEVID_OFFSET	(BIT(14) - 1)
+#define VITS_ITE_MAX_EVENTID_OFFSET	(BIT(16) - 1)
 
 /*
  * Finds and returns a collection in the ITS collection table.
@@ -1728,6 +1730,96 @@ out:
 	return ret;
 }
 
+u32 compute_next_devid_offset(struct list_head *h, struct its_device *dev)
+{
+	struct its_device *next;
+	u32 next_offset;
+
+	if (list_is_last(&dev->dev_list, h))
+		return 0;
+	next = list_next_entry(dev, dev_list);
+	next_offset = next->device_id - dev->device_id;
+
+	return min_t(u32, next_offset, VITS_DTE_MAX_DEVID_OFFSET);
+}
+
+u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite)
+{
+	struct its_ite *next;
+	u32 next_offset;
+
+	if (list_is_last(&ite->ite_list, h))
+		return 0;
+	next = list_next_entry(ite, ite_list);
+	next_offset = next->event_id - ite->event_id;
+
+	return min_t(u32, next_offset, VITS_ITE_MAX_EVENTID_OFFSET);
+}
+
+/**
+ * entry_fn_t - Callback called on a table entry restore path
+ * @its: its handle
+ * @id: id of the entry
+ * @entry: pointer to the entry
+ * @opaque: pointer to an opaque data
+ *
+ * Return: < 0 on error, 0 if last element was identified, id offset to next
+ * element otherwise
+ */
+typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry,
+			  void *opaque);
+
+/**
+ * scan_its_table - Scan a contiguous table in guest RAM and applies a function
+ * to each entry
+ *
+ * @its: its handle
+ * @base: base gpa of the table
+ * @size: size of the table in bytes
+ * @esz: entry size in bytes
+ * @start_id: the ID of the first entry in the table
+ * (non zero for 2d level tables)
+ * @fn: function to apply on each entry
+ *
+ * Return: < 0 on error, 0 if last element was identified, 1 otherwise
+ * (the last element may not be found on second level tables)
+ */
+int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
+		   int start_id, entry_fn_t fn, void *opaque)
+{
+	void *entry = kzalloc(esz, GFP_KERNEL);
+	struct kvm *kvm = its->dev->kvm;
+	unsigned long len = size;
+	int id = start_id;
+	gpa_t gpa = base;
+	int ret;
+
+	while (len > 0) {
+		int next_offset;
+		size_t byte_offset;
+
+		ret = kvm_read_guest(kvm, gpa, entry, esz);
+		if (ret)
+			goto out;
+
+		next_offset = fn(its, id, entry, opaque);
+		if (next_offset <= 0) {
+			ret = next_offset;
+			goto out;
+		}
+
+		byte_offset = next_offset * esz;
+		id += next_offset;
+		gpa += byte_offset;
+		len -= byte_offset;
+	}
+	ret =  1;
+
+out:
+	kfree(entry);
+	return ret;
+}
+
 /**
  * vgic_its_save_device_tables - Save the device table and all ITT
  * into guest RAM
-- 
cgit v1.2.3


From ea1ad53e1e31a32dcbeae61b8d3d01cccedcffc5 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Mon, 9 Jan 2017 16:19:41 +0100
Subject: KVM: arm64: vgic-its: Collection table save/restore

The save path copies the collection entries into guest RAM
at the GPA specified in the BASER register. This obviously
requires the BASER to be set. The last written element is a
dummy collection table entry.

We do not index by collection ID as the collection entry
can fit into 8 bytes while containing the collection ID.

On restore path we re-allocate the collection objects.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/vgic/vgic-its.c | 100 ++++++++++++++++++++++++++++++++++++++++++-
 virt/kvm/arm/vgic/vgic.h     |   9 ++++
 2 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index d5c0057196ae..5523f0a1cddc 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1838,13 +1838,89 @@ static int vgic_its_restore_device_tables(struct vgic_its *its)
 	return -ENXIO;
 }
 
+static int vgic_its_save_cte(struct vgic_its *its,
+			     struct its_collection *collection,
+			     gpa_t gpa, int esz)
+{
+	u64 val;
+
+	val = (1ULL << KVM_ITS_CTE_VALID_SHIFT |
+	       ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) |
+	       collection->collection_id);
+	val = cpu_to_le64(val);
+	return kvm_write_guest(its->dev->kvm, gpa, &val, esz);
+}
+
+static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
+{
+	struct its_collection *collection;
+	struct kvm *kvm = its->dev->kvm;
+	u32 target_addr, coll_id;
+	u64 val;
+	int ret;
+
+	BUG_ON(esz > sizeof(val));
+	ret = kvm_read_guest(kvm, gpa, &val, esz);
+	if (ret)
+		return ret;
+	val = le64_to_cpu(val);
+	if (!(val & KVM_ITS_CTE_VALID_MASK))
+		return 0;
+
+	target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT);
+	coll_id = val & KVM_ITS_CTE_ICID_MASK;
+
+	if (target_addr >= atomic_read(&kvm->online_vcpus))
+		return -EINVAL;
+
+	collection = find_collection(its, coll_id);
+	if (collection)
+		return -EEXIST;
+	ret = vgic_its_alloc_collection(its, &collection, coll_id);
+	if (ret)
+		return ret;
+	collection->target_addr = target_addr;
+	return 1;
+}
+
 /**
  * vgic_its_save_collection_table - Save the collection table into
  * guest RAM
  */
 static int vgic_its_save_collection_table(struct vgic_its *its)
 {
-	return -ENXIO;
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	struct its_collection *collection;
+	u64 val;
+	gpa_t gpa;
+	size_t max_size, filled = 0;
+	int ret, cte_esz = abi->cte_esz;
+
+	gpa = BASER_ADDRESS(its->baser_coll_table);
+	if (!gpa)
+		return 0;
+
+	max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K;
+
+	list_for_each_entry(collection, &its->collection_list, coll_list) {
+		ret = vgic_its_save_cte(its, collection, gpa, cte_esz);
+		if (ret)
+			return ret;
+		gpa += cte_esz;
+		filled += cte_esz;
+	}
+
+	if (filled == max_size)
+		return 0;
+
+	/*
+	 * table is not fully filled, add a last dummy element
+	 * with valid bit unset
+	 */
+	val = 0;
+	BUG_ON(cte_esz > sizeof(val));
+	ret = kvm_write_guest(its->dev->kvm, gpa, &val, cte_esz);
+	return ret;
 }
 
 /**
@@ -1854,7 +1930,27 @@ static int vgic_its_save_collection_table(struct vgic_its *its)
  */
 static int vgic_its_restore_collection_table(struct vgic_its *its)
 {
-	return -ENXIO;
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	int cte_esz = abi->cte_esz;
+	size_t max_size, read = 0;
+	gpa_t gpa;
+	int ret;
+
+	if (!(its->baser_coll_table & GITS_BASER_VALID))
+		return 0;
+
+	gpa = BASER_ADDRESS(its->baser_coll_table);
+
+	max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K;
+
+	while (read < max_size) {
+		ret = vgic_its_restore_cte(its, gpa, cte_esz);
+		if (ret <= 0)
+			break;
+		gpa += cte_esz;
+		read += cte_esz;
+	}
+	return ret;
 }
 
 /**
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 79768c801863..757de7a2bdc3 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -73,6 +73,15 @@
 				      KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \
 				      KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
 
+/*
+ * As per Documentation/virtual/kvm/devices/arm-vgic-its.txt,
+ * below macros are defined for ITS table entry encoding.
+ */
+#define KVM_ITS_CTE_VALID_SHIFT		63
+#define KVM_ITS_CTE_VALID_MASK		BIT_ULL(63)
+#define KVM_ITS_CTE_RDBASE_SHIFT	16
+#define KVM_ITS_CTE_ICID_MASK		GENMASK_ULL(15, 0)
+
 static inline bool irq_is_pending(struct vgic_irq *irq)
 {
 	if (irq->config == VGIC_CONFIG_EDGE)
-- 
cgit v1.2.3


From dceff7025851405412e1a180ddd0cf79c7cbc1d8 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 31 Jan 2017 14:36:14 +0100
Subject: KVM: arm64: vgic-its: vgic_its_check_id returns the entry's GPA

As vgic_its_check_id() computes the device/collection entry's
GPA, let's return it so that new callers can retrieve it easily.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Christoffer Dall <cdall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/vgic/vgic-its.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 5523f0a1cddc..90afc838f138 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -694,7 +694,8 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
  * is actually valid (covered by a memslot and guest accessible).
  * For this we have to read the respective first level entry.
  */
-static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id)
+static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
+			      gpa_t *eaddr)
 {
 	int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
 	u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
@@ -725,6 +726,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id)
 		addr = BASER_ADDRESS(baser) + id * esz;
 		gfn = addr >> PAGE_SHIFT;
 
+		if (eaddr)
+			*eaddr = addr;
 		return kvm_is_visible_gfn(its->dev->kvm, gfn);
 	}
 
@@ -757,6 +760,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id)
 	indirect_ptr += index * esz;
 	gfn = indirect_ptr >> PAGE_SHIFT;
 
+	if (eaddr)
+		*eaddr = indirect_ptr;
 	return kvm_is_visible_gfn(its->dev->kvm, gfn);
 }
 
@@ -766,7 +771,7 @@ static int vgic_its_alloc_collection(struct vgic_its *its,
 {
 	struct its_collection *collection;
 
-	if (!vgic_its_check_id(its, its->baser_coll_table, coll_id))
+	if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
 		return E_ITS_MAPC_COLLECTION_OOR;
 
 	collection = kzalloc(sizeof(*collection), GFP_KERNEL);
@@ -939,7 +944,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
 	gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd);
 	struct its_device *device;
 
-	if (!vgic_its_check_id(its, its->baser_device_table, device_id))
+	if (!vgic_its_check_id(its, its->baser_device_table, device_id, NULL))
 		return E_ITS_MAPD_DEVICE_OOR;
 
 	if (valid && num_eventid_bits > VITS_TYPER_IDBITS)
-- 
cgit v1.2.3


From 57a9a117154c93539e33161dd318e6aeb8c04efa Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Mon, 9 Jan 2017 16:27:07 +0100
Subject: KVM: arm64: vgic-its: Device table save/restore

This patch saves the device table entries into guest RAM.
Both flat table and 2 stage tables are supported. DeviceId
indexing is used.

For each device listed in the device table, we also save
the translation table using the vgic_its_save/restore_itt
routines. Those functions will be implemented in a subsequent
patch.

On restore, devices are re-allocated and their itt are
re-built.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-its.c | 194 +++++++++++++++++++++++++++++++++++++++++--
 virt/kvm/arm/vgic/vgic.h     |  10 +++
 2 files changed, 199 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 90afc838f138..3dea626400d1 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -23,6 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/list.h>
 #include <linux/uaccess.h>
+#include <linux/list_sort.h>
 
 #include <linux/irqchip/arm-gic-v3.h>
 
@@ -1735,7 +1736,8 @@ out:
 	return ret;
 }
 
-u32 compute_next_devid_offset(struct list_head *h, struct its_device *dev)
+static u32 compute_next_devid_offset(struct list_head *h,
+				     struct its_device *dev)
 {
 	struct its_device *next;
 	u32 next_offset;
@@ -1789,8 +1791,8 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry,
  * Return: < 0 on error, 0 if last element was identified, 1 otherwise
  * (the last element may not be found on second level tables)
  */
-int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
-		   int start_id, entry_fn_t fn, void *opaque)
+static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
+			  int start_id, entry_fn_t fn, void *opaque)
 {
 	void *entry = kzalloc(esz, GFP_KERNEL);
 	struct kvm *kvm = its->dev->kvm;
@@ -1825,13 +1827,171 @@ out:
 	return ret;
 }
 
+static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device)
+{
+	return -ENXIO;
+}
+
+static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev)
+{
+	return -ENXIO;
+}
+
+/**
+ * vgic_its_save_dte - Save a device table entry at a given GPA
+ *
+ * @its: ITS handle
+ * @dev: ITS device
+ * @ptr: GPA
+ */
+static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev,
+			     gpa_t ptr, int dte_esz)
+{
+	struct kvm *kvm = its->dev->kvm;
+	u64 val, itt_addr_field;
+	u32 next_offset;
+
+	itt_addr_field = dev->itt_addr >> 8;
+	next_offset = compute_next_devid_offset(&its->device_list, dev);
+	val = (1ULL << KVM_ITS_DTE_VALID_SHIFT |
+	       ((u64)next_offset << KVM_ITS_DTE_NEXT_SHIFT) |
+	       (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) |
+		(dev->num_eventid_bits - 1));
+	val = cpu_to_le64(val);
+	return kvm_write_guest(kvm, ptr, &val, dte_esz);
+}
+
+/**
+ * vgic_its_restore_dte - restore a device table entry
+ *
+ * @its: its handle
+ * @id: device id the DTE corresponds to
+ * @ptr: kernel VA where the 8 byte DTE is located
+ * @opaque: unused
+ *
+ * Return: < 0 on error, 0 if the dte is the last one, id offset to the
+ * next dte otherwise
+ */
+static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
+				void *ptr, void *opaque)
+{
+	struct its_device *dev;
+	gpa_t itt_addr;
+	u8 num_eventid_bits;
+	u64 entry = *(u64 *)ptr;
+	bool valid;
+	u32 offset;
+	int ret;
+
+	entry = le64_to_cpu(entry);
+
+	valid = entry >> KVM_ITS_DTE_VALID_SHIFT;
+	num_eventid_bits = (entry & KVM_ITS_DTE_SIZE_MASK) + 1;
+	itt_addr = ((entry & KVM_ITS_DTE_ITTADDR_MASK)
+			>> KVM_ITS_DTE_ITTADDR_SHIFT) << 8;
+
+	if (!valid)
+		return 1;
+
+	/* dte entry is valid */
+	offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT;
+
+	dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+
+	ret = vgic_its_restore_itt(its, dev);
+	if (ret)
+		return ret;
+
+	return offset;
+}
+
+static int vgic_its_device_cmp(void *priv, struct list_head *a,
+			       struct list_head *b)
+{
+	struct its_device *deva = container_of(a, struct its_device, dev_list);
+	struct its_device *devb = container_of(b, struct its_device, dev_list);
+
+	if (deva->device_id < devb->device_id)
+		return -1;
+	else
+		return 1;
+}
+
 /**
  * vgic_its_save_device_tables - Save the device table and all ITT
  * into guest RAM
+ *
+ * L1/L2 handling is hidden by vgic_its_check_id() helper which directly
+ * returns the GPA of the device entry
  */
 static int vgic_its_save_device_tables(struct vgic_its *its)
 {
-	return -ENXIO;
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	struct its_device *dev;
+	int dte_esz = abi->dte_esz;
+	u64 baser;
+
+	baser = its->baser_device_table;
+
+	list_sort(NULL, &its->device_list, vgic_its_device_cmp);
+
+	list_for_each_entry(dev, &its->device_list, dev_list) {
+		int ret;
+		gpa_t eaddr;
+
+		if (!vgic_its_check_id(its, baser,
+				       dev->device_id, &eaddr))
+			return -EINVAL;
+
+		ret = vgic_its_save_itt(its, dev);
+		if (ret)
+			return ret;
+
+		ret = vgic_its_save_dte(its, dev, eaddr, dte_esz);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/**
+ * handle_l1_dte - callback used for L1 device table entries (2 stage case)
+ *
+ * @its: its handle
+ * @id: index of the entry in the L1 table
+ * @addr: kernel VA
+ * @opaque: unused
+ *
+ * L1 table entries are scanned by steps of 1 entry
+ * Return < 0 if error, 0 if last dte was found when scanning the L2
+ * table, +1 otherwise (meaning next L1 entry must be scanned)
+ */
+static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr,
+			 void *opaque)
+{
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	int l2_start_id = id * (SZ_64K / abi->dte_esz);
+	u64 entry = *(u64 *)addr;
+	int dte_esz = abi->dte_esz;
+	gpa_t gpa;
+	int ret;
+
+	entry = le64_to_cpu(entry);
+
+	if (!(entry & KVM_ITS_L1E_VALID_MASK))
+		return 1;
+
+	gpa = entry & KVM_ITS_L1E_ADDR_MASK;
+
+	ret = scan_its_table(its, gpa, SZ_64K, dte_esz,
+			     l2_start_id, vgic_its_restore_dte, NULL);
+
+	if (ret <= 0)
+		return ret;
+
+	return 1;
 }
 
 /**
@@ -1840,7 +2000,31 @@ static int vgic_its_save_device_tables(struct vgic_its *its)
  */
 static int vgic_its_restore_device_tables(struct vgic_its *its)
 {
-	return -ENXIO;
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	u64 baser = its->baser_device_table;
+	int l1_esz, ret;
+	int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
+	gpa_t l1_gpa;
+
+	if (!(baser & GITS_BASER_VALID))
+		return 0;
+
+	l1_gpa = BASER_ADDRESS(baser);
+
+	if (baser & GITS_BASER_INDIRECT) {
+		l1_esz = GITS_LVL1_ENTRY_SIZE;
+		ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0,
+				     handle_l1_dte, NULL);
+	} else {
+		l1_esz = abi->dte_esz;
+		ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0,
+				     vgic_its_restore_dte, NULL);
+	}
+
+	if (ret > 0)
+		ret = -EINVAL;
+
+	return ret;
 }
 
 static int vgic_its_save_cte(struct vgic_its *its,
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 757de7a2bdc3..f35e993883dc 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -81,6 +81,16 @@
 #define KVM_ITS_CTE_VALID_MASK		BIT_ULL(63)
 #define KVM_ITS_CTE_RDBASE_SHIFT	16
 #define KVM_ITS_CTE_ICID_MASK		GENMASK_ULL(15, 0)
+#define KVM_ITS_DTE_VALID_SHIFT		63
+#define KVM_ITS_DTE_VALID_MASK		BIT_ULL(63)
+#define KVM_ITS_DTE_NEXT_SHIFT		49
+#define KVM_ITS_DTE_NEXT_MASK		GENMASK_ULL(62, 49)
+#define KVM_ITS_DTE_ITTADDR_SHIFT	5
+#define KVM_ITS_DTE_ITTADDR_MASK	GENMASK_ULL(48, 5)
+#define KVM_ITS_DTE_SIZE_MASK		GENMASK_ULL(4, 0)
+#define KVM_ITS_L1E_VALID_MASK		BIT_ULL(63)
+/* we only support 64 kB translation table page size */
+#define KVM_ITS_L1E_ADDR_MASK		GENMASK_ULL(51, 16)
 
 static inline bool irq_is_pending(struct vgic_irq *irq)
 {
-- 
cgit v1.2.3


From eff484e0298da5a4d18ca82f5454c557fd942af5 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Wed, 3 May 2017 17:38:01 +0200
Subject: KVM: arm64: vgic-its: ITT save and restore

Implement routines to save and restore device ITT and their
interrupt table entries (ITE).

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-its.c | 116 +++++++++++++++++++++++++++++++++++++++++--
 virt/kvm/arm/vgic/vgic.h     |   4 ++
 2 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 3dea626400d1..adb3d9ea72f7 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1750,7 +1750,7 @@ static u32 compute_next_devid_offset(struct list_head *h,
 	return min_t(u32, next_offset, VITS_DTE_MAX_DEVID_OFFSET);
 }
 
-u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite)
+static u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite)
 {
 	struct its_ite *next;
 	u32 next_offset;
@@ -1827,14 +1827,124 @@ out:
 	return ret;
 }
 
+/**
+ * vgic_its_save_ite - Save an interrupt translation entry at @gpa
+ */
+static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
+			      struct its_ite *ite, gpa_t gpa, int ite_esz)
+{
+	struct kvm *kvm = its->dev->kvm;
+	u32 next_offset;
+	u64 val;
+
+	next_offset = compute_next_eventid_offset(&dev->itt_head, ite);
+	val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) |
+	       ((u64)ite->lpi << KVM_ITS_ITE_PINTID_SHIFT) |
+		ite->collection->collection_id;
+	val = cpu_to_le64(val);
+	return kvm_write_guest(kvm, gpa, &val, ite_esz);
+}
+
+/**
+ * vgic_its_restore_ite - restore an interrupt translation entry
+ * @event_id: id used for indexing
+ * @ptr: pointer to the ITE entry
+ * @opaque: pointer to the its_device
+ */
+static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
+				void *ptr, void *opaque)
+{
+	struct its_device *dev = (struct its_device *)opaque;
+	struct its_collection *collection;
+	struct kvm *kvm = its->dev->kvm;
+	struct kvm_vcpu *vcpu = NULL;
+	u64 val;
+	u64 *p = (u64 *)ptr;
+	struct vgic_irq *irq;
+	u32 coll_id, lpi_id;
+	struct its_ite *ite;
+	u32 offset;
+
+	val = *p;
+
+	val = le64_to_cpu(val);
+
+	coll_id = val & KVM_ITS_ITE_ICID_MASK;
+	lpi_id = (val & KVM_ITS_ITE_PINTID_MASK) >> KVM_ITS_ITE_PINTID_SHIFT;
+
+	if (!lpi_id)
+		return 1; /* invalid entry, no choice but to scan next entry */
+
+	if (lpi_id < VGIC_MIN_LPI)
+		return -EINVAL;
+
+	offset = val >> KVM_ITS_ITE_NEXT_SHIFT;
+	if (event_id + offset >= BIT_ULL(dev->num_eventid_bits))
+		return -EINVAL;
+
+	collection = find_collection(its, coll_id);
+	if (!collection)
+		return -EINVAL;
+
+	ite = vgic_its_alloc_ite(dev, collection, lpi_id, event_id);
+	if (IS_ERR(ite))
+		return PTR_ERR(ite);
+
+	if (its_is_collection_mapped(collection))
+		vcpu = kvm_get_vcpu(kvm, collection->target_addr);
+
+	irq = vgic_add_lpi(kvm, lpi_id, vcpu);
+	if (IS_ERR(irq))
+		return PTR_ERR(irq);
+	ite->irq = irq;
+
+	return offset;
+}
+
+static int vgic_its_ite_cmp(void *priv, struct list_head *a,
+			    struct list_head *b)
+{
+	struct its_ite *itea = container_of(a, struct its_ite, ite_list);
+	struct its_ite *iteb = container_of(b, struct its_ite, ite_list);
+
+	if (itea->event_id < iteb->event_id)
+		return -1;
+	else
+		return 1;
+}
+
 static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device)
 {
-	return -ENXIO;
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	gpa_t base = device->itt_addr;
+	struct its_ite *ite;
+	int ret;
+	int ite_esz = abi->ite_esz;
+
+	list_sort(NULL, &device->itt_head, vgic_its_ite_cmp);
+
+	list_for_each_entry(ite, &device->itt_head, ite_list) {
+		gpa_t gpa = base + ite->event_id * ite_esz;
+
+		ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz);
+		if (ret)
+			return ret;
+	}
+	return 0;
 }
 
 static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev)
 {
-	return -ENXIO;
+	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	gpa_t base = dev->itt_addr;
+	int ret;
+	int ite_esz = abi->ite_esz;
+	size_t max_size = BIT_ULL(dev->num_eventid_bits) * ite_esz;
+
+	ret = scan_its_table(its, base, max_size, ite_esz, 0,
+			     vgic_its_restore_ite, dev);
+
+	return ret;
 }
 
 /**
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index f35e993883dc..433449b25c78 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -81,6 +81,10 @@
 #define KVM_ITS_CTE_VALID_MASK		BIT_ULL(63)
 #define KVM_ITS_CTE_RDBASE_SHIFT	16
 #define KVM_ITS_CTE_ICID_MASK		GENMASK_ULL(15, 0)
+#define KVM_ITS_ITE_NEXT_SHIFT		48
+#define KVM_ITS_ITE_PINTID_SHIFT	16
+#define KVM_ITS_ITE_PINTID_MASK		GENMASK_ULL(47, 16)
+#define KVM_ITS_ITE_ICID_MASK		GENMASK_ULL(15, 0)
 #define KVM_ITS_DTE_VALID_SHIFT		63
 #define KVM_ITS_DTE_VALID_MASK		BIT_ULL(63)
 #define KVM_ITS_DTE_NEXT_SHIFT		49
-- 
cgit v1.2.3


From ccb1d791ab9e9d45be15a779e6a42fc661309af1 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Wed, 12 Apr 2017 14:13:27 +0200
Subject: KVM: arm64: vgic-its: Fix pending table sync

In its_sync_lpi_pending_table() we currently ignore the
target_vcpu of the LPIs. We sync the pending bit found in
the vcpu pending table even if the LPI is not targeting it.

Also in vgic_its_cmd_handle_invall() we are supposed to
read the config table data for the LPIs associated to the
collection ID. At the moment we refresh all LPI config
information.

This patch passes a vpcu to vgic_copy_lpi_list() so that
this latter returns a snapshot of the LPIs targeting this
CPU and only those.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/vgic/vgic-its.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index adb3d9ea72f7..9f7105c61ece 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -301,13 +301,13 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
 }
 
 /*
- * Create a snapshot of the current LPI list, so that we can enumerate all
- * LPIs without holding any lock.
- * Returns the array length and puts the kmalloc'ed array into intid_ptr.
+ * Create a snapshot of the current LPIs targeting @vcpu, so that we can
+ * enumerate those LPIs without holding any lock.
+ * Returns their number and puts the kmalloc'ed array into intid_ptr.
  */
-static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr)
+static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
 {
-	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct vgic_irq *irq;
 	u32 *intids;
 	int irq_count = dist->lpi_list_count, i = 0;
@@ -326,14 +326,14 @@ static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr)
 	spin_lock(&dist->lpi_list_lock);
 	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
 		/* We don't need to "get" the IRQ, as we hold the list lock. */
-		intids[i] = irq->intid;
-		if (++i == irq_count)
-			break;
+		if (irq->target_vcpu != vcpu)
+			continue;
+		intids[i++] = irq->intid;
 	}
 	spin_unlock(&dist->lpi_list_lock);
 
 	*intid_ptr = intids;
-	return irq_count;
+	return i;
 }
 
 /*
@@ -382,7 +382,7 @@ static u32 max_lpis_propbaser(u64 propbaser)
 }
 
 /*
- * Scan the whole LPI pending table and sync the pending bit in there
+ * Sync the pending table pending bit of LPIs targeting @vcpu
  * with our own data structures. This relies on the LPI being
  * mapped before.
  */
@@ -395,7 +395,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 	u32 *intids;
 	int nr_irqs, i;
 
-	nr_irqs = vgic_copy_lpi_list(vcpu->kvm, &intids);
+	nr_irqs = vgic_copy_lpi_list(vcpu, &intids);
 	if (nr_irqs < 0)
 		return nr_irqs;
 
@@ -1081,7 +1081,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
 
 	vcpu = kvm_get_vcpu(kvm, collection->target_addr);
 
-	irq_count = vgic_copy_lpi_list(kvm, &intids);
+	irq_count = vgic_copy_lpi_list(vcpu, &intids);
 	if (irq_count < 0)
 		return irq_count;
 
-- 
cgit v1.2.3


From 280771252c1bae0947215c59fb9ea6a8d9f8399d Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Mon, 9 Jan 2017 16:28:27 +0100
Subject: KVM: arm64: vgic-v3: KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES

This patch adds a new attribute to GICV3 KVM device
KVM_DEV_ARM_VGIC_GRP_CTRL group. This allows userspace to
flush all GICR pending tables into guest RAM.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/uapi/asm/kvm.h     |  1 +
 arch/arm64/include/uapi/asm/kvm.h   |  1 +
 virt/kvm/arm/vgic/vgic-kvm-device.c | 20 +++++++++++++++
 virt/kvm/arm/vgic/vgic-v3.c         | 51 +++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/vgic/vgic.h            |  1 +
 5 files changed, 74 insertions(+)

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 0f7ee15f9fcf..a3fbab56d4a8 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -204,6 +204,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT		0
 #define   KVM_DEV_ARM_ITS_SAVE_TABLES		1
 #define   KVM_DEV_ARM_ITS_RESTORE_TABLES	2
+#define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 5dd7be049934..d46a99d999ec 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -224,6 +224,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT		0
 #define   KVM_DEV_ARM_ITS_SAVE_TABLES           1
 #define   KVM_DEV_ARM_ITS_RESTORE_TABLES        2
+#define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
 
 /* Device Control API on vcpu fd */
 #define KVM_ARM_VCPU_PMU_V3_CTRL	0
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index 859bfa871b30..d48743cafedc 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -580,6 +580,24 @@ static int vgic_v3_set_attr(struct kvm_device *dev,
 		reg = tmp32;
 		return vgic_v3_attr_regs_access(dev, attr, &reg, true);
 	}
+	case KVM_DEV_ARM_VGIC_GRP_CTRL: {
+		int ret;
+
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES:
+			mutex_lock(&dev->kvm->lock);
+
+			if (!lock_all_vcpus(dev->kvm)) {
+				mutex_unlock(&dev->kvm->lock);
+				return -EBUSY;
+			}
+			ret = vgic_v3_save_pending_tables(dev->kvm);
+			unlock_all_vcpus(dev->kvm);
+			mutex_unlock(&dev->kvm->lock);
+			return ret;
+		}
+		break;
+	}
 	}
 	return -ENXIO;
 }
@@ -658,6 +676,8 @@ static int vgic_v3_has_attr(struct kvm_device *dev,
 		switch (attr->attr) {
 		case KVM_DEV_ARM_VGIC_CTRL_INIT:
 			return 0;
+		case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES:
+			return 0;
 		}
 	}
 	return -ENXIO;
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 9ae8adddff69..54dee725da18 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -278,6 +278,57 @@ retry:
 	return 0;
 }
 
+/**
+ * vgic_its_save_pending_tables - Save the pending tables into guest RAM
+ * kvm lock and all vcpu lock must be held
+ */
+int vgic_v3_save_pending_tables(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	int last_byte_offset = -1;
+	struct vgic_irq *irq;
+	int ret;
+
+	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
+		int byte_offset, bit_nr;
+		struct kvm_vcpu *vcpu;
+		gpa_t pendbase, ptr;
+		bool stored;
+		u8 val;
+
+		vcpu = irq->target_vcpu;
+		if (!vcpu)
+			continue;
+
+		pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+
+		byte_offset = irq->intid / BITS_PER_BYTE;
+		bit_nr = irq->intid % BITS_PER_BYTE;
+		ptr = pendbase + byte_offset;
+
+		if (byte_offset != last_byte_offset) {
+			ret = kvm_read_guest(kvm, ptr, &val, 1);
+			if (ret)
+				return ret;
+			last_byte_offset = byte_offset;
+		}
+
+		stored = val & (1U << bit_nr);
+		if (stored == irq->pending_latch)
+			continue;
+
+		if (irq->pending_latch)
+			val |= 1 << bit_nr;
+		else
+			val &= ~(1 << bit_nr);
+
+		ret = kvm_write_guest(kvm, ptr, &val, 1);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
 /* check for overlapping regions and for regions crossing the end of memory */
 static bool vgic_v3_check_base(struct kvm *kvm)
 {
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 433449b25c78..8259f0a859ab 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -173,6 +173,7 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu);
 int vgic_v3_probe(const struct gic_kvm_info *info);
 int vgic_v3_map_resources(struct kvm *kvm);
 int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
+int vgic_v3_save_pending_tables(struct kvm *kvm);
 int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
 
 void vgic_v3_load(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From 2e84611b3f4fa50e1f4c12f2966fcc7fb955d944 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 5 May 2017 13:02:42 -0400
Subject: NFSv4: Fix an rcu lock leak

The intention in the original patch was to release the lock when
we put the inode, however something got screwed up.

Reported-by: Jason Yan <yanaijie@huawei.com>
Fixes: 7b410d9ce460f ("pNFS: Delay getting the layout header in..")
Cc: stable@vger.kernel.org # v4.10+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback_proc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index e7f041447afd..52479f180ea1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -131,10 +131,11 @@ restart:
 			if (!inode)
 				continue;
 			if (!nfs_sb_active(inode->i_sb)) {
-				rcu_read_lock();
+				rcu_read_unlock();
 				spin_unlock(&clp->cl_lock);
 				iput(inode);
 				spin_lock(&clp->cl_lock);
+				rcu_read_lock();
 				goto restart;
 			}
 			return inode;
@@ -170,10 +171,11 @@ restart:
 			if (!inode)
 				continue;
 			if (!nfs_sb_active(inode->i_sb)) {
-				rcu_read_lock();
+				rcu_read_unlock();
 				spin_unlock(&clp->cl_lock);
 				iput(inode);
 				spin_lock(&clp->cl_lock);
+				rcu_read_lock();
 				goto restart;
 			}
 			return inode;
-- 
cgit v1.2.3


From 28cf22d0ba283deccc30b71de5f34d222cf9aa4c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sat, 6 May 2017 12:19:11 -0400
Subject: NFSv4: Fix exclusive create attributes encoding

When using NFS4_CREATE_EXCLUSIVE4_1 mode, the client will overestimate the
amount of space that it needs for the attributes because it does so
before checking whether or not the server supports a given attribute.

Fix by checking the attribute mask earlier.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4xdr.c | 75 ++++++++++++++++++++++++++------------------------------
 1 file changed, 35 insertions(+), 40 deletions(-)

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index dbfe48ac3529..3aebfdc82b30 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1000,8 +1000,9 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
 
 static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 				const struct nfs4_label *label,
+				const umode_t *umask,
 				const struct nfs_server *server,
-				bool excl_check, const umode_t *umask)
+				const uint32_t attrmask[])
 {
 	char owner_name[IDMAP_NAMESZ];
 	char owner_group[IDMAP_NAMESZ];
@@ -1016,22 +1017,20 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 	/*
 	 * We reserve enough space to write the entire attribute buffer at once.
 	 */
-	if (iap->ia_valid & ATTR_SIZE) {
+	if ((iap->ia_valid & ATTR_SIZE) && (attrmask[0] & FATTR4_WORD0_SIZE)) {
 		bmval[0] |= FATTR4_WORD0_SIZE;
 		len += 8;
 	}
-	if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
-		umask = NULL;
 	if (iap->ia_valid & ATTR_MODE) {
-		if (umask) {
+		if (umask && (attrmask[2] & FATTR4_WORD2_MODE_UMASK)) {
 			bmval[2] |= FATTR4_WORD2_MODE_UMASK;
 			len += 8;
-		} else {
+		} else if (attrmask[1] & FATTR4_WORD1_MODE) {
 			bmval[1] |= FATTR4_WORD1_MODE;
 			len += 4;
 		}
 	}
-	if (iap->ia_valid & ATTR_UID) {
+	if ((iap->ia_valid & ATTR_UID) && (attrmask[1] & FATTR4_WORD1_OWNER)) {
 		owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
 		if (owner_namelen < 0) {
 			dprintk("nfs: couldn't resolve uid %d to string\n",
@@ -1044,7 +1043,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 		bmval[1] |= FATTR4_WORD1_OWNER;
 		len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
 	}
-	if (iap->ia_valid & ATTR_GID) {
+	if ((iap->ia_valid & ATTR_GID) &&
+	   (attrmask[1] & FATTR4_WORD1_OWNER_GROUP)) {
 		owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
 		if (owner_grouplen < 0) {
 			dprintk("nfs: couldn't resolve gid %d to string\n",
@@ -1056,32 +1056,26 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 		bmval[1] |= FATTR4_WORD1_OWNER_GROUP;
 		len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
 	}
-	if (iap->ia_valid & ATTR_ATIME_SET) {
-		bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
-		len += 16;
-	} else if (iap->ia_valid & ATTR_ATIME) {
-		bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
-		len += 4;
-	}
-	if (iap->ia_valid & ATTR_MTIME_SET) {
-		bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
-		len += 16;
-	} else if (iap->ia_valid & ATTR_MTIME) {
-		bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
-		len += 4;
+	if (attrmask[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
+		if (iap->ia_valid & ATTR_ATIME_SET) {
+			bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
+			len += 16;
+		} else if (iap->ia_valid & ATTR_ATIME) {
+			bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
+			len += 4;
+		}
 	}
-
-	if (excl_check) {
-		const u32 *excl_bmval = server->exclcreat_bitmask;
-		bmval[0] &= excl_bmval[0];
-		bmval[1] &= excl_bmval[1];
-		bmval[2] &= excl_bmval[2];
-
-		if (!(excl_bmval[2] & FATTR4_WORD2_SECURITY_LABEL))
-			label = NULL;
+	if (attrmask[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
+		if (iap->ia_valid & ATTR_MTIME_SET) {
+			bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
+			len += 16;
+		} else if (iap->ia_valid & ATTR_MTIME) {
+			bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
+			len += 4;
+		}
 	}
 
-	if (label) {
+	if (label && (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL)) {
 		len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
 		bmval[2] |= FATTR4_WORD2_SECURITY_LABEL;
 	}
@@ -1188,8 +1182,8 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
 	}
 
 	encode_string(xdr, create->name->len, create->name->name);
-	encode_attrs(xdr, create->attrs, create->label, create->server, false,
-		     &create->umask);
+	encode_attrs(xdr, create->attrs, create->label, &create->umask,
+			create->server, create->server->attr_bitmask);
 }
 
 static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr)
@@ -1409,13 +1403,13 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
 	switch(arg->createmode) {
 	case NFS4_CREATE_UNCHECKED:
 		*p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
-		encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false,
-			     &arg->umask);
+		encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask,
+				arg->server, arg->server->attr_bitmask);
 		break;
 	case NFS4_CREATE_GUARDED:
 		*p = cpu_to_be32(NFS4_CREATE_GUARDED);
-		encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false,
-			     &arg->umask);
+		encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask,
+				arg->server, arg->server->attr_bitmask);
 		break;
 	case NFS4_CREATE_EXCLUSIVE:
 		*p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
@@ -1424,8 +1418,8 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
 	case NFS4_CREATE_EXCLUSIVE4_1:
 		*p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1);
 		encode_nfs4_verifier(xdr, &arg->u.verifier);
-		encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, true,
-			     &arg->umask);
+		encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask,
+				arg->server, arg->server->exclcreat_bitmask);
 	}
 }
 
@@ -1681,7 +1675,8 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
 {
 	encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr);
 	encode_nfs4_stateid(xdr, &arg->stateid);
-	encode_attrs(xdr, arg->iap, arg->label, server, false, NULL);
+	encode_attrs(xdr, arg->iap, arg->label, NULL, server,
+			server->attr_bitmask);
 }
 
 static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr)
-- 
cgit v1.2.3


From ebd768579552a10682c2cbdfa657779dea62a01d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 8 May 2017 11:44:40 +0100
Subject: blk-mq: make __blk_mq_stop_hw_queues static

Making __blk_mq_stop_hw_queues static fixes sparse warning:

  block/blk-mq.c:6: warning: symbol '__blk_mq_stop_hw_queues' was not
  declared. Should it be static?

Fixes: 2719aa217e0d0 ("blk-mq: don't use sync workqueue flushing from drivers")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a7e6b353e4e9..9ae1d9ccf4df 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1236,7 +1236,7 @@ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_stop_hw_queue);
 
-void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
+static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
-- 
cgit v1.2.3


From 3013c4983eb15f4ce8958e81922cdfd80f771d3e Mon Sep 17 00:00:00 2001
From: Pavel Belous <pavel.belous@aquantia.com>
Date: Thu, 4 May 2017 23:10:56 +0300
Subject: aquantia: Fix "ethtool -S" crash when adapter down.

This patch fixes the crash that happens when driver tries to collect statistics
from already released "aq_vec" object.
If adapter is in "down" state we still allow user to see statistics from HW.

V2: fixed braces around "aq_vec_free".

Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code")
Signed-off-by: Pavel Belous <pavel.belous@aquantia.com>
Tested-by: David Arcari <darcari@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index cdb02991f249..9ee1c5016784 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -755,7 +755,7 @@ void aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
 	count = 0U;
 
 	for (i = 0U, aq_vec = self->aq_vec[0];
-		self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) {
+		aq_vec && self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) {
 		data += count;
 		aq_vec_get_sw_stats(aq_vec, data, &count);
 	}
@@ -959,8 +959,10 @@ void aq_nic_free_hot_resources(struct aq_nic_s *self)
 		goto err_exit;
 
 	for (i = AQ_DIMOF(self->aq_vec); i--;) {
-		if (self->aq_vec[i])
+		if (self->aq_vec[i]) {
 			aq_vec_free(self->aq_vec[i]);
+			self->aq_vec[i] = NULL;
+		}
 	}
 
 err_exit:;
-- 
cgit v1.2.3


From 74d71a01abef37f71d914f2105a4cb8712a2beb8 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Sat, 6 May 2017 06:14:43 +0200
Subject: device-dax: Tell kbuild DEV_DAX_PMEM depends on DEV_DAX

ERROR: "devm_create_dev_dax" [drivers/dax/dax_pmem.ko] undefined!
ERROR: "alloc_dax_region" [drivers/dax/dax_pmem.ko] undefined!
ERROR: "dax_region_put" [drivers/dax/dax_pmem.ko] undefined!

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index b7053eafd88e..6b7e20eae16c 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -19,7 +19,7 @@ config DEV_DAX
 
 config DEV_DAX_PMEM
 	tristate "PMEM DAX: direct access to persistent memory"
-	depends on LIBNVDIMM && NVDIMM_DAX
+	depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX
 	default DEV_DAX
 	help
 	  Support raw access to persistent memory.  Note that this
-- 
cgit v1.2.3


From ef51042472f55b325fd7f2b26a2e29fd89757234 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 8 May 2017 10:55:27 -0700
Subject: block, dax: move "select DAX" from BLOCK to FS_DAX

For configurations that do not enable DAX filesystems or drivers, do not
require the DAX core to be built.

Given that the 'direct_access' method has been removed from
'block_device_operations', we can also go ahead and remove the
block-related dax helper functions from fs/block_dev.c to
drivers/dax/super.c. This keeps dax details out of the block layer and
lets the DAX core be built as a module in the FS_DAX=n case.

Filesystems need to include dax.h to call bdev_dax_supported().

Cc: linux-xfs@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.com>
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 block/Kconfig          |  1 -
 drivers/dax/super.c    | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/Kconfig             |  1 +
 fs/block_dev.c         | 66 -----------------------------------------------
 fs/ext2/super.c        |  1 +
 fs/ext4/super.c        |  1 +
 fs/xfs/xfs_super.c     |  1 +
 include/linux/blkdev.h |  2 --
 include/linux/dax.h    | 30 ++++++++++++++++++++--
 9 files changed, 102 insertions(+), 71 deletions(-)

diff --git a/block/Kconfig b/block/Kconfig
index 93da7fc3f254..e9f780f815f5 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -6,7 +6,6 @@ menuconfig BLOCK
        default y
        select SBITMAP
        select SRCU
-       select DAX
        help
 	 Provide block layer support for the kernel.
 
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 465dcd7317d5..e8998d15f3cd 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/magic.h>
+#include <linux/genhd.h>
 #include <linux/cdev.h>
 #include <linux/hash.h>
 #include <linux/slab.h>
@@ -47,6 +48,75 @@ void dax_read_unlock(int id)
 }
 EXPORT_SYMBOL_GPL(dax_read_unlock);
 
+int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
+		pgoff_t *pgoff)
+{
+	phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
+
+	if (pgoff)
+		*pgoff = PHYS_PFN(phys_off);
+	if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL(bdev_dax_pgoff);
+
+/**
+ * __bdev_dax_supported() - Check if the device supports dax for filesystem
+ * @sb: The superblock of the device
+ * @blocksize: The block size of the device
+ *
+ * This is a library function for filesystems to check if the block device
+ * can be mounted with dax option.
+ *
+ * Return: negative errno if unsupported, 0 if supported.
+ */
+int __bdev_dax_supported(struct super_block *sb, int blocksize)
+{
+	struct block_device *bdev = sb->s_bdev;
+	struct dax_device *dax_dev;
+	pgoff_t pgoff;
+	int err, id;
+	void *kaddr;
+	pfn_t pfn;
+	long len;
+
+	if (blocksize != PAGE_SIZE) {
+		pr_err("VFS (%s): error: unsupported blocksize for dax\n",
+				sb->s_id);
+		return -EINVAL;
+	}
+
+	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
+	if (err) {
+		pr_err("VFS (%s): error: unaligned partition for dax\n",
+				sb->s_id);
+		return err;
+	}
+
+	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+	if (!dax_dev) {
+		pr_err("VFS (%s): error: device does not support dax\n",
+				sb->s_id);
+		return -EOPNOTSUPP;
+	}
+
+	id = dax_read_lock();
+	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
+	dax_read_unlock(id);
+
+	put_dax(dax_dev);
+
+	if (len < 1) {
+		pr_err("VFS (%s): error: dax access failed (%ld)",
+				sb->s_id, len);
+		return len < 0 ? len : -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__bdev_dax_supported);
+
 /**
  * struct dax_device - anchor object for dax services
  * @inode: core vfs
diff --git a/fs/Kconfig b/fs/Kconfig
index 83eab52fb3f6..b0e42b6a96b9 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -39,6 +39,7 @@ config FS_DAX
 	depends on MMU
 	depends on !(ARM || MIPS || SPARC)
 	select FS_IOMAP
+	select DAX
 	help
 	  Direct Access (DAX) can be used on memory-backed block devices.
 	  If the block device supports DAX and the filesystem supports DAX,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 666367e13711..3096ecd48304 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -718,72 +718,6 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(bdev_write_page);
 
-int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
-		pgoff_t *pgoff)
-{
-	phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
-
-	if (pgoff)
-		*pgoff = PHYS_PFN(phys_off);
-	if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
-		return -EINVAL;
-	return 0;
-}
-EXPORT_SYMBOL(bdev_dax_pgoff);
-
-/**
- * bdev_dax_supported() - Check if the device supports dax for filesystem
- * @sb: The superblock of the device
- * @blocksize: The block size of the device
- *
- * This is a library function for filesystems to check if the block device
- * can be mounted with dax option.
- *
- * Return: negative errno if unsupported, 0 if supported.
- */
-int bdev_dax_supported(struct super_block *sb, int blocksize)
-{
-	struct block_device *bdev = sb->s_bdev;
-	struct dax_device *dax_dev;
-	pgoff_t pgoff;
-	int err, id;
-	void *kaddr;
-	pfn_t pfn;
-	long len;
-
-	if (blocksize != PAGE_SIZE) {
-		vfs_msg(sb, KERN_ERR, "error: unsupported blocksize for dax");
-		return -EINVAL;
-	}
-
-	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
-	if (err) {
-		vfs_msg(sb, KERN_ERR, "error: unaligned partition for dax");
-		return err;
-	}
-
-	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
-	if (!dax_dev) {
-		vfs_msg(sb, KERN_ERR, "error: device does not support dax");
-		return -EOPNOTSUPP;
-	}
-
-	id = dax_read_lock();
-	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
-	dax_read_unlock(id);
-
-	put_dax(dax_dev);
-
-	if (len < 1) {
-		vfs_msg(sb, KERN_ERR,
-				"error: dax access failed (%ld)", len);
-		return len < 0 ? len : -EIO;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bdev_dax_supported);
-
 /*
  * pseudo-fs
  */
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9e25a71fe1a2..d07773b81da9 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -32,6 +32,7 @@
 #include <linux/log2.h>
 #include <linux/quotaops.h>
 #include <linux/uaccess.h>
+#include <linux/dax.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a9448db1cf7e..bf6bb8997124 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,6 +37,7 @@
 #include <linux/ctype.h>
 #include <linux/log2.h>
 #include <linux/crc16.h>
+#include <linux/dax.h>
 #include <linux/cleancache.h>
 #include <linux/uaccess.h>
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 685c042a120f..f5c58d6dcafb 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -52,6 +52,7 @@
 #include "xfs_reflink.h"
 
 #include <linux/namei.h>
+#include <linux/dax.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/mount.h>
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 848f87eb1905..e4d9899755a7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1940,8 +1940,6 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
 extern int bdev_read_page(struct block_device *, sector_t, struct page *);
 extern int bdev_write_page(struct block_device *, sector_t, struct page *,
 						struct writeback_control *);
-extern int bdev_dax_supported(struct super_block *, int);
-int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #else /* CONFIG_BLOCK */
 
 struct block_device;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index d3158e74a59e..7fdf1d710042 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -18,12 +18,38 @@ struct dax_operations {
 			void **, pfn_t *);
 };
 
+int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
+#if IS_ENABLED(CONFIG_FS_DAX)
+int __bdev_dax_supported(struct super_block *sb, int blocksize);
+static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
+{
+	return __bdev_dax_supported(sb, blocksize);
+}
+#else
+static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_DAX)
+struct dax_device *dax_get_by_host(const char *host);
+void put_dax(struct dax_device *dax_dev);
+#else
+static inline struct dax_device *dax_get_by_host(const char *host)
+{
+	return NULL;
+}
+
+static inline void put_dax(struct dax_device *dax_dev)
+{
+}
+#endif
+
 int dax_read_lock(void);
 void dax_read_unlock(int id);
-struct dax_device *dax_get_by_host(const char *host);
 struct dax_device *alloc_dax(void *private, const char *host,
 		const struct dax_operations *ops);
-void put_dax(struct dax_device *dax_dev);
 bool dax_alive(struct dax_device *dax_dev);
 void kill_dax(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
-- 
cgit v1.2.3


From 82486aa6f1b9bc8145e6d0fa2bc0b44307f3b875 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 4 May 2017 14:54:17 -0700
Subject: ipv4: restore rt->fi for reference counting

IPv4 dst could use fi->fib_metrics to store metrics but fib_info
itself is refcnt'ed, so without taking a refcnt fi and
fi->fib_metrics could be freed while dst metrics still points to
it. This triggers use-after-free as reported by Andrey twice.

This patch reverts commit 2860583fe840 ("ipv4: Kill rt->fi") to
restore this reference counting. It is a quick fix for -net and
-stable, for -net-next, as Eric suggested, we can consider doing
reference counting for metrics itself instead of relying on fib_info.

IPv6 is very different, it copies or steals the metrics from mx6_config
in fib6_commit_metrics() so probably doesn't need a refcnt.

Decnet has already done the refcnt'ing, see dn_fib_semantic_match().

Fixes: 2860583fe840 ("ipv4: Kill rt->fi")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h |  1 +
 net/ipv4/route.c    | 18 +++++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/include/net/route.h b/include/net/route.h
index 2cc0e14c6359..4335eb72a04c 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -69,6 +69,7 @@ struct rtable {
 
 	struct list_head	rt_uncached;
 	struct uncached_list	*rt_uncached_list;
+	struct fib_info		*fi; /* for refcnt to shared metrics */
 };
 
 static inline bool rt_is_input_route(const struct rtable *rt)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 655d9eebe43e..f647310f8e4d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1387,6 +1387,11 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 {
 	struct rtable *rt = (struct rtable *) dst;
 
+	if (rt->fi) {
+		fib_info_put(rt->fi);
+		rt->fi = NULL;
+	}
+
 	if (!list_empty(&rt->rt_uncached)) {
 		struct uncached_list *ul = rt->rt_uncached_list;
 
@@ -1424,6 +1429,16 @@ static bool rt_cache_valid(const struct rtable *rt)
 		!rt_is_expired(rt);
 }
 
+static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
+{
+	if (fi->fib_metrics != (u32 *)dst_default_metrics) {
+		fib_info_hold(fi);
+		rt->fi = fi;
+	}
+
+	dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+}
+
 static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 			   const struct fib_result *res,
 			   struct fib_nh_exception *fnhe,
@@ -1438,7 +1453,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 			rt->rt_gateway = nh->nh_gw;
 			rt->rt_uses_gateway = 1;
 		}
-		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+		rt_init_metrics(rt, fi);
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = nh->nh_tclassid;
 #endif
@@ -1490,6 +1505,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 		rt->rt_gateway = 0;
 		rt->rt_uses_gateway = 0;
 		rt->rt_table_id = 0;
+		rt->fi = NULL;
 		INIT_LIST_HEAD(&rt->rt_uncached);
 
 		rt->dst.output = ip_output;
-- 
cgit v1.2.3


From 1b1fc3fddabfb8739ef2c8f04e05a9858b42c1f7 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Fri, 5 May 2017 12:53:23 -0700
Subject: tcp: make congestion control optionally skip slow start after idle

Congestion control modules that want full control over congestion
control behavior do not want the cwnd modifications controlled by
the sysctl_tcp_slow_start_after_idle code path.
So skip those code paths for CC modules that use the cong_control()
API.
As an example, those cwnd effects are not desired for the BBR congestion
control algorithm.

Fixes: c0402760f565 ("tcp: new CC hook to set sending rate with rate_sample in any CA state")
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     | 4 +++-
 net/ipv4/tcp_output.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8c0e5a901d64..38a7427ae902 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1234,10 +1234,12 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta);
 
 static inline void tcp_slow_start_after_idle_check(struct sock *sk)
 {
+	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
 	struct tcp_sock *tp = tcp_sk(sk);
 	s32 delta;
 
-	if (!sysctl_tcp_slow_start_after_idle || tp->packets_out)
+	if (!sysctl_tcp_slow_start_after_idle || tp->packets_out ||
+	    ca_ops->cong_control)
 		return;
 	delta = tcp_time_stamp - tp->lsndtime;
 	if (delta > inet_csk(sk)->icsk_rto)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 60111a0fc201..4858e190f6ac 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1514,6 +1514,7 @@ static void tcp_cwnd_application_limited(struct sock *sk)
 
 static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 {
+	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Track the maximum number of outstanding packets in each
@@ -1536,7 +1537,8 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 			tp->snd_cwnd_used = tp->packets_out;
 
 		if (sysctl_tcp_slow_start_after_idle &&
-		    (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
+		    (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
+		    !ca_ops->cong_control)
 			tcp_cwnd_application_limited(sk);
 
 		/* The following conditions together indicate the starvation
-- 
cgit v1.2.3


From 8403debeead8e8b296d7a7db2174af524695ab42 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Fri, 5 May 2017 16:17:41 -0400
Subject: vlan: Keep NETIF_F_HW_CSUM similar to other software devices

Vlan devices, like all other software devices, enable
NETIF_F_HW_CSUM feature.  However, unlike all the othe other
software devices, vlans will switch to using IP|IPV6_CSUM
features, if the underlying devices uses them.  In these situations,
checksum offload features on the vlan device can't be controlled
via ethtool.

This patch makes vlans keep HW_CSUM feature if the underlying
device supports checksum offloading.  This makes vlan devices
behave like other software devices, and restores control to the
user.

A side-effect is that some offload settings (typically UFO)
may be enabled on the vlan device while being disabled on the HW.
However, the GSO code will correctly process the packets. This
actually results in slightly better raw throughput.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 9ee5787634e5..953b6728bd00 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -626,11 +626,18 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
 {
 	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	netdev_features_t old_features = features;
+	netdev_features_t lower_features;
 
-	features = netdev_intersect_features(features, real_dev->vlan_features);
-	features |= NETIF_F_RXCSUM;
-	features = netdev_intersect_features(features, real_dev->features);
+	lower_features = netdev_intersect_features((real_dev->vlan_features |
+						    NETIF_F_RXCSUM),
+						   real_dev->features);
 
+	/* Add HW_CSUM setting to preserve user ability to control
+	 * checksum offload on the vlan device.
+	 */
+	if (lower_features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+		lower_features |= NETIF_F_HW_CSUM;
+	features = netdev_intersect_features(features, lower_features);
 	features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE);
 	features |= NETIF_F_LLTX;
 
-- 
cgit v1.2.3


From 9e4eb1ce472fbf7b007f23c88ec11c37265e401c Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 May 2017 15:25:32 -0700
Subject: bna: Avoid reading past end of buffer

Using memcpy() from a string that is shorter than the length copied means
the destination buffer is being filled with arbitrary data from the kernel
rodata segment. Instead, use strncpy() which will fill the trailing bytes
with zeros.

This was found with the future CONFIG_FORTIFY_SOURCE feature.

Cc: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/brocade/bna/bfa_ioc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
index 0f6811860ad5..a36e38676640 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c
+++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
@@ -2845,7 +2845,7 @@ bfa_ioc_get_adapter_optrom_ver(struct bfa_ioc *ioc, char *optrom_ver)
 static void
 bfa_ioc_get_adapter_manufacturer(struct bfa_ioc *ioc, char *manufacturer)
 {
-	memcpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN);
+	strncpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN);
 }
 
 static void
-- 
cgit v1.2.3


From 4dc69c1c1fff2f587f8e737e70b4a4e7565a5c94 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 May 2017 15:30:23 -0700
Subject: bna: ethtool: Avoid reading past end of buffer

Using memcpy() from a string that is shorter than the length copied means
the destination buffer is being filled with arbitrary data from the kernel
rodata segment. Instead, use strncpy() which will fill the trailing bytes
with zeros.

This was found with the future CONFIG_FORTIFY_SOURCE feature.

Cc: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/brocade/bna/bnad_ethtool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
index 286593922139..31032de5843b 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
@@ -547,8 +547,8 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string)
 		for (i = 0; i < BNAD_ETHTOOL_STATS_NUM; i++) {
 			BUG_ON(!(strlen(bnad_net_stats_strings[i]) <
 				   ETH_GSTRING_LEN));
-			memcpy(string, bnad_net_stats_strings[i],
-			       ETH_GSTRING_LEN);
+			strncpy(string, bnad_net_stats_strings[i],
+				ETH_GSTRING_LEN);
 			string += ETH_GSTRING_LEN;
 		}
 		bmap = bna_tx_rid_mask(&bnad->bna);
-- 
cgit v1.2.3


From df5303a8aa9a0a6934f4cea7427f1edf771f21c2 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 May 2017 15:34:34 -0700
Subject: qlge: Avoid reading past end of buffer

Using memcpy() from a string that is shorter than the length copied means
the destination buffer is being filled with arbitrary data from the kernel
rodata segment. Instead, use strncpy() which will fill the trailing bytes
with zeros.

This was found with the future CONFIG_FORTIFY_SOURCE feature.

Cc: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlge/qlge_dbg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c
index 829be21f97b2..28ea0af89aef 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c
@@ -765,7 +765,7 @@ int ql_core_dump(struct ql_adapter *qdev, struct ql_mpi_coredump *mpi_coredump)
 		sizeof(struct mpi_coredump_global_header);
 	mpi_coredump->mpi_global_header.imageSize =
 		sizeof(struct ql_mpi_coredump);
-	memcpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump",
+	strncpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump",
 		sizeof(mpi_coredump->mpi_global_header.idString));
 
 	/* Get generic NIC reg dump */
@@ -1255,7 +1255,7 @@ static void ql_gen_reg_dump(struct ql_adapter *qdev,
 		sizeof(struct mpi_coredump_global_header);
 	mpi_coredump->mpi_global_header.imageSize =
 		sizeof(struct ql_reg_dump);
-	memcpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump",
+	strncpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump",
 		sizeof(mpi_coredump->mpi_global_header.idString));
 
 
-- 
cgit v1.2.3


From ac45bd93a5035c2f39c9862b8b6ed692db0fdc87 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 6 May 2017 03:49:01 +0300
Subject: bnxt_en: allocate enough space for ->ntp_fltr_bmap

We have the number of longs, but we need to calculate the number of
bytes required.

Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b3ba66032980..b56c54d68d5e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3000,7 +3000,8 @@ static int bnxt_alloc_ntp_fltrs(struct bnxt *bp)
 		INIT_HLIST_HEAD(&bp->ntp_fltr_hash_tbl[i]);
 
 	bp->ntp_fltr_count = 0;
-	bp->ntp_fltr_bmap = kzalloc(BITS_TO_LONGS(BNXT_NTP_FLTR_MAX_FLTR),
+	bp->ntp_fltr_bmap = kcalloc(BITS_TO_LONGS(BNXT_NTP_FLTR_MAX_FLTR),
+				    sizeof(long),
 				    GFP_KERNEL);
 
 	if (!bp->ntp_fltr_bmap)
-- 
cgit v1.2.3


From 435009d4064af72b283fdd47025cede4dfefc6d2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 10 Jan 2017 14:56:42 +0200
Subject: ubi: Fix section mismatch

WARNING: vmlinux.o(.text+0x1f2a80): Section mismatch in reference from the variable __param_ops_mtd to the function .init.text:ubi_mtd_param_parse()
The function __param_ops_mtd() references
the function __init ubi_mtd_param_parse().
This is often because __param_ops_mtd lacks a __init
annotation or the annotation of ubi_mtd_param_parse is wrong.

Cc: Richard Weinberger <richard@nod.at>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/build.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 77513195f50e..5b167339e87e 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -74,10 +74,10 @@ struct mtd_dev_param {
 };
 
 /* Numbers of elements set in the @mtd_dev_param array */
-static int __initdata mtd_devs;
+static int mtd_devs;
 
 /* MTD devices specification parameters */
-static struct mtd_dev_param __initdata mtd_dev_param[UBI_MAX_DEVICES];
+static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES];
 #ifdef CONFIG_MTD_UBI_FASTMAP
 /* UBI module parameter to enable fastmap automatically on non-fastmap images */
 static bool fm_autoconvert;
@@ -1351,7 +1351,7 @@ module_exit(ubi_exit);
  * This function returns positive resulting integer in case of success and a
  * negative error code in case of failure.
  */
-static int __init bytes_str_to_int(const char *str)
+static int bytes_str_to_int(const char *str)
 {
 	char *endp;
 	unsigned long result;
@@ -1389,7 +1389,7 @@ static int __init bytes_str_to_int(const char *str)
  * This function returns zero in case of success and a negative error code in
  * case of error.
  */
-static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp)
+static int ubi_mtd_param_parse(const char *val, struct kernel_param *kp)
 {
 	int i, len;
 	struct mtd_dev_param *p;
-- 
cgit v1.2.3


From 997d30cb7490eb1ac37a3fb02a222fabf1f25fa9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 10 Jan 2017 14:56:43 +0200
Subject: ubi: Make mtd parameter readable

Fix permissions to allow read mtd parameter back (only for owner).

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/build.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 5b167339e87e..d1594a1b72e6 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -1470,7 +1470,7 @@ static int ubi_mtd_param_parse(const char *val, struct kernel_param *kp)
 	return 0;
 }
 
-module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000);
+module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 0400);
 MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: mtd=<name|num|path>[,<vid_hdr_offs>[,max_beb_per1024[,ubi_num]]].\n"
 		      "Multiple \"mtd\" parameters may be specified.\n"
 		      "MTD devices may be specified by their number, name, or path to the MTD character device node.\n"
-- 
cgit v1.2.3


From 8326c1eec2449f0e868f7b19a5fa7bfa0386ab48 Mon Sep 17 00:00:00 2001
From: Hyunchul Lee <cheol.lee@lge.com>
Date: Fri, 3 Mar 2017 16:44:03 +0900
Subject: ubifs: Add CONFIG_UBIFS_FS_SECURITY to disable/enable security labels

When write syscall is called, every time security label is searched to
determine that file's privileges should be changed.
If LSM(Linux Security Model) is not used, this is useless.

So introduce CONFIG_UBIFS_SECURITY to disable security labels. it's default
value is "y".

Signed-off-by: Hyunchul Lee <cheol.lee@lge.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/Kconfig | 13 +++++++++++++
 fs/ubifs/ubifs.h | 14 ++++++++++++--
 fs/ubifs/xattr.c |  6 ++++++
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index b0d0623c83ed..83a961bf7280 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -61,3 +61,16 @@ config UBIFS_FS_ENCRYPTION
 	  feature is similar to ecryptfs, but it is more memory
 	  efficient since it avoids caching the encrypted and
 	  decrypted pages in the page cache.
+
+config UBIFS_FS_SECURITY
+	bool "UBIFS Security Labels"
+	depends on UBIFS_FS
+	default y
+	help
+	  Security labels provide an access control facility to support Linux
+	  Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
+	  Linux. This option enables an extended attribute handler for file
+	  security labels in the ubifs filesystem, so that it requires enabling
+	  the extended attribute support in advance.
+
+	  If you are not using a security module, say N.
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 4d57e488038e..abdd11634ba4 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1756,13 +1756,23 @@ int ubifs_check_dir_empty(struct inode *dir);
 /* xattr.c */
 extern const struct xattr_handler *ubifs_xattr_handlers[];
 ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size);
-int ubifs_init_security(struct inode *dentry, struct inode *inode,
-			const struct qstr *qstr);
 int ubifs_xattr_set(struct inode *host, const char *name, const void *value,
 		    size_t size, int flags);
 ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf,
 			size_t size);
 
+#ifdef CONFIG_UBIFS_FS_SECURITY
+extern int ubifs_init_security(struct inode *dentry, struct inode *inode,
+			const struct qstr *qstr);
+#else
+static inline int ubifs_init_security(struct inode *dentry,
+			struct inode *inode, const struct qstr *qstr)
+{
+	return 0;
+}
+#endif
+
+
 /* super.c */
 struct inode *ubifs_iget(struct super_block *sb, unsigned long inum);
 
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index efe00fcb8b75..de88732c680c 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -559,6 +559,7 @@ out_free:
 	return err;
 }
 
+#ifdef CONFIG_UBIFS_FS_SECURITY
 static int init_xattrs(struct inode *inode, const struct xattr *xattr_array,
 		      void *fs_info)
 {
@@ -599,6 +600,7 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode,
 	}
 	return err;
 }
+#endif
 
 static int xattr_get(const struct xattr_handler *handler,
 			   struct dentry *dentry, struct inode *inode,
@@ -639,15 +641,19 @@ static const struct xattr_handler ubifs_trusted_xattr_handler = {
 	.set = xattr_set,
 };
 
+#ifdef CONFIG_UBIFS_FS_SECURITY
 static const struct xattr_handler ubifs_security_xattr_handler = {
 	.prefix = XATTR_SECURITY_PREFIX,
 	.get = xattr_get,
 	.set = xattr_set,
 };
+#endif
 
 const struct xattr_handler *ubifs_xattr_handlers[] = {
 	&ubifs_user_xattr_handler,
 	&ubifs_trusted_xattr_handler,
+#ifdef CONFIG_UBIFS_FS_SECURITY
 	&ubifs_security_xattr_handler,
+#endif
 	NULL
 };
-- 
cgit v1.2.3


From 8a1435880f452430b41374d27ac4a33e7bd381ea Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabinv@axis.com>
Date: Mon, 3 Apr 2017 13:44:11 +0200
Subject: ubi: fastmap: Fix slab corruption

Booting with UBI fastmap and SLUB debugging enabled results in the
following splats.  The problem is that ubi_scan_fastmap() moves the
fastmap blocks from the scan_ai (allocated in scan_fast()) to the ai
allocated in ubi_attach().  This results in two problems:

 - When the scan_ai is freed, aebs which were allocated from its slab
   cache are still in use.

 - When the other ai is being destroyed in destroy_ai(), the
   arguments to kmem_cache_free() call are incorrect since aebs on its
   ->fastmap list were allocated with a slab cache from a differnt ai.

Fix this by making a copy of the aebs in ubi_scan_fastmap() instead of
moving them.

 =============================================================================
 BUG ubi_aeb_slab_cache (Not tainted): Objects remaining in ubi_aeb_slab_cache on __kmem_cache_shutdown()
 -----------------------------------------------------------------------------

 INFO: Slab 0xbfd2da3c objects=17 used=1 fp=0xb33d7748 flags=0x40000080
 CPU: 1 PID: 118 Comm: ubiattach Tainted: G    B           4.9.15 #3
 [<80111910>] (unwind_backtrace) from [<8010d498>] (show_stack+0x18/0x1c)
 [<8010d498>] (show_stack) from [<804a3274>] (dump_stack+0xb4/0xe0)
 [<804a3274>] (dump_stack) from [<8026c47c>] (slab_err+0x78/0x88)
 [<8026c47c>] (slab_err) from [<802735bc>] (__kmem_cache_shutdown+0x180/0x3e0)
 [<802735bc>] (__kmem_cache_shutdown) from [<8024e13c>] (shutdown_cache+0x1c/0x60)
 [<8024e13c>] (shutdown_cache) from [<8024ed64>] (kmem_cache_destroy+0x19c/0x20c)
 [<8024ed64>] (kmem_cache_destroy) from [<8057cc14>] (destroy_ai+0x1dc/0x1e8)
 [<8057cc14>] (destroy_ai) from [<8057f04c>] (ubi_attach+0x3f4/0x450)
 [<8057f04c>] (ubi_attach) from [<8056fe70>] (ubi_attach_mtd_dev+0x60c/0xff8)
 [<8056fe70>] (ubi_attach_mtd_dev) from [<80571d78>] (ctrl_cdev_ioctl+0x110/0x2b8)
 [<80571d78>] (ctrl_cdev_ioctl) from [<8029c77c>] (do_vfs_ioctl+0xac/0xa00)
 [<8029c77c>] (do_vfs_ioctl) from [<8029d10c>] (SyS_ioctl+0x3c/0x64)
 [<8029d10c>] (SyS_ioctl) from [<80108860>] (ret_fast_syscall+0x0/0x1c)
 INFO: Object 0xb33d7e88 @offset=3720
 INFO: Allocated in scan_peb+0x608/0x81c age=72 cpu=1 pid=118
 	kmem_cache_alloc+0x3b0/0x43c
 	scan_peb+0x608/0x81c
 	ubi_attach+0x124/0x450
 	ubi_attach_mtd_dev+0x60c/0xff8
 	ctrl_cdev_ioctl+0x110/0x2b8
 	do_vfs_ioctl+0xac/0xa00
 	SyS_ioctl+0x3c/0x64
 	ret_fast_syscall+0x0/0x1c
 kmem_cache_destroy ubi_aeb_slab_cache: Slab cache still has objects
 CPU: 1 PID: 118 Comm: ubiattach Tainted: G    B           4.9.15 #3
 [<80111910>] (unwind_backtrace) from [<8010d498>] (show_stack+0x18/0x1c)
 [<8010d498>] (show_stack) from [<804a3274>] (dump_stack+0xb4/0xe0)
 [<804a3274>] (dump_stack) from [<8024ed80>] (kmem_cache_destroy+0x1b8/0x20c)
 [<8024ed80>] (kmem_cache_destroy) from [<8057cc14>] (destroy_ai+0x1dc/0x1e8)
 [<8057cc14>] (destroy_ai) from [<8057f04c>] (ubi_attach+0x3f4/0x450)
 [<8057f04c>] (ubi_attach) from [<8056fe70>] (ubi_attach_mtd_dev+0x60c/0xff8)
 [<8056fe70>] (ubi_attach_mtd_dev) from [<80571d78>] (ctrl_cdev_ioctl+0x110/0x2b8)
 [<80571d78>] (ctrl_cdev_ioctl) from [<8029c77c>] (do_vfs_ioctl+0xac/0xa00)
 [<8029c77c>] (do_vfs_ioctl) from [<8029d10c>] (SyS_ioctl+0x3c/0x64)
 [<8029d10c>] (SyS_ioctl) from [<80108860>] (ret_fast_syscall+0x0/0x1c)
 cache_from_obj: Wrong slab cache. ubi_aeb_slab_cache but object is from ubi_aeb_slab_cache
 ------------[ cut here ]------------
 WARNING: CPU: 1 PID: 118 at mm/slab.h:354 kmem_cache_free+0x39c/0x450
 Modules linked in:
 CPU: 1 PID: 118 Comm: ubiattach Tainted: G    B           4.9.15 #3
 [<80111910>] (unwind_backtrace) from [<8010d498>] (show_stack+0x18/0x1c)
 [<8010d498>] (show_stack) from [<804a3274>] (dump_stack+0xb4/0xe0)
 [<804a3274>] (dump_stack) from [<80120e40>] (__warn+0xf4/0x10c)
 [<80120e40>] (__warn) from [<80120f20>] (warn_slowpath_null+0x28/0x30)
 [<80120f20>] (warn_slowpath_null) from [<80271fe0>] (kmem_cache_free+0x39c/0x450)
 [<80271fe0>] (kmem_cache_free) from [<8057cb88>] (destroy_ai+0x150/0x1e8)
 [<8057cb88>] (destroy_ai) from [<8057ef1c>] (ubi_attach+0x2c4/0x450)
 [<8057ef1c>] (ubi_attach) from [<8056fe70>] (ubi_attach_mtd_dev+0x60c/0xff8)
 [<8056fe70>] (ubi_attach_mtd_dev) from [<80571d78>] (ctrl_cdev_ioctl+0x110/0x2b8)
 [<80571d78>] (ctrl_cdev_ioctl) from [<8029c77c>] (do_vfs_ioctl+0xac/0xa00)
 [<8029c77c>] (do_vfs_ioctl) from [<8029d10c>] (SyS_ioctl+0x3c/0x64)
 [<8029d10c>] (SyS_ioctl) from [<80108860>] (ret_fast_syscall+0x0/0x1c)
 ---[ end trace 2bd8396277fd0a0b ]---
 =============================================================================
 BUG ubi_aeb_slab_cache (Tainted: G    B   W      ): page slab pointer corrupt.
 -----------------------------------------------------------------------------

 INFO: Allocated in scan_peb+0x608/0x81c age=104 cpu=1 pid=118
 	kmem_cache_alloc+0x3b0/0x43c
 	scan_peb+0x608/0x81c
 	ubi_attach+0x124/0x450
 	ubi_attach_mtd_dev+0x60c/0xff8
 	ctrl_cdev_ioctl+0x110/0x2b8
 	do_vfs_ioctl+0xac/0xa00
 	SyS_ioctl+0x3c/0x64
 	ret_fast_syscall+0x0/0x1c
 INFO: Slab 0xbfd2da3c objects=17 used=1 fp=0xb33d7748 flags=0x40000081
 INFO: Object 0xb33d7e88 @offset=3720 fp=0xb33d7da0

 Redzone b33d7e80: cc cc cc cc cc cc cc cc                          ........
 Object b33d7e88: 02 00 00 00 01 00 00 00 00 f0 ff 7f ff ff ff ff  ................
 Object b33d7e98: 00 00 00 00 00 00 00 00 bd 16 00 00 00 00 00 00  ................
 Object b33d7ea8: 00 01 00 00 00 02 00 00 00 00 00 00 00 00 00 00  ................
 Redzone b33d7eb8: cc cc cc cc                                      ....
 Padding b33d7f60: 5a 5a 5a 5a 5a 5a 5a 5a                          ZZZZZZZZ
 CPU: 1 PID: 118 Comm: ubiattach Tainted: G    B   W       4.9.15 #3
 [<80111910>] (unwind_backtrace) from [<8010d498>] (show_stack+0x18/0x1c)
 [<8010d498>] (show_stack) from [<804a3274>] (dump_stack+0xb4/0xe0)
 [<804a3274>] (dump_stack) from [<80271770>] (free_debug_processing+0x320/0x3c4)
 [<80271770>] (free_debug_processing) from [<80271ad0>] (__slab_free+0x2bc/0x430)
 [<80271ad0>] (__slab_free) from [<80272024>] (kmem_cache_free+0x3e0/0x450)
 [<80272024>] (kmem_cache_free) from [<8057cb88>] (destroy_ai+0x150/0x1e8)
 [<8057cb88>] (destroy_ai) from [<8057ef1c>] (ubi_attach+0x2c4/0x450)
 [<8057ef1c>] (ubi_attach) from [<8056fe70>] (ubi_attach_mtd_dev+0x60c/0xff8)
 [<8056fe70>] (ubi_attach_mtd_dev) from [<80571d78>] (ctrl_cdev_ioctl+0x110/0x2b8)
 [<80571d78>] (ctrl_cdev_ioctl) from [<8029c77c>] (do_vfs_ioctl+0xac/0xa00)
 [<8029c77c>] (do_vfs_ioctl) from [<8029d10c>] (SyS_ioctl+0x3c/0x64)
 [<8029d10c>] (SyS_ioctl) from [<80108860>] (ret_fast_syscall+0x0/0x1c)
 FIX ubi_aeb_slab_cache: Object at 0xb33d7e88 not freed

Signed-off-by: Rabin Vincent <rabinv@axis.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/fastmap.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
index c1f5c29e458e..b44c8d348e78 100644
--- a/drivers/mtd/ubi/fastmap.c
+++ b/drivers/mtd/ubi/fastmap.c
@@ -828,6 +828,24 @@ static int find_fm_anchor(struct ubi_attach_info *ai)
 	return ret;
 }
 
+static struct ubi_ainf_peb *clone_aeb(struct ubi_attach_info *ai,
+				      struct ubi_ainf_peb *old)
+{
+	struct ubi_ainf_peb *new;
+
+	new = ubi_alloc_aeb(ai, old->pnum, old->ec);
+	if (!new)
+		return NULL;
+
+	new->vol_id = old->vol_id;
+	new->sqnum = old->sqnum;
+	new->lnum = old->lnum;
+	new->scrub = old->scrub;
+	new->copy_flag = old->copy_flag;
+
+	return new;
+}
+
 /**
  * ubi_scan_fastmap - scan the fastmap.
  * @ubi: UBI device object
@@ -847,7 +865,7 @@ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
 	struct ubi_vid_hdr *vh;
 	struct ubi_ec_hdr *ech;
 	struct ubi_fastmap_layout *fm;
-	struct ubi_ainf_peb *tmp_aeb, *aeb;
+	struct ubi_ainf_peb *aeb;
 	int i, used_blocks, pnum, fm_anchor, ret = 0;
 	size_t fm_size;
 	__be32 crc, tmp_crc;
@@ -857,9 +875,16 @@ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
 	if (fm_anchor < 0)
 		return UBI_NO_FASTMAP;
 
-	/* Move all (possible) fastmap blocks into our new attach structure. */
-	list_for_each_entry_safe(aeb, tmp_aeb, &scan_ai->fastmap, u.list)
-		list_move_tail(&aeb->u.list, &ai->fastmap);
+	/* Copy all (possible) fastmap blocks into our new attach structure. */
+	list_for_each_entry(aeb, &scan_ai->fastmap, u.list) {
+		struct ubi_ainf_peb *new;
+
+		new = clone_aeb(ai, aeb);
+		if (!new)
+			return -ENOMEM;
+
+		list_add(&new->u.list, &ai->fastmap);
+	}
 
 	down_write(&ubi->fm_protect);
 	memset(ubi->fm_buf, 0, ubi->fm_size);
-- 
cgit v1.2.3


From 6a258f7d0fbd9f5e93099018741a7e64dd3a4578 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 8 Apr 2017 00:22:03 +0100
Subject: ubifs: Fix cut and paste error on sb type comparisons

The check for the bad node type of sb->type is checking sa->type
and not sb-type. This looks like a cut and paste error. Fix this.

Detected by PVS-Studio, warning: V581

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/debug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 718b749fa11a..7cd8a7b95299 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2391,8 +2391,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
 			ubifs_dump_node(c, sa->node);
 			return -EINVAL;
 		}
-		if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
-		    sa->type != UBIFS_XENT_NODE) {
+		if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE &&
+		    sb->type != UBIFS_XENT_NODE) {
 			ubifs_err(c, "bad node type %d", sb->type);
 			ubifs_dump_node(c, sb->node);
 			return -EINVAL;
-- 
cgit v1.2.3


From 2a068daf57424b98b3c9de8da8bbf47ca360a618 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sun, 16 Apr 2017 19:22:43 -0700
Subject: ubifs: Remove unnecessary assignment

Assigning a value of a variable to itself is not useful.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/recovery.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 586d59347fff..3af4472061cc 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -442,7 +442,6 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
 {
 	int empty_offs, pad_len;
 
-	lnum = lnum;
 	dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
 
 	ubifs_assert(!(*offs & 7));
-- 
cgit v1.2.3


From 798868c021016c1ee1825a3fbf10ae64ecc64c8e Mon Sep 17 00:00:00 2001
From: Rock Lee <rockdotlee@gmail.com>
Date: Thu, 13 Apr 2017 23:16:06 -0700
Subject: ubifs: Fix a typo in comment of ioctl2ubifs & ubifs2ioctl

Change 'convert' to 'converts'
Change 'UBIFS' to 'UBIFS inode flags'

Signed-off-by: Rock Lee <rockdotlee@gmail.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/ioctl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index da519ba205f6..79ae0039676d 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -53,7 +53,7 @@ void ubifs_set_inode_flags(struct inode *inode)
  * ioctl2ubifs - convert ioctl inode flags to UBIFS inode flags.
  * @ioctl_flags: flags to convert
  *
- * This function convert ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags
+ * This function converts ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags
  * (@UBIFS_COMPR_FL, etc).
  */
 static int ioctl2ubifs(int ioctl_flags)
@@ -78,8 +78,8 @@ static int ioctl2ubifs(int ioctl_flags)
  * ubifs2ioctl - convert UBIFS inode flags to ioctl inode flags.
  * @ubifs_flags: flags to convert
  *
- * This function convert UBIFS (@UBIFS_COMPR_FL, etc) to ioctl flags
- * (@FS_COMPR_FL, etc).
+ * This function converts UBIFS inode flags (@UBIFS_COMPR_FL, etc) to ioctl
+ * flags (@FS_COMPR_FL, etc).
  */
 static int ubifs2ioctl(int ubifs_flags)
 {
-- 
cgit v1.2.3


From 7bccd12d27b7e358823feb5429731b8ee698b173 Mon Sep 17 00:00:00 2001
From: Ben Shelton <ben.shelton@ni.com>
Date: Mon, 27 Mar 2017 13:24:44 -0500
Subject: ubi: Add debugfs file for tracking PEB state

Add a file under debugfs to allow easy access to the erase count for
each physical erase block on an UBI device.  This is useful when
debugging data integrity issues with UBIFS on NAND flash devices.

Signed-off-by: Ben Shelton <ben.shelton@ni.com>
Signed-off-by: Zach Brown <zach.brown@ni.com>

v2:
* If ubi_io_is_bad eraseblk_count_seq_show just returns the err.
* if ubi->lookuptbl returns null, its no longer treated as an error
  instead info for that block is not printeded
* Removed check for UBI_MAX_ERASECOUNTER since it is impossible to hit
* Removed block state from print, if a block is printed then it is good and
  if it is not printed, then it is bad.

v3:
* Remove errant ! symbol from if statement checking if erase count is valid.
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/debug.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 125 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index f101a4985a7c..7bc96294ae4d 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -22,6 +22,7 @@
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
+#include <linux/seq_file.h>
 
 
 /**
@@ -386,7 +387,9 @@ out:
 	return count;
 }
 
-/* File operations for all UBI debugfs files */
+/* File operations for all UBI debugfs files except
+ * detailed_erase_block_info
+ */
 static const struct file_operations dfs_fops = {
 	.read   = dfs_file_read,
 	.write  = dfs_file_write,
@@ -395,6 +398,121 @@ static const struct file_operations dfs_fops = {
 	.owner  = THIS_MODULE,
 };
 
+/* As long as the position is less then that total number of erase blocks,
+ * we still have more to print.
+ */
+static void *eraseblk_count_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct ubi_device *ubi = s->private;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	if (*pos < ubi->peb_count)
+		return pos;
+
+	return NULL;
+}
+
+/* Since we are using the position as the iterator, we just need to check if we
+ * are done and increment the position.
+ */
+static void *eraseblk_count_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct ubi_device *ubi = s->private;
+
+	if (v == SEQ_START_TOKEN)
+		return pos;
+	(*pos)++;
+
+	if (*pos < ubi->peb_count)
+		return pos;
+
+	return NULL;
+}
+
+static void eraseblk_count_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int eraseblk_count_seq_show(struct seq_file *s, void *iter)
+{
+	struct ubi_device *ubi = s->private;
+	struct ubi_wl_entry *wl;
+	int *block_number = iter;
+	int erase_count = -1;
+	int err;
+
+	/* If this is the start, print a header */
+	if (iter == SEQ_START_TOKEN) {
+		seq_puts(s,
+			 "physical_block_number\terase_count\tblock_status\tread_status\n");
+		return 0;
+	}
+
+	err = ubi_io_is_bad(ubi, *block_number);
+	if (err)
+		return err;
+
+	spin_lock(&ubi->wl_lock);
+
+	wl = ubi->lookuptbl[*block_number];
+	if (wl)
+		erase_count = wl->ec;
+
+	spin_unlock(&ubi->wl_lock);
+
+	if (erase_count < 0)
+		return 0;
+
+	seq_printf(s, "%-22d\t%-11d\n", *block_number, erase_count);
+
+	return 0;
+}
+
+static const struct seq_operations eraseblk_count_seq_ops = {
+	.start = eraseblk_count_seq_start,
+	.next = eraseblk_count_seq_next,
+	.stop = eraseblk_count_seq_stop,
+	.show = eraseblk_count_seq_show
+};
+
+static int eraseblk_count_open(struct inode *inode, struct file *f)
+{
+	struct seq_file *s;
+	int err;
+
+	err = seq_open(f, &eraseblk_count_seq_ops);
+	if (err)
+		return err;
+
+	s = f->private_data;
+	s->private = ubi_get_device((unsigned long)inode->i_private);
+
+	if (!s->private)
+		return -ENODEV;
+	else
+		return 0;
+}
+
+static int eraseblk_count_release(struct inode *inode, struct file *f)
+{
+	struct seq_file *s = f->private_data;
+	struct ubi_device *ubi = s->private;
+
+	ubi_put_device(ubi);
+
+	return seq_release(inode, f);
+}
+
+static const struct file_operations eraseblk_count_fops = {
+	.owner = THIS_MODULE,
+	.open = eraseblk_count_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = eraseblk_count_release,
+};
+
 /**
  * ubi_debugfs_init_dev - initialize debugfs for an UBI device.
  * @ubi: UBI device description object
@@ -491,6 +609,12 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi)
 		goto out_remove;
 	d->dfs_power_cut_max = dent;
 
+	fname = "detailed_erase_block_info";
+	dent = debugfs_create_file(fname, S_IRUSR, d->dfs_dir, (void *)ubi_num,
+				   &eraseblk_count_fops);
+	if (IS_ERR_OR_NULL(dent))
+		goto out_remove;
+
 	return 0;
 
 out_remove:
-- 
cgit v1.2.3


From d62844a825e87da345e11639e98deb617ef11e08 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Sat, 6 May 2017 11:17:06 +0800
Subject: bonding: check nla_put_be32 return value

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_netlink.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index c502c139d3bc..47a8103610bc 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -549,7 +549,8 @@ static int bond_fill_info(struct sk_buff *skb,
 	targets_added = 0;
 	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) {
 		if (bond->params.arp_targets[i]) {
-			nla_put_be32(skb, i, bond->params.arp_targets[i]);
+			if (nla_put_be32(skb, i, bond->params.arp_targets[i]))
+				goto nla_put_failure;
 			targets_added = 1;
 		}
 	}
-- 
cgit v1.2.3


From 8ce7aaaa9797f944ad2f50e5160c8a20a473aedf Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Sat, 6 May 2017 07:29:45 +0200
Subject: net: dsa: loop: Check for memory allocation failure

If 'devm_kzalloc' fails, a NULL pointer will be dereferenced.
Return -ENOMEM instead, as done for some other memory allocation just a
few lines above.

Fixes: 98cd1552ea27 ("net: dsa: Mock-up driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/dsa_loop.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index f0fc4de4fc9a..a19e1781e9bb 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -256,6 +256,9 @@ static int dsa_loop_drv_probe(struct mdio_device *mdiodev)
 		return -ENOMEM;
 
 	ps = devm_kzalloc(&mdiodev->dev, sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		return -ENOMEM;
+
 	ps->netdev = dev_get_by_name(&init_net, pdata->netdev);
 	if (!ps->netdev)
 		return -EPROBE_DEFER;
-- 
cgit v1.2.3


From 3bb4858fda1cac2ae677edb5134932af4c7f8ff8 Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Sat, 6 May 2017 14:25:06 +0530
Subject: cxgb4: avoid disabling FEC by default

Recent Chelsio firmware started using few port capablity bits to
manage FEC and as driver was not aware of FEC changes those bits
were zeroed, consequently disabling FEC.

Avoid zeroing those bits and default to whatever the firmware
tells us the Link is currently advertising.

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h    |  9 +++++++
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c    | 38 ++++++++++++++++++++++-----
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h |  6 ++---
 3 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 163543b1ea0b..862e0083420b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -108,6 +108,12 @@ enum {
 	PAUSE_AUTONEG = 1 << 2
 };
 
+enum {
+	FEC_AUTO      = 1 << 0,	 /* IEEE 802.3 "automatic" */
+	FEC_RS        = 1 << 1,  /* Reed-Solomon */
+	FEC_BASER_RS  = 1 << 2   /* BaseR/Reed-Solomon */
+};
+
 struct port_stats {
 	u64 tx_octets;            /* total # of octets in good frames */
 	u64 tx_frames;            /* all good frames */
@@ -432,6 +438,9 @@ struct link_config {
 	unsigned int   speed;            /* actual link speed */
 	unsigned char  requested_fc;     /* flow control user has requested */
 	unsigned char  fc;               /* actual link flow control */
+	unsigned char  auto_fec;	 /* Forward Error Correction: */
+	unsigned char  requested_fec;	 /* "automatic" (IEEE 802.3), */
+	unsigned char  fec;		 /* requested, and actual in use */
 	unsigned char  autoneg;          /* autonegotiating? */
 	unsigned char  link_ok;          /* link up? */
 	unsigned char  link_down_rc;     /* link down reason */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 0de8eb72325c..aded42b96f6d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -3707,7 +3707,8 @@ int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port,
 		  struct link_config *lc)
 {
 	struct fw_port_cmd c;
-	unsigned int fc = 0, mdi = FW_PORT_CAP_MDI_V(FW_PORT_CAP_MDI_AUTO);
+	unsigned int mdi = FW_PORT_CAP_MDI_V(FW_PORT_CAP_MDI_AUTO);
+	unsigned int fc = 0, fec = 0, fw_fec = 0;
 
 	lc->link_ok = 0;
 	if (lc->requested_fc & PAUSE_RX)
@@ -3715,6 +3716,13 @@ int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port,
 	if (lc->requested_fc & PAUSE_TX)
 		fc |= FW_PORT_CAP_FC_TX;
 
+	fec = lc->requested_fec & FEC_AUTO ? lc->auto_fec : lc->requested_fec;
+
+	if (fec & FEC_RS)
+		fw_fec |= FW_PORT_CAP_FEC_RS;
+	if (fec & FEC_BASER_RS)
+		fw_fec |= FW_PORT_CAP_FEC_BASER_RS;
+
 	memset(&c, 0, sizeof(c));
 	c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
 				     FW_CMD_REQUEST_F | FW_CMD_EXEC_F |
@@ -3725,13 +3733,15 @@ int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port,
 
 	if (!(lc->supported & FW_PORT_CAP_ANEG)) {
 		c.u.l1cfg.rcap = cpu_to_be32((lc->supported & ADVERT_MASK) |
-					     fc);
+					     fc | fw_fec);
 		lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
 	} else if (lc->autoneg == AUTONEG_DISABLE) {
-		c.u.l1cfg.rcap = cpu_to_be32(lc->requested_speed | fc | mdi);
+		c.u.l1cfg.rcap = cpu_to_be32(lc->requested_speed | fc |
+					     fw_fec | mdi);
 		lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
 	} else
-		c.u.l1cfg.rcap = cpu_to_be32(lc->advertising | fc | mdi);
+		c.u.l1cfg.rcap = cpu_to_be32(lc->advertising | fc |
+					     fw_fec | mdi);
 
 	return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
 }
@@ -7407,13 +7417,26 @@ static void get_pci_mode(struct adapter *adapter, struct pci_params *p)
  *	Initializes the SW state maintained for each link, including the link's
  *	capabilities and default speed/flow-control/autonegotiation settings.
  */
-static void init_link_config(struct link_config *lc, unsigned int caps)
+static void init_link_config(struct link_config *lc, unsigned int pcaps,
+			     unsigned int acaps)
 {
-	lc->supported = caps;
+	lc->supported = pcaps;
 	lc->lp_advertising = 0;
 	lc->requested_speed = 0;
 	lc->speed = 0;
 	lc->requested_fc = lc->fc = PAUSE_RX | PAUSE_TX;
+	lc->auto_fec = 0;
+
+	/* For Forward Error Control, we default to whatever the Firmware
+	 * tells us the Link is currently advertising.
+	 */
+	if (acaps & FW_PORT_CAP_FEC_RS)
+		lc->auto_fec |= FEC_RS;
+	if (acaps & FW_PORT_CAP_FEC_BASER_RS)
+		lc->auto_fec |= FEC_BASER_RS;
+	lc->requested_fec = FEC_AUTO;
+	lc->fec = lc->auto_fec;
+
 	if (lc->supported & FW_PORT_CAP_ANEG) {
 		lc->advertising = lc->supported & ADVERT_MASK;
 		lc->autoneg = AUTONEG_ENABLE;
@@ -7991,7 +8014,8 @@ int t4_init_portinfo(struct port_info *pi, int mbox,
 	pi->port_type = FW_PORT_CMD_PTYPE_G(ret);
 	pi->mod_type = FW_PORT_MOD_TYPE_NA;
 
-	init_link_config(&pi->link_cfg, be16_to_cpu(c.u.info.pcap));
+	init_link_config(&pi->link_cfg, be16_to_cpu(c.u.info.pcap),
+			 be16_to_cpu(c.u.info.acap));
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 8f8c079d0d2b..251a35e9795c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -2263,9 +2263,9 @@ enum fw_port_cap {
 	FW_PORT_CAP_ANEG		= 0x0100,
 	FW_PORT_CAP_MDIX		= 0x0200,
 	FW_PORT_CAP_MDIAUTO		= 0x0400,
-	FW_PORT_CAP_FEC			= 0x0800,
-	FW_PORT_CAP_TECHKR		= 0x1000,
-	FW_PORT_CAP_TECHKX4		= 0x2000,
+	FW_PORT_CAP_FEC_RS		= 0x0800,
+	FW_PORT_CAP_FEC_BASER_RS	= 0x1000,
+	FW_PORT_CAP_FEC_RESERVED	= 0x2000,
 	FW_PORT_CAP_802_3_PAUSE		= 0x4000,
 	FW_PORT_CAP_802_3_ASM_DIR	= 0x8000,
 };
-- 
cgit v1.2.3


From 294316a4af16cd5c355a80bd2dc7ea803b88f6ba Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Sat, 6 May 2017 23:42:16 +0800
Subject: net/hippi/rrunner: use memdup_user

Use memdup_user() helper instead of open-coding to simplify the code.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hippi/rrunner.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index dd7fc6659ad4..365c4d987a01 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -1615,17 +1615,14 @@ static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 			return -EPERM;
 		}
 
-		image = kmalloc(EEPROM_WORDS * sizeof(u32), GFP_KERNEL);
-		oldimage = kmalloc(EEPROM_WORDS * sizeof(u32), GFP_KERNEL);
-		if (!image || !oldimage) {
-			error = -ENOMEM;
-			goto wf_out;
-		}
+		image = memdup_user(rq->ifr_data, EEPROM_BYTES);
+		if (IS_ERR(image))
+			return PTR_ERR(image);
 
-		error = copy_from_user(image, rq->ifr_data, EEPROM_BYTES);
-		if (error) {
-			error = -EFAULT;
-			goto wf_out;
+		oldimage = kmalloc(EEPROM_BYTES, GFP_KERNEL);
+		if (!oldimage) {
+			kfree(image);
+			return -ENOMEM;
 		}
 
 		if (rrpriv->fw_running){
-- 
cgit v1.2.3


From 871ff2ebe0af0d7f1050bfc8bce62fddea2b12ec Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Sat, 6 May 2017 23:42:22 +0800
Subject: yam: use memdup_user

Use memdup_user() helper instead of open-coding to simplify the code.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/yam.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index b6891ada1d7b..7a7c5224a336 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -976,12 +976,10 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	case SIOCYAMSMCS:
 		if (netif_running(dev))
 			return -EINVAL;		/* Cannot change this parameter when up */
-		if ((ym = kmalloc(sizeof(struct yamdrv_ioctl_mcs), GFP_KERNEL)) == NULL)
-			return -ENOBUFS;
-		if (copy_from_user(ym, ifr->ifr_data, sizeof(struct yamdrv_ioctl_mcs))) {
-			kfree(ym);
-			return -EFAULT;
-		}
+		ym = memdup_user(ifr->ifr_data,
+				 sizeof(struct yamdrv_ioctl_mcs));
+		if (IS_ERR(ym))
+			return PTR_ERR(ym);
 		if (ym->bitrate > YAM_MAXBITRATE) {
 			kfree(ym);
 			return -EINVAL;
-- 
cgit v1.2.3


From 0d0e57697f162da4aa218b5feafe614fb666db07 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 8 May 2017 00:04:09 +0200
Subject: bpf: don't let ldimm64 leak map addresses on unprivileged

The patch fixes two things at once:

1) It checks the env->allow_ptr_leaks and only prints the map address to
   the log if we have the privileges to do so, otherwise it just dumps 0
   as we would when kptr_restrict is enabled on %pK. Given the latter is
   off by default and not every distro sets it, I don't want to rely on
   this, hence the 0 by default for unprivileged.

2) Printing of ldimm64 in the verifier log is currently broken in that
   we don't print the full immediate, but only the 32 bit part of the
   first insn part for ldimm64. Thus, fix this up as well; it's okay to
   access, since we verified all ldimm64 earlier already (including just
   constants) through replace_map_fd_with_map_ptr().

Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs")
Fixes: cbd357008604 ("bpf: verifier (add ability to receive verification log)")
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c2ff608c1984..c5b56c92f8e2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -298,7 +298,8 @@ static const char *const bpf_jmp_string[16] = {
 	[BPF_EXIT >> 4] = "exit",
 };
 
-static void print_bpf_insn(struct bpf_insn *insn)
+static void print_bpf_insn(const struct bpf_verifier_env *env,
+			   const struct bpf_insn *insn)
 {
 	u8 class = BPF_CLASS(insn->code);
 
@@ -362,9 +363,19 @@ static void print_bpf_insn(struct bpf_insn *insn)
 				insn->code,
 				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 				insn->src_reg, insn->imm);
-		} else if (BPF_MODE(insn->code) == BPF_IMM) {
-			verbose("(%02x) r%d = 0x%x\n",
-				insn->code, insn->dst_reg, insn->imm);
+		} else if (BPF_MODE(insn->code) == BPF_IMM &&
+			   BPF_SIZE(insn->code) == BPF_DW) {
+			/* At this point, we already made sure that the second
+			 * part of the ldimm64 insn is accessible.
+			 */
+			u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
+			bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
+
+			if (map_ptr && !env->allow_ptr_leaks)
+				imm = 0;
+
+			verbose("(%02x) r%d = 0x%llx\n", insn->code,
+				insn->dst_reg, (unsigned long long)imm);
 		} else {
 			verbose("BUG_ld_%02x\n", insn->code);
 			return;
@@ -2853,7 +2864,7 @@ static int do_check(struct bpf_verifier_env *env)
 
 		if (log_level) {
 			verbose("%d: ", insn_idx);
-			print_bpf_insn(insn);
+			print_bpf_insn(env, insn);
 		}
 
 		err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
-- 
cgit v1.2.3


From 8ed508fd4b84a38db63ddeee8ab6905f06cfa589 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 8 May 2017 17:57:13 +0800
Subject: vti: check nla_put_* return value

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_vti.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 40977413fd48..4ec9affb2252 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -546,12 +546,13 @@ static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	struct ip_tunnel *t = netdev_priv(dev);
 	struct ip_tunnel_parm *p = &t->parms;
 
-	nla_put_u32(skb, IFLA_VTI_LINK, p->link);
-	nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
-	nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
-	nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr);
-	nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr);
-	nla_put_u32(skb, IFLA_VTI_FWMARK, t->fwmark);
+	if (nla_put_u32(skb, IFLA_VTI_LINK, p->link) ||
+	    nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key) ||
+	    nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key) ||
+	    nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr) ||
+	    nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr) ||
+	    nla_put_u32(skb, IFLA_VTI_FWMARK, t->fwmark))
+		return -EMSGSIZE;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 05c5d0041972d3f3c6d881e94aa351f89a62eb2a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 May 2017 17:14:19 +0300
Subject: stmmac: pci: set default number of rx and tx queues

The commit 26d6851fd24e

	("net: stmmac: set default number of rx and tx queues in stmmac_pci")

missed Intel Quark configuration. Append it here.

Fixes: 26d6851fd24e ("net: stmmac: set default number of rx and tx queues in stmmac_pci")
Cc: Joao Pinto <Joao.Pinto@synopsys.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Joao Pinto <jpinto@synopsys.com>
Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 39be96779145..ae3e836f9bb6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -145,6 +145,10 @@ static int quark_default_data(struct plat_stmmacenet_data *plat,
 	/* Set the maxmtu to a default of JUMBO_LEN */
 	plat->maxmtu = JUMBO_LEN;
 
+	/* Set default number of RX and TX queues to use */
+	plat->tx_queues_to_use = 1;
+	plat->rx_queues_to_use = 1;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From a1437e57af1c07ef01b921c4a1ab8efd5134b597 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 May 2017 17:14:20 +0300
Subject: stmmac: pci: TX and RX queue priority configuration

The commit a8f5102af2a7

	("net: stmmac: TX and RX queue priority configuration")

missed Intel Quark configuration. Append it here.

Fixes: a8f5102af2a7 ("net: stmmac: TX and RX queue priority configuration")
Cc: Joao Pinto <Joao.Pinto@synopsys.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Joao Pinto <jpinto@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index ae3e836f9bb6..c015a715a8ac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -149,6 +149,10 @@ static int quark_default_data(struct plat_stmmacenet_data *plat,
 	plat->tx_queues_to_use = 1;
 	plat->rx_queues_to_use = 1;
 
+	/* Disable Priority config by default */
+	plat->tx_queues_cfg[0].use_prio = false;
+	plat->rx_queues_cfg[0].use_prio = false;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From efcd24147fa03817485dab9ff9d5542a0f28684b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 May 2017 17:14:21 +0300
Subject: stmmac: pci: RX queue routing configuration

The commit abe80fdc6ee6

    ("net: stmmac: RX queue routing configuration")

missed Intel Quark configuration. Append it here.

Fixes: abe80fdc6ee6 ("net: stmmac: RX queue routing configuration")
Cc: Joao Pinto <Joao.Pinto@synopsys.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Joao Pinto <jpinto@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index c015a715a8ac..2456e0a945ef 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -153,6 +153,9 @@ static int quark_default_data(struct plat_stmmacenet_data *plat,
 	plat->tx_queues_cfg[0].use_prio = false;
 	plat->rx_queues_cfg[0].use_prio = false;
 
+	/* Disable RX queues routing by default */
+	plat->rx_queues_cfg[0].pkt_route = 0x0;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 70fe4432bb360207fba04b4137f330a7a109f7a6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 May 2017 17:14:22 +0300
Subject: stmmac: pci: split out common_default_data() helper

New helper is added in order to prevent misconfiguration happened
for one of the platforms when configuration data is expanded.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Joao Pinto <jpinto@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 52 ++++++++----------------
 1 file changed, 18 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 2456e0a945ef..22f910795be4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -70,11 +70,8 @@ static int stmmac_pci_find_phy_addr(struct stmmac_pci_info *info)
 	return -ENODEV;
 }
 
-static void stmmac_default_data(struct plat_stmmacenet_data *plat)
+static void common_default_data(struct plat_stmmacenet_data *plat)
 {
-	plat->bus_id = 1;
-	plat->phy_addr = 0;
-	plat->interface = PHY_INTERFACE_MODE_GMII;
 	plat->clk_csr = 2;	/* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
 	plat->has_gmac = 1;
 	plat->force_sf_dma_mode = 1;
@@ -82,10 +79,6 @@ static void stmmac_default_data(struct plat_stmmacenet_data *plat)
 	plat->mdio_bus_data->phy_reset = NULL;
 	plat->mdio_bus_data->phy_mask = 0;
 
-	plat->dma_cfg->pbl = 32;
-	plat->dma_cfg->pblx8 = true;
-	/* TODO: AXI */
-
 	/* Set default value for multicast hash bins */
 	plat->multicast_filter_bins = HASH_TABLE_SIZE;
 
@@ -107,12 +100,29 @@ static void stmmac_default_data(struct plat_stmmacenet_data *plat)
 	plat->rx_queues_cfg[0].pkt_route = 0x0;
 }
 
+static void stmmac_default_data(struct plat_stmmacenet_data *plat)
+{
+	/* Set common default data first */
+	common_default_data(plat);
+
+	plat->bus_id = 1;
+	plat->phy_addr = 0;
+	plat->interface = PHY_INTERFACE_MODE_GMII;
+
+	plat->dma_cfg->pbl = 32;
+	plat->dma_cfg->pblx8 = true;
+	/* TODO: AXI */
+}
+
 static int quark_default_data(struct plat_stmmacenet_data *plat,
 			      struct stmmac_pci_info *info)
 {
 	struct pci_dev *pdev = info->pdev;
 	int ret;
 
+	/* Set common default data first */
+	common_default_data(plat);
+
 	/*
 	 * Refuse to load the driver and register net device if MAC controller
 	 * does not connect to any PHY interface.
@@ -124,38 +134,12 @@ static int quark_default_data(struct plat_stmmacenet_data *plat,
 	plat->bus_id = PCI_DEVID(pdev->bus->number, pdev->devfn);
 	plat->phy_addr = ret;
 	plat->interface = PHY_INTERFACE_MODE_RMII;
-	plat->clk_csr = 2;
-	plat->has_gmac = 1;
-	plat->force_sf_dma_mode = 1;
-
-	plat->mdio_bus_data->phy_reset = NULL;
-	plat->mdio_bus_data->phy_mask = 0;
 
 	plat->dma_cfg->pbl = 16;
 	plat->dma_cfg->pblx8 = true;
 	plat->dma_cfg->fixed_burst = 1;
 	/* AXI (TODO) */
 
-	/* Set default value for multicast hash bins */
-	plat->multicast_filter_bins = HASH_TABLE_SIZE;
-
-	/* Set default value for unicast filter entries */
-	plat->unicast_filter_entries = 1;
-
-	/* Set the maxmtu to a default of JUMBO_LEN */
-	plat->maxmtu = JUMBO_LEN;
-
-	/* Set default number of RX and TX queues to use */
-	plat->tx_queues_to_use = 1;
-	plat->rx_queues_to_use = 1;
-
-	/* Disable Priority config by default */
-	plat->tx_queues_cfg[0].use_prio = false;
-	plat->rx_queues_cfg[0].use_prio = false;
-
-	/* Disable RX queues routing by default */
-	plat->rx_queues_cfg[0].pkt_route = 0x0;
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From aeca3a77b1e0ed06a095933b89c86aed007383eb Mon Sep 17 00:00:00 2001
From: Jim Baxter <jim_baxter@mentor.com>
Date: Mon, 8 May 2017 13:49:57 +0100
Subject: net: cdc_ncm: Fix TX zero padding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The zero padding that is added to NTB's does
not zero the memory correctly.
This is because the skb_put modifies the value
of skb_out->len which results in the memset
command not setting any memory to zero as
(ctx->tx_max - skb_out->len) == 0.

I have resolved this by storing the size of
the memory to be zeroed before the skb_put
and using this in the memset call.

Signed-off-by: Jim Baxter <jim_baxter@mentor.com>
Reviewed-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index bb3f71f9fbde..b5cec1824a78 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1088,6 +1088,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	u16 n = 0, index, ndplen;
 	u8 ready2send = 0;
 	u32 delayed_ndp_size;
+	size_t padding_count;
 
 	/* When our NDP gets written in cdc_ncm_ndp(), then skb_out->len gets updated
 	 * accordingly. Otherwise, we should check here.
@@ -1244,11 +1245,13 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	 * a ZLP after full sized NTBs.
 	 */
 	if (!(dev->driver_info->flags & FLAG_SEND_ZLP) &&
-	    skb_out->len > ctx->min_tx_pkt)
-		memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0,
-		       ctx->tx_max - skb_out->len);
-	else if (skb_out->len < ctx->tx_max && (skb_out->len % dev->maxpacket) == 0)
+	    skb_out->len > ctx->min_tx_pkt) {
+		padding_count = ctx->tx_max - skb_out->len;
+		memset(skb_put(skb_out, padding_count), 0, padding_count);
+	} else if (skb_out->len < ctx->tx_max &&
+		   (skb_out->len % dev->maxpacket) == 0) {
 		*skb_put(skb_out, 1) = 0;	/* force short packet */
+	}
 
 	/* set final frame length */
 	nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data;
-- 
cgit v1.2.3


From 242d3a49a2a1a71d8eb9f953db1bcaa9d698ce00 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 8 May 2017 10:12:13 -0700
Subject: ipv6: reorder ip6_route_dev_notifier after ipv6_dev_notf

For each netns (except init_net), we initialize its null entry
in 3 places:

1) The template itself, as we use kmemdup()
2) Code around dst_init_metrics() in ip6_route_net_init()
3) ip6_route_dev_notify(), which is supposed to initialize it after
   loopback registers

Unfortunately the last one still happens in a wrong order because
we expect to initialize net->ipv6.ip6_null_entry->rt6i_idev to
net->loopback_dev's idev, thus we have to do that after we add
idev to loopback. However, this notifier has priority == 0 same as
ipv6_dev_notf, and ipv6_dev_notf is registered after
ip6_route_dev_notifier so it is called actually after
ip6_route_dev_notifier. This is similar to commit 2f460933f58e
("ipv6: initialize route null entry in addrconf_init()") which
fixes init_net.

Fix it by picking a smaller priority for ip6_route_dev_notifier.
Also, we have to release the refcnt accordingly when unregistering
loopback_dev because device exit functions are called before subsys
exit functions.

Acked-by: David Ahern <dsahern@gmail.com>
Tested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h |  2 ++
 net/ipv6/addrconf.c    |  1 +
 net/ipv6/route.c       | 13 +++++++++++--
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 2452e6449532..b43a4eec3cec 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -20,6 +20,8 @@
 #define ADDRCONF_TIMER_FUZZ		(HZ / 4)
 #define ADDRCONF_TIMER_FUZZ_MAX		(HZ)
 
+#define ADDRCONF_NOTIFY_PRIORITY	0
+
 #include <linux/in.h>
 #include <linux/in6.h>
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 77a4bd526d6e..8d297a79b568 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3548,6 +3548,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
  */
 static struct notifier_block ipv6_dev_notf = {
 	.notifier_call = addrconf_notify,
+	.priority = ADDRCONF_NOTIFY_PRIORITY,
 };
 
 static void addrconf_type_change(struct net_device *dev, unsigned long event)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2f1136627dcb..dc61b0b5e64e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3709,7 +3709,10 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct net *net = dev_net(dev);
 
-	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
+	if (!(dev->flags & IFF_LOOPBACK))
+		return NOTIFY_OK;
+
+	if (event == NETDEV_REGISTER) {
 		net->ipv6.ip6_null_entry->dst.dev = dev;
 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -3717,6 +3720,12 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
+#endif
+	 } else if (event == NETDEV_UNREGISTER) {
+		in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+		in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
+		in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
 #endif
 	}
 
@@ -4024,7 +4033,7 @@ static struct pernet_operations ip6_route_net_late_ops = {
 
 static struct notifier_block ip6_route_dev_notifier = {
 	.notifier_call = ip6_route_dev_notify,
-	.priority = 0,
+	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
 };
 
 void __init ip6_route_init_special_entries(void)
-- 
cgit v1.2.3


From 48f5bccc60675f8426a6159935e8636a1fd89f56 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Mon, 8 May 2017 14:21:21 -0500
Subject: net: ethernet: ti: cpsw: adjust cpsw fifos depth for fullduplex flow
 control

When users set flow control using ethtool the bits are set properly in the
CPGMAC_SL MACCONTROL register, but the FIFO depth in the respective Port n
Maximum FIFO Blocks (Pn_MAX_BLKS) registers remains set to the minimum size
reset value. When receive flow control is enabled on a port, the port's
associated FIFO block allocation must be adjusted. The port RX allocation
must increase to accommodate the flow control runout. The TRM recommends
numbers of 5 or 6.

Hence, apply required Port FIFO configuration to
Pn_MAX_BLKS.Pn_TX_MAX_BLKS=0xF and Pn_MAX_BLKS.Pn_RX_MAX_BLKS=0x5 during
interface initialization.

Cc: Schuyler Patton <spatton@ti.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index fa674a8bda0c..f4d7aec50479 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -287,6 +287,10 @@ struct cpsw_ss_regs {
 /* Bit definitions for the CPSW1_TS_SEQ_LTYPE register */
 #define CPSW_V1_SEQ_ID_OFS_SHIFT	16
 
+#define CPSW_MAX_BLKS_TX		15
+#define CPSW_MAX_BLKS_TX_SHIFT		4
+#define CPSW_MAX_BLKS_RX		5
+
 struct cpsw_host_regs {
 	u32	max_blks;
 	u32	blk_cnt;
@@ -1278,11 +1282,23 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 	switch (cpsw->version) {
 	case CPSW_VERSION_1:
 		slave_write(slave, TX_PRIORITY_MAPPING, CPSW1_TX_PRI_MAP);
+		/* Increase RX FIFO size to 5 for supporting fullduplex
+		 * flow control mode
+		 */
+		slave_write(slave,
+			    (CPSW_MAX_BLKS_TX << CPSW_MAX_BLKS_TX_SHIFT) |
+			    CPSW_MAX_BLKS_RX, CPSW1_MAX_BLKS);
 		break;
 	case CPSW_VERSION_2:
 	case CPSW_VERSION_3:
 	case CPSW_VERSION_4:
 		slave_write(slave, TX_PRIORITY_MAPPING, CPSW2_TX_PRI_MAP);
+		/* Increase RX FIFO size to 5 for supporting fullduplex
+		 * flow control mode
+		 */
+		slave_write(slave,
+			    (CPSW_MAX_BLKS_TX << CPSW_MAX_BLKS_TX_SHIFT) |
+			    CPSW_MAX_BLKS_RX, CPSW2_MAX_BLKS);
 		break;
 	}
 
-- 
cgit v1.2.3


From 1a1df90ae195d9c676b6f313375f3e8f13752e13 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Sun, 9 Apr 2017 09:41:32 +0800
Subject: ide: use setup_timer

Use setup_timer() instead of init_timer() to simplify the code.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ide/ide-probe.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index a74ae8df4bb8..023562565d11 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1183,9 +1183,7 @@ static void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
 
 	spin_lock_init(&hwif->lock);
 
-	init_timer(&hwif->timer);
-	hwif->timer.function = &ide_timer_expiry;
-	hwif->timer.data = (unsigned long)hwif;
+	setup_timer(&hwif->timer, &ide_timer_expiry, (unsigned long)hwif);
 
 	init_completion(&hwif->gendev_rel_comp);
 
-- 
cgit v1.2.3


From acfead32f3f938e456d7d5e1a82c743d30b8b886 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 14 Apr 2017 14:35:33 -0400
Subject: ide: don't call memcpy with the same source and destination

The parisc architecture recently reimplemented the memcpy function and
their reimplementation crashed when source and destination overlapped.

The crash happened in the function ide_complete_cmd where memcpy is called
with the same source and destination pointer. According to the C
specification, memcpy behavior is undefined if the source and destination
range overlaps. This patches fixes the undefined behavior.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ide/ide-io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 45b3f41a43d4..323af721f8cb 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -107,7 +107,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
 
 		if (cmd->tf_flags & IDE_TFLAG_DYN)
 			kfree(orig_cmd);
-		else
+		else if (cmd != orig_cmd)
 			memcpy(orig_cmd, cmd, sizeof(*cmd));
 	}
 }
-- 
cgit v1.2.3


From 922c60e89d52730050c6ccca218bff40cc8bcd8e Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@broadcom.com>
Date: Mon, 8 May 2017 17:48:35 -0400
Subject: net: mdio-mux: bcm-iproc: call mdiobus_free() in error path

If an error is encountered in mdio_mux_init(), the error path will call
mdiobus_free().  Since mdiobus_register() has been called prior to
mdio_mux_init(), the bus->state will not be MDIOBUS_UNREGISTERED.  This
causes a BUG_ON() in mdiobus_free().  To correct this issue, add an
error path for mdio_mux_init() which calls mdiobus_unregister() prior to
mdiobus_free().

Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Fixes: 98bc865a1ec8 ("net: mdio-mux: Add MDIO mux driver for iProc SoCs")
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-mux-bcm-iproc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c
index 0a0412524cec..0a5f62e0efcc 100644
--- a/drivers/net/phy/mdio-mux-bcm-iproc.c
+++ b/drivers/net/phy/mdio-mux-bcm-iproc.c
@@ -203,11 +203,14 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev)
 			   &md->mux_handle, md, md->mii_bus);
 	if (rc) {
 		dev_info(md->dev, "mdiomux initialization failed\n");
-		goto out;
+		goto out_register;
 	}
 
 	dev_info(md->dev, "iProc mdiomux registered\n");
 	return 0;
+
+out_register:
+	mdiobus_unregister(bus);
 out:
 	mdiobus_free(bus);
 	return rc;
-- 
cgit v1.2.3


From efda760fe95ea15291853c8fa9235c32d319cd98 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 28 Mar 2017 21:25:08 -0400
Subject: lockd: fix lockd shutdown race

As reported by David Jeffery: "a signal was sent to lockd while lockd
was shutting down from a request to stop nfs.  The signal causes lockd
to call restart_grace() which puts the lockd_net structure on the grace
list.  If this signal is received at the wrong time, it will occur after
lockd_down_net() has called locks_end_grace() but before
lockd_down_net() stops the lockd thread.  This leads to lockd putting
the lockd_net structure back on the grace list, then exiting without
anything removing it from the list."

So, perform the final locks_end_grace() from the the lockd thread; this
ensures it's serialized with respect to restart_grace().

Reported-by: David Jeffery <djeffery@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/svc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index e7c8b9c76e48..5d481e8a1b5d 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -132,6 +132,8 @@ lockd(void *vrqstp)
 {
 	int		err = 0;
 	struct svc_rqst *rqstp = vrqstp;
+	struct net *net = &init_net;
+	struct lockd_net *ln = net_generic(net, lockd_net_id);
 
 	/* try_to_freeze() is called from svc_recv() */
 	set_freezable();
@@ -176,6 +178,8 @@ lockd(void *vrqstp)
 	if (nlmsvc_ops)
 		nlmsvc_invalidate_all();
 	nlm_shutdown_hosts();
+	cancel_delayed_work_sync(&ln->grace_period_end);
+	locks_end_grace(&ln->lockd_manager);
 	return 0;
 }
 
@@ -270,8 +274,6 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
 	if (ln->nlmsvc_users) {
 		if (--ln->nlmsvc_users == 0) {
 			nlm_shutdown_hosts_net(net);
-			cancel_delayed_work_sync(&ln->grace_period_end);
-			locks_end_grace(&ln->lockd_manager);
 			svc_shutdown_net(serv, net);
 			dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net);
 		}
-- 
cgit v1.2.3


From e092693443b995c8e3a565a73b5fdb05f1260f9b Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 8 May 2017 18:02:24 -0400
Subject: NFS append COMMIT after synchronous COPY

Instead of messing with the commit path which has been causing issues,
add a COMMIT op after the COPY and ask for stable copies in the first
space.

It saves a round trip, since after the COPY, the client sends a COMMIT
anyway.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h       |  1 -
 fs/nfs/nfs42proc.c      | 21 +++++++++++++++------
 fs/nfs/nfs42xdr.c       | 22 ++++++++++++++++++++--
 fs/nfs/write.c          | 30 ------------------------------
 include/linux/nfs_xdr.h |  1 +
 5 files changed, 36 insertions(+), 39 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 31b26cf1b476..e9b4c3320e37 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -495,7 +495,6 @@ void nfs_mark_request_commit(struct nfs_page *req,
 			     u32 ds_commit_idx);
 int nfs_write_need_commit(struct nfs_pgio_header *);
 void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
-int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf);
 int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
 			    int how, struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 87f5b7b971ca..929d09a5310a 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -167,23 +167,29 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 	if (status)
 		return status;
 
+	res->commit_res.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
+	if (!res->commit_res.verf)
+		return -ENOMEM;
 	status = nfs4_call_sync(server->client, server, &msg,
 				&args->seq_args, &res->seq_res, 0);
 	if (status == -ENOTSUPP)
 		server->caps &= ~NFS_CAP_COPY;
 	if (status)
-		return status;
+		goto out;
 
-	if (res->write_res.verifier.committed != NFS_FILE_SYNC) {
-		status = nfs_commit_file(dst, &res->write_res.verifier.verifier);
-		if (status)
-			return status;
+	if (!nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
+				    &res->commit_res.verf->verifier)) {
+		status = -EAGAIN;
+		goto out;
 	}
 
 	truncate_pagecache_range(dst_inode, pos_dst,
 				 pos_dst + res->write_res.count);
 
-	return res->write_res.count;
+	status = res->write_res.count;
+out:
+	kfree(res->commit_res.verf);
+	return status;
 }
 
 ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
@@ -240,6 +246,9 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
 		if (err == -ENOTSUPP) {
 			err = -EOPNOTSUPP;
 			break;
+		} if (err == -EAGAIN) {
+			dst_exception.retry = 1;
+			continue;
 		}
 
 		err2 = nfs4_handle_exception(server, err, &src_exception);
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 6c7296454bbc..528362f69cc1 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -66,12 +66,14 @@
 					 encode_putfh_maxsz + \
 					 encode_savefh_maxsz + \
 					 encode_putfh_maxsz + \
-					 encode_copy_maxsz)
+					 encode_copy_maxsz + \
+					 encode_commit_maxsz)
 #define NFS4_dec_copy_sz		(compound_decode_hdr_maxsz + \
 					 decode_putfh_maxsz + \
 					 decode_savefh_maxsz + \
 					 decode_putfh_maxsz + \
-					 decode_copy_maxsz)
+					 decode_copy_maxsz + \
+					 decode_commit_maxsz)
 #define NFS4_enc_deallocate_sz		(compound_encode_hdr_maxsz + \
 					 encode_putfh_maxsz + \
 					 encode_deallocate_maxsz + \
@@ -222,6 +224,18 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
 	encode_nops(&hdr);
 }
 
+static void encode_copy_commit(struct xdr_stream *xdr,
+			  struct nfs42_copy_args *args,
+			  struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr);
+	p = reserve_space(xdr, 12);
+	p = xdr_encode_hyper(p, args->dst_pos);
+	*p = cpu_to_be32(args->count);
+}
+
 /*
  * Encode COPY request
  */
@@ -239,6 +253,7 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
 	encode_savefh(xdr, &hdr);
 	encode_putfh(xdr, args->dst_fh, &hdr);
 	encode_copy(xdr, args, &hdr);
+	encode_copy_commit(xdr, args, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -481,6 +496,9 @@ static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
 	if (status)
 		goto out;
 	status = decode_copy(xdr, res);
+	if (status)
+		goto out;
+	status = decode_commit(xdr, &res->commit_res);
 out:
 	return status;
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 59e21cc0a266..85bfa416fcc8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1742,36 +1742,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
 				   data->mds_ops, how, 0);
 }
 
-int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf)
-{
-	struct inode *inode = file_inode(file);
-	struct nfs_open_context *open;
-	struct nfs_commit_info cinfo;
-	struct nfs_page *req;
-	int ret;
-
-	open = get_nfs_open_context(nfs_file_open_context(file));
-	req  = nfs_create_request(open, NULL, NULL, 0, i_size_read(inode));
-	if (IS_ERR(req)) {
-		ret = PTR_ERR(req);
-		goto out_put;
-	}
-
-	nfs_init_cinfo_from_inode(&cinfo, inode);
-
-	memcpy(&req->wb_verf, verf, sizeof(struct nfs_write_verifier));
-	nfs_request_add_commit_list(req, &cinfo);
-	ret = nfs_commit_inode(inode, FLUSH_SYNC);
-	if (ret > 0)
-		ret = 0;
-
-	nfs_free_request(req);
-out_put:
-	put_nfs_open_context(open);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nfs_commit_file);
-
 /*
  * COMMIT call returned
  */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 677c6b91dfcd..b28c83475ee8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1383,6 +1383,7 @@ struct nfs42_copy_res {
 	struct nfs42_write_res		write_res;
 	bool				consecutive;
 	bool				synchronous;
+	struct nfs_commitres		commit_res;
 };
 
 struct nfs42_seek_args {
-- 
cgit v1.2.3


From 4dbbe2d8e95c351157f292ece067f985c30c7b53 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Fri, 21 Apr 2017 16:41:10 -0700
Subject: tracing: Use cpumask_available() to check if cpumask variable may be
 used

This fixes the following clang warning:

kernel/trace/trace.c:3231:12: warning: address of array 'iter->started'
  will always evaluate to 'true' [-Wpointer-bool-conversion]
        if (iter->started)

Link: http://lkml.kernel.org/r/20170421234110.117075-1-mka@chromium.org

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4ad4420b33d6..c4536c449021 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3311,13 +3311,14 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
 		return;
 
-	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
+	if (cpumask_available(iter->started) &&
+	    cpumask_test_cpu(iter->cpu, iter->started))
 		return;
 
 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
 		return;
 
-	if (iter->started)
+	if (cpumask_available(iter->started))
 		cpumask_set_cpu(iter->cpu, iter->started);
 
 	/* Don't print started cpu buffer for the first entry of the trace */
-- 
cgit v1.2.3


From 32f1bc0f3d262125169f9212aac306c638e34b54 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 8 May 2017 22:35:32 -0400
Subject: Revert "ipv4: restore rt->fi for reference counting"

This reverts commit 82486aa6f1b9bc8145e6d0fa2bc0b44307f3b875.

As implemented, this causes dangling netdevice refs.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h |  1 -
 net/ipv4/route.c    | 18 +-----------------
 2 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index 4335eb72a04c..2cc0e14c6359 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -69,7 +69,6 @@ struct rtable {
 
 	struct list_head	rt_uncached;
 	struct uncached_list	*rt_uncached_list;
-	struct fib_info		*fi; /* for refcnt to shared metrics */
 };
 
 static inline bool rt_is_input_route(const struct rtable *rt)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f647310f8e4d..655d9eebe43e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1387,11 +1387,6 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 {
 	struct rtable *rt = (struct rtable *) dst;
 
-	if (rt->fi) {
-		fib_info_put(rt->fi);
-		rt->fi = NULL;
-	}
-
 	if (!list_empty(&rt->rt_uncached)) {
 		struct uncached_list *ul = rt->rt_uncached_list;
 
@@ -1429,16 +1424,6 @@ static bool rt_cache_valid(const struct rtable *rt)
 		!rt_is_expired(rt);
 }
 
-static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
-{
-	if (fi->fib_metrics != (u32 *)dst_default_metrics) {
-		fib_info_hold(fi);
-		rt->fi = fi;
-	}
-
-	dst_init_metrics(&rt->dst, fi->fib_metrics, true);
-}
-
 static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 			   const struct fib_result *res,
 			   struct fib_nh_exception *fnhe,
@@ -1453,7 +1438,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 			rt->rt_gateway = nh->nh_gw;
 			rt->rt_uses_gateway = 1;
 		}
-		rt_init_metrics(rt, fi);
+		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = nh->nh_tclassid;
 #endif
@@ -1505,7 +1490,6 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 		rt->rt_gateway = 0;
 		rt->rt_uses_gateway = 0;
 		rt->rt_table_id = 0;
-		rt->fi = NULL;
 		INIT_LIST_HEAD(&rt->rt_uncached);
 
 		rt->dst.output = ip_output;
-- 
cgit v1.2.3


From 861ce4a3244c21b0af64f880d5bfe5e6e2fb9e4a Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Mon, 8 May 2017 14:23:16 -0700
Subject: x86/mm/32: Set the '__vmalloc_start_set' flag in initmem_init()

'__vmalloc_start_set' currently only gets set in initmem_init() when
!CONFIG_NEED_MULTIPLE_NODES. This breaks detection of vmalloc address
with virt_addr_valid() with CONFIG_NEED_MULTIPLE_NODES=y, causing
a kernel crash:

  [mm/usercopy] 517e1fbeb6: kernel BUG at arch/x86/mm/physaddr.c:78!

Set '__vmalloc_start_set' appropriately for that case as well.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Laura Abbott <labbott@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: dc16ecf7fd1f ("x86-32: use specific __vmalloc_start_set flag in __virt_addr_valid")
Link: http://lkml.kernel.org/r/1494278596-30373-1-git-send-email-labbott@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/numa_32.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 6b7ce6279133..aca6295350f3 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -100,5 +100,6 @@ void __init initmem_init(void)
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
 
+	__vmalloc_start_set = true;
 	setup_bootmem_allocator();
 }
-- 
cgit v1.2.3


From d2b6dc61a8dd3c429609b993778cb54e75a5c5f0 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 8 May 2017 17:09:10 -0700
Subject: x86/boot/32: Fix UP boot on Quark and possibly other platforms

This partially reverts commit:

  23b2a4ddebdd17f ("x86/boot/32: Defer resyncing initial_page_table until per-cpu is set up")

That commit had one definite bug and one potential bug.  The
definite bug is that setup_per_cpu_areas() uses a differnet generic
implementation on UP kernels, so initial_page_table never got
resynced.  This was fine for access to percpu data (it's in the
identity map on UP), but it breaks other users of
initial_page_table.  The potential bug is that helpers like
efi_init() would be called before the tables were synced.

Avoid both problems by just syncing the page tables in setup_arch()
*and* setup_per_cpu_areas().

Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/setup.c        | 15 +++++++++++++++
 arch/x86/kernel/setup_percpu.c | 10 +++++-----
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 603a1669a2ec..0b4d3c686b1e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1225,6 +1225,21 @@ void __init setup_arch(char **cmdline_p)
 
 	kasan_init();
 
+#ifdef CONFIG_X86_32
+	/* sync back kernel address range */
+	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+			KERNEL_PGD_PTRS);
+
+	/*
+	 * sync back low identity map too.  It is used for example
+	 * in the 32-bit EFI stub.
+	 */
+	clone_pgd_range(initial_page_table,
+			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+			min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+#endif
+
 	tboot_probe();
 
 	map_vsyscall();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index bb1e8cc0bc84..10edd1e69a68 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -291,11 +291,11 @@ void __init setup_per_cpu_areas(void)
 
 #ifdef CONFIG_X86_32
 	/*
-	 * Sync back kernel address range.  We want to make sure that
-	 * all kernel mappings, including percpu mappings, are available
-	 * in the smpboot asm.  We can't reliably pick up percpu
-	 * mappings using vmalloc_fault(), because exception dispatch
-	 * needs percpu data.
+	 * Sync back kernel address range again.  We already did this in
+	 * setup_arch(), but percpu data also needs to be available in
+	 * the smpboot asm.  We can't reliably pick up percpu mappings
+	 * using vmalloc_fault(), because exception dispatch needs
+	 * percpu data.
 	 */
 	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
 			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-- 
cgit v1.2.3


From 4a1bec4605838fd7872ec41677585e241e256785 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <nick.desaulniers@gmail.com>
Date: Mon, 8 May 2017 20:29:46 -0700
Subject: x86/build: Don't add -maccumulate-outgoing-args w/o compiler support

Clang does not support this machine dependent option.

Older versions of GCC (pre 3.0) may not support this option, added in
2000, but it's unlikely they can still compile a working kernel.

Signed-off-by: Nick Desaulniers <nick.desaulniers@gmail.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170509032946.20444-1-nick.desaulniers@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 4430dd489620..5851411e60fb 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -179,7 +179,8 @@ ifdef CONFIG_JUMP_LABEL
 endif
 
 ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
-	KBUILD_CFLAGS += -maccumulate-outgoing-args
+	# This compiler flag is not supported by Clang:
+	KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
 endif
 
 # Stackpointer is addressed different for 32 bit and 64 bit x86
-- 
cgit v1.2.3


From f63d947c1673930bfc5f2f9bd1073a02c179a890 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 8 May 2017 13:32:27 +0100
Subject: clocksource/arm_arch_timer: Fix arch_timer_mem_find_best_frame()

arch_timer_mem_find_best_frame() looks through ARCH_TIMER_MEM_MAX_FRAMES
frames even after finding matches to ensure the best frame is chosen,
which means the variable frame will point to the last valid frame but
not necessarily the best frame.

On Juno, we get the following error as the wrong frame is returned as the
best frame from arch_timer_mem_find_best_frame():

  arch_timer: Unable to map frame @ 0x0000000000000000
  arch_timer: Frame missing phys irq.
  Failed to initialize '/timer@2a810000': -22

Fix the issue by correctly returning the best frame from
arch_timer_mem_find_best_frame().

Fixes: c389d701dfb7 ("clocksource: arm_arch_timer: split MMIO timer probing.")
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1494246747-17267-1-git-send-email-sudeep.holla@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/clocksource/arm_arch_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index a1fb918b8021..4bed671e490e 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -1268,7 +1268,7 @@ arch_timer_mem_find_best_frame(struct arch_timer_mem *timer_mem)
 		pr_err("Unable to find a suitable frame in timer @ %pa\n",
 			&timer_mem->cntctlbase);
 
-	return frame;
+	return best_frame;
 }
 
 static int __init
-- 
cgit v1.2.3


From fb8fb46c56289b3f34b5d90a4ec65e9e4e4544a5 Mon Sep 17 00:00:00 2001
From: Xiaochen Shen <xiaochen.shen@intel.com>
Date: Wed, 3 May 2017 11:15:56 +0800
Subject: x86/intel_rdt: Fix a typo in Documentation

Example 3 contains a typo:

"C0" in "# echo C0 > p0/cpus" is wrong because it specifies core
6-7 instead of wanted core 4-7.

Correct this typo to avoid confusion.

Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
Acked-by: Fenghua Yu <fenghua.yu@intel.com>
Cc: vikas.shivappa@linux.intel.com
Cc: tony.luck@intel.com
Link: http://lkml.kernel.org/r/1493781356-24229-1-git-send-email-xiaochen.shen@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/x86/intel_rdt_ui.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index 0f6d8477b66c..c491a1b82de2 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -295,7 +295,7 @@ kernel and the tasks running there get 50% of the cache. They should
 also get 50% of memory bandwidth assuming that the cores 4-7 are SMT
 siblings and only the real time threads are scheduled on the cores 4-7.
 
-# echo C0 > p0/cpus
+# echo F0 > p0/cpus
 
 4) Locking between applications
 
-- 
cgit v1.2.3


From cb9d043469d216416d75efe05f3bce279bab2262 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Mon, 8 May 2017 09:31:53 +0200
Subject: KVM: arm/arm64: Clarification and relaxation to ITS save/restore ABI

Clarify what is meant by the save/restore ABI only supporting virtual
physical interrupts.

Relax the requirement of the order that the collection entries are
written in and be clear that there is no particular ordering enforced.

Some cosmetic changes in the capitalization of ID names to align with
the GICv3 manual and remove the empty line in the bottom of the patch.

Signed-off-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 Documentation/virtual/kvm/devices/arm-vgic-its.txt | 23 +++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
index ba132e9885b3..eb06beb75960 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
@@ -97,8 +97,8 @@ Groups:
 The following ordering must be followed when restoring the GIC and the ITS:
 a) restore all guest memory and create vcpus
 b) restore all redistributors
-c) initialize the ITS and then provide its base address
-   (KVM_DEV_ARM_VGIC_CTRL_INIT, KVM_DEV_ARM_VGIC_GRP_ADDR)
+c) provide the its base address
+   (KVM_DEV_ARM_VGIC_GRP_ADDR)
 d) restore the ITS in the following order:
    1. Restore GITS_CBASER
    2. Restore all other GITS_ registers, except GITS_CTLR!
@@ -110,12 +110,14 @@ Then vcpus can be started.
  ITS Table ABI REV0:
  -------------------
 
- Revision 0 of the ABI only supports physical LPIs.
+ Revision 0 of the ABI only supports the features of a virtual GICv3, and does
+ not support a virtual GICv4 with support for direct injection of virtual
+ interrupts for nested hypervisors.
 
- The device table and ITT are indexed by the deviceid and eventid,
- respectively. The collection table is not indexed by collectionid:
- CTEs are written in the table in the order of collection creation. All
- entries are 8 bytes.
+ The device table and ITT are indexed by the DeviceID and EventID,
+ respectively. The collection table is not indexed by CollectionID, and the
+ entries in the collection are listed in no particular order.
+ All entries are 8 bytes.
 
  Device Table Entry (DTE):
 
@@ -126,10 +128,10 @@ Then vcpus can be started.
  - V indicates whether the entry is valid. If not, other fields
    are not meaningful.
  - next: equals to 0 if this entry is the last one; otherwise it
-   corresponds to the deviceid offset to the next DTE, capped by
+   corresponds to the DeviceID offset to the next DTE, capped by
    2^14 -1.
  - ITT_addr matches bits [51:8] of the ITT address (256 Byte aligned).
- - Size specifies the supported number of bits for the eventid,
+ - Size specifies the supported number of bits for the EventID,
    minus one
 
  Collection Table Entry (CTE):
@@ -151,8 +153,7 @@ Then vcpus can be started.
 
  where:
  - next: equals to 0 if this entry is the last one; otherwise it corresponds
-   to the eventid offset to the next ITE capped by 2^16 -1.
+   to the EventID offset to the next ITE capped by 2^16 -1.
  - pINTID is the physical LPI ID; if zero, it means the entry is not valid
    and other fields are not meaningful.
  - ICID is the collection ID
-
-- 
cgit v1.2.3


From 443c3a9e686aa44cf00c677498a6b715f9a43a45 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Mon, 8 May 2017 12:09:13 +0200
Subject: KVM: arm/arm64: vgic: Rename kvm_vgic_vcpu_init to
 kvm_vgic_vcpu_enable

This function really doesn't init anything, it enables the CPU
interface, so name it as such, which gives us the name to use for actual
init work later on.

Signed-off-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 virt/kvm/arm/vgic/vgic-init.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 87de048fe147..0ea64a1beff9 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -226,11 +226,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
 	return 0;
 }
 
-/**
- * kvm_vgic_vcpu_init() - Enable the VCPU interface
- * @vcpu: the VCPU which's VGIC should be enabled
- */
-static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
 {
 	if (kvm_vgic_global_state.type == VGIC_V2)
 		vgic_v2_enable(vcpu);
@@ -269,7 +265,7 @@ int vgic_init(struct kvm *kvm)
 		dist->msis_require_devid = true;
 
 	kvm_for_each_vcpu(i, vcpu, kvm)
-		kvm_vgic_vcpu_init(vcpu);
+		kvm_vgic_vcpu_enable(vcpu);
 
 	ret = kvm_vgic_setup_default_irq_routing(kvm);
 	if (ret)
-- 
cgit v1.2.3


From 24e0bfbf63bac18495b0ad76115269f2158e9234 Mon Sep 17 00:00:00 2001
From: Horia Geantă <horia.geanta@nxp.com>
Date: Mon, 8 May 2017 11:50:16 +0300
Subject: powerpc: Fix distclean with Makefile.postlink
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Makefile.postlink always includes include/config/auto.conf, however
this file is not present in a clean kernel tree, causing make to fail:

  $ git clone linuxppc.git
  $ cd linuxppc.git
  $ make distclean
  arch/powerpc/Makefile.postlink:10: include/config/auto.conf: No such file or directory
  make[1]: *** No rule to make target `include/config/auto.conf'.  Stop.
  make: *** [vmlinuxclean] Error 2

Equally running 'make distclean; make distclean' will trip the error case.

Change the inclusion such that file not being found does not trigger an error.

Fixes: f188d0524d7e ("powerpc: Use the new post-link pass to check relocations")
Reported-by: Mircea Pop <mircea.pop@nxp.com>
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Tested-by: Justin M. Forbes <jforbes@fedoraproject.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Makefile.postlink | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink
index 3c22d64b2de9..eccfcc88afae 100644
--- a/arch/powerpc/Makefile.postlink
+++ b/arch/powerpc/Makefile.postlink
@@ -7,7 +7,7 @@
 PHONY := __archpost
 __archpost:
 
-include include/config/auto.conf
+-include include/config/auto.conf
 include scripts/Kbuild.include
 
 quiet_cmd_relocs_check = CHKREL  $@
-- 
cgit v1.2.3


From ba95b5d0359609b4ec8010f77c40ab3c595a6ac6 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 9 May 2017 15:39:04 +1000
Subject: powerpc/mm/book3s/64: Rework page table geometry for lower memory
 usage

Recently in commit f6eedbba7a26 ("powerpc/mm/hash: Increase VA range to 128TB")
we increased the virtual address space for user processes to 128TB by default,
and up to 512TB if user space opts in.

This obviously required expanding the range of the Linux page tables. For Book3s
64-bit using hash and with PAGE_SIZE=64K, we increased the PGD to 2^15 entries.
This meant we could cover the full address range, while still being able to
insert a 16G hugepage at the PGD level and a 16M hugepage in the PMD.

The downside of that geometry is that it uses a lot of memory for the PGD, and
in particular makes the PGD a 4-page allocation, which means it's much more
likely to fail under memory pressure.

Instead we can make the PMD larger, so that a single PUD entry maps 16G,
allowing the 16G hugepages to sit at that level in the tree. We're then able to
split the remaining bits between the PUG and PGD. We make the PGD slightly
larger as that results in lower memory usage for typical programs.

When THP is enabled the PMD actually doubles in size, to 2^11 entries, or 2^14
bytes, which is large but still < PAGE_SIZE.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 214219dff87c..9732837aaae8 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -2,9 +2,9 @@
 #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
 
 #define H_PTE_INDEX_SIZE  8
-#define H_PMD_INDEX_SIZE  5
-#define H_PUD_INDEX_SIZE  5
-#define H_PGD_INDEX_SIZE  15
+#define H_PMD_INDEX_SIZE  10
+#define H_PUD_INDEX_SIZE  7
+#define H_PGD_INDEX_SIZE  8
 
 /*
  * 64k aligned address free up few of the lower bits of RPN for us
-- 
cgit v1.2.3


From 6102c005a71b4a7a6433d43b887662fb0780746d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 4 May 2017 20:41:12 +1000
Subject: powerpc/64s: Fix unnecessary machine check handler relocation branch

Similarly to commit 2563a70c3b ("powerpc/64s: Remove unnecessary relocation
branch from idle handler"), the machine check handler has a BRANCH_TO from
relocated to relocated code, which is unnecessary.

It has also caused build errors with some toolchains:

  arch/powerpc/kernel/exceptions-64s.S: Assembler messages:
  arch/powerpc/kernel/exceptions-64s.S:395: Error: operand out of range
  (0xffffffffffff8280 is not between 0x0000000000000000 and
  0x000000000000ffff)

Fixes: 1945bc4549e5 ("powerpc/64s: Fix POWER9 machine check handler from stop state")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reported-and-tested-by : Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/exceptions-64s.S | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 3840a7700285..ef72065f684c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -391,9 +391,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 */
 	BEGIN_FTR_SECTION
 	rlwinm.	r11,r12,47-31,30,31
-	beq-	4f
-	BRANCH_TO_COMMON(r10, machine_check_idle_common)
-4:
+	bne	machine_check_idle_common
 	END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif
 
-- 
cgit v1.2.3


From 3bed8888edc8f60d734c44fdcdb7c6abd5e2d02e Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Sat, 6 May 2017 23:37:19 +0800
Subject: KVM: set no_llseek in stat_fops_per_vm

In vm_stat_get_per_vm_fops and vcpu_stat_get_per_vm_fops, since we
use nonseekable_open() to open, we should use no_llseek() to seek,
not generic_file_llseek().

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b3d151ee2a67..a9a04625bb61 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3715,7 +3715,7 @@ static const struct file_operations vm_stat_get_per_vm_fops = {
 	.release = kvm_debugfs_release,
 	.read    = simple_attr_read,
 	.write   = simple_attr_write,
-	.llseek  = generic_file_llseek,
+	.llseek  = no_llseek,
 };
 
 static int vcpu_stat_get_per_vm(void *data, u64 *val)
@@ -3760,7 +3760,7 @@ static const struct file_operations vcpu_stat_get_per_vm_fops = {
 	.release = kvm_debugfs_release,
 	.read    = simple_attr_read,
 	.write   = simple_attr_write,
-	.llseek  = generic_file_llseek,
+	.llseek  = no_llseek,
 };
 
 static const struct file_operations *stat_fops_per_vm[] = {
-- 
cgit v1.2.3


From c7c2c709b60ed2d7e6e6871496f0e963cfad121f Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 5 May 2017 11:28:09 -0700
Subject: kvm: nVMX: Validate CR3 target count on nested VM-entry

According to the SDM, the CR3-target count must not be greater than
4. Future processors may support a different number of CR3-target
values. Software should read the VMX capability MSR IA32_VMX_MISC to
determine the number of values supported.

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c5fd459c4043..44508522a1c7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1314,6 +1314,11 @@ static inline bool report_flexpriority(void)
 	return flexpriority_enabled;
 }
 
+static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
+{
+	return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low);
+}
+
 static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
 {
 	return vmcs12->cpu_based_vm_exec_control & bit;
@@ -10266,6 +10271,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 				vmx->nested.nested_vmx_entry_ctls_high))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+	if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
+		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
 	if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
 	    !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
 	    !nested_cr3_valid(vcpu, vmcs12->host_cr3))
-- 
cgit v1.2.3


From bab4165e2f031905bd0a2bb8b6ad65c5c8cfa870 Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Fri, 5 May 2017 15:25:13 -0400
Subject: kvm: x86: Add a hook for arch specific dirty logging emulation

When KVM updates accessed/dirty bits, this hook can be used
to invoke an arch specific function that implements/emulates
dirty logging such as PML.

Signed-off-by: Bandan Das <bsd@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/mmu.c              | 15 +++++++++++++++
 arch/x86/kvm/mmu.h              |  1 +
 arch/x86/kvm/paging_tmpl.h      |  4 ++++
 4 files changed, 22 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f5bddf92faba..9c761fea0c98 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1020,6 +1020,8 @@ struct kvm_x86_ops {
 	void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
 					   struct kvm_memory_slot *slot,
 					   gfn_t offset, unsigned long mask);
+	int (*write_log_dirty)(struct kvm_vcpu *vcpu);
+
 	/* pmu operations of sub-arch */
 	const struct kvm_pmu_ops *pmu_ops;
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 558676538fca..5d3376f67794 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1498,6 +1498,21 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
+/**
+ * kvm_arch_write_log_dirty - emulate dirty page logging
+ * @vcpu: Guest mode vcpu
+ *
+ * Emulate arch specific page modification logging for the
+ * nested hypervisor
+ */
+int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu)
+{
+	if (kvm_x86_ops->write_log_dirty)
+		return kvm_x86_ops->write_log_dirty(vcpu);
+
+	return 0;
+}
+
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 				    struct kvm_memory_slot *slot, u64 gfn)
 {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index d8ccb32f7308..27975807cc64 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -202,4 +202,5 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 				    struct kvm_memory_slot *slot, u64 gfn);
+int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 314d2071b337..56241746abbd 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -226,6 +226,10 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
 		if (level == walker->level && write_fault &&
 				!(pte & PT_GUEST_DIRTY_MASK)) {
 			trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
+#if PTTYPE == PTTYPE_EPT
+			if (kvm_arch_write_log_dirty(vcpu))
+				return -EINVAL;
+#endif
 			pte |= PT_GUEST_DIRTY_MASK;
 		}
 		if (pte == orig_pte)
-- 
cgit v1.2.3


From c5f983f6e8455bbff8b6b39f3ad470317fcd808e Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Fri, 5 May 2017 15:25:14 -0400
Subject: nVMX: Implement emulated Page Modification Logging

With EPT A/D enabled, processor access to L2 guest
paging structures will result in a write violation.
When this happens, write the GUEST_PHYSICAL_ADDRESS
to the pml buffer provided by L1 if the access is
write and the dirty bit is being set.

This patch also adds necessary checks during VMEntry if L1
has enabled PML. If the PML index overflows, we change the
exit reason and run L1 to simulate a PML full event.

Signed-off-by: Bandan Das <bsd@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 79 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 44508522a1c7..7d056f5385e7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -248,6 +248,7 @@ struct __packed vmcs12 {
 	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
+	u64 pml_address;
 	u64 guest_ia32_debugctl;
 	u64 guest_ia32_pat;
 	u64 guest_ia32_efer;
@@ -369,6 +370,7 @@ struct __packed vmcs12 {
 	u16 guest_ldtr_selector;
 	u16 guest_tr_selector;
 	u16 guest_intr_status;
+	u16 guest_pml_index;
 	u16 host_es_selector;
 	u16 host_cs_selector;
 	u16 host_ss_selector;
@@ -407,6 +409,7 @@ struct nested_vmx {
 	/* Has the level1 guest done vmxon? */
 	bool vmxon;
 	gpa_t vmxon_ptr;
+	bool pml_full;
 
 	/* The guest-physical address of the current VMCS L1 keeps for L2 */
 	gpa_t current_vmptr;
@@ -742,6 +745,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
 	FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
 	FIELD(GUEST_INTR_STATUS, guest_intr_status),
+	FIELD(GUEST_PML_INDEX, guest_pml_index),
 	FIELD(HOST_ES_SELECTOR, host_es_selector),
 	FIELD(HOST_CS_SELECTOR, host_cs_selector),
 	FIELD(HOST_SS_SELECTOR, host_ss_selector),
@@ -767,6 +771,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
+	FIELD64(PML_ADDRESS, pml_address),
 	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
 	FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
 	FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
@@ -1353,6 +1358,11 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
 		vmx_xsaves_supported();
 }
 
+static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
+}
+
 static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
 {
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
@@ -9369,13 +9379,20 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
 		struct x86_exception *fault)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exit_reason;
+	unsigned long exit_qualification = vcpu->arch.exit_qualification;
 
-	if (fault->error_code & PFERR_RSVD_MASK)
+	if (vmx->nested.pml_full) {
+		exit_reason = EXIT_REASON_PML_FULL;
+		vmx->nested.pml_full = false;
+		exit_qualification &= INTR_INFO_UNBLOCK_NMI;
+	} else if (fault->error_code & PFERR_RSVD_MASK)
 		exit_reason = EXIT_REASON_EPT_MISCONFIG;
 	else
 		exit_reason = EXIT_REASON_EPT_VIOLATION;
-	nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
+
+	nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
 	vmcs12->guest_physical_address = fault->address;
 }
 
@@ -9718,6 +9735,22 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
+					 struct vmcs12 *vmcs12)
+{
+	u64 address = vmcs12->pml_address;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
+		if (!nested_cpu_has_ept(vmcs12) ||
+		    !IS_ALIGNED(address, 4096)  ||
+		    address >> maxphyaddr)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
 				       struct vmx_msr_entry *e)
 {
@@ -10253,6 +10286,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+	if (nested_vmx_check_pml_controls(vcpu, vmcs12))
+		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
 	if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
 				vmx->nested.nested_vmx_procbased_ctls_low,
 				vmx->nested.nested_vmx_procbased_ctls_high) ||
@@ -11151,6 +11187,46 @@ static void vmx_flush_log_dirty(struct kvm *kvm)
 	kvm_flush_pml_buffers(kvm);
 }
 
+static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
+{
+	struct vmcs12 *vmcs12;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	gpa_t gpa;
+	struct page *page = NULL;
+	u64 *pml_address;
+
+	if (is_guest_mode(vcpu)) {
+		WARN_ON_ONCE(vmx->nested.pml_full);
+
+		/*
+		 * Check if PML is enabled for the nested guest.
+		 * Whether eptp bit 6 is set is already checked
+		 * as part of A/D emulation.
+		 */
+		vmcs12 = get_vmcs12(vcpu);
+		if (!nested_cpu_has_pml(vmcs12))
+			return 0;
+
+		if (vmcs12->guest_pml_index > PML_ENTITY_NUM) {
+			vmx->nested.pml_full = true;
+			return 1;
+		}
+
+		gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
+
+		page = nested_get_page(vcpu, vmcs12->pml_address);
+		if (!page)
+			return 0;
+
+		pml_address = kmap(page);
+		pml_address[vmcs12->guest_pml_index--] = gpa;
+		kunmap(page);
+		nested_release_page_clean(page);
+	}
+
+	return 0;
+}
+
 static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
 					   struct kvm_memory_slot *memslot,
 					   gfn_t offset, unsigned long mask)
@@ -11510,6 +11586,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
 	.flush_log_dirty = vmx_flush_log_dirty,
 	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+	.write_log_dirty = vmx_write_pml_buffer,
 
 	.pre_block = vmx_pre_block,
 	.post_block = vmx_post_block,
-- 
cgit v1.2.3


From 03efce6f935f89f90a98997ceea514aeff47b6dc Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Fri, 5 May 2017 15:25:15 -0400
Subject: nVMX: Advertise PML to L1 hypervisor

Advertise the PML bit in vmcs12 but don't try to enable
it in hardware when running L2 since L0 is emulating it. Also,
preserve L0's settings for PML since it may still
want to log writes.

Signed-off-by: Bandan Das <bsd@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7d056f5385e7..c6f4ad44aa95 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2766,8 +2766,11 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
 			VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
 			VMX_EPT_1GB_PAGE_BIT;
-	       if (enable_ept_ad_bits)
+		if (enable_ept_ad_bits) {
+			vmx->nested.nested_vmx_secondary_ctls_high |=
+				SECONDARY_EXEC_ENABLE_PML;
 		       vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+		}
 	} else
 		vmx->nested.nested_vmx_ept_caps = 0;
 
@@ -8129,7 +8132,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_PREEMPTION_TIMER:
 		return false;
 	case EXIT_REASON_PML_FULL:
-		/* We don't expose PML support to L1. */
+		/* We emulate PML support to L1. */
 		return false;
 	default:
 		return true;
@@ -9924,7 +9927,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 			  bool from_vmentry, u32 *entry_failure_code)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 exec_control;
+	u32 exec_control, vmcs12_exec_ctrl;
 
 	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
 	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -10055,8 +10058,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 				  SECONDARY_EXEC_APIC_REGISTER_VIRT);
 		if (nested_cpu_has(vmcs12,
-				CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
-			exec_control |= vmcs12->secondary_vm_exec_control;
+				   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
+			vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
+				~SECONDARY_EXEC_ENABLE_PML;
+			exec_control |= vmcs12_exec_ctrl;
+		}
 
 		if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
 			vmcs_write64(EOI_EXIT_BITMAP0,
-- 
cgit v1.2.3


From 497d72d80a789501501cccabdad6b145f9e31371 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Mon, 8 May 2017 20:38:40 +0200
Subject: KVM: Add kvm_vcpu_get_idx to get vcpu index in kvm->vcpus

There are occasional needs to use the index of vcpu in the kvm->vcpus
array to map something related to a VCPU.  For example, unlike the
vcpu->vcpu_id, the vcpu index is guaranteed to not be sparse across all
vcpus which is useful when allocating a memory area for each vcpu.

Signed-off-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 include/linux/kvm_host.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c14ad9809da..12eb26d665e8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -490,6 +490,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 	return NULL;
 }
 
+static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu *tmp;
+	int idx;
+
+	kvm_for_each_vcpu(idx, tmp, vcpu->kvm)
+		if (tmp == vcpu)
+			return idx;
+	BUG();
+}
+
 #define kvm_for_each_memslot(memslot, slots)	\
 	for (memslot = &slots->memslots[0];	\
 	      memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
-- 
cgit v1.2.3


From 7fadcd3a859b218fa27c399db0aa8d6d62ad84a9 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Mon, 8 May 2017 12:18:26 +0200
Subject: KVM: arm/arm64: Refactor vgic_register_redist_iodevs

Split out the function to register all the redistributor iodevs into a
function that handles a single redistributor at a time in preparation
for being able to call this per VCPU as these get created.

Signed-off-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 virt/kvm/arm/vgic/vgic-mmio-v3.c | 108 ++++++++++++++++++++++++---------------
 virt/kvm/arm/vgic/vgic-v3.c      |   2 +-
 virt/kvm/arm/vgic/vgic.h         |   2 +-
 3 files changed, 68 insertions(+), 44 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 6afb3b484886..168269b4e28d 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -556,61 +556,85 @@ unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev)
 	return SZ_64K;
 }
 
-int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address)
+/**
+ * vgic_register_redist_iodev - register a single redist iodev
+ * @vcpu:    The VCPU to which the redistributor belongs
+ *
+ * Register a KVM iodev for this VCPU's redistributor using the address
+ * provided.
+ *
+ * Return 0 on success, -ERRNO otherwise.
+ */
+static int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct vgic_dist *vgic = &kvm->arch.vgic;
+	struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
+	struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
+	gpa_t rd_base, sgi_base;
+	int ret;
+
+	rd_base = vgic->vgic_redist_base + kvm_vcpu_get_idx(vcpu) * SZ_64K * 2;
+	sgi_base = rd_base + SZ_64K;
+
+	kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
+	rd_dev->base_addr = rd_base;
+	rd_dev->iodev_type = IODEV_REDIST;
+	rd_dev->regions = vgic_v3_rdbase_registers;
+	rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+	rd_dev->redist_vcpu = vcpu;
+
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
+				      SZ_64K, &rd_dev->dev);
+	mutex_unlock(&kvm->slots_lock);
+
+	if (ret)
+		return ret;
+
+	kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
+	sgi_dev->base_addr = sgi_base;
+	sgi_dev->iodev_type = IODEV_REDIST;
+	sgi_dev->regions = vgic_v3_sgibase_registers;
+	sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
+	sgi_dev->redist_vcpu = vcpu;
+
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
+				      SZ_64K, &sgi_dev->dev);
+	mutex_unlock(&kvm->slots_lock);
+	if (ret)
+		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+					  &rd_dev->dev);
+
+	return ret;
+}
+
+static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
+{
+	struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
+	struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
+
+	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev);
+	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &sgi_dev->dev);
+}
+
+int vgic_register_redist_iodevs(struct kvm *kvm)
 {
 	struct kvm_vcpu *vcpu;
 	int c, ret = 0;
 
 	kvm_for_each_vcpu(c, vcpu, kvm) {
-		gpa_t rd_base = redist_base_address + c * SZ_64K * 2;
-		gpa_t sgi_base = rd_base + SZ_64K;
-		struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
-		struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
-
-		kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
-		rd_dev->base_addr = rd_base;
-		rd_dev->iodev_type = IODEV_REDIST;
-		rd_dev->regions = vgic_v3_rdbase_registers;
-		rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
-		rd_dev->redist_vcpu = vcpu;
-
-		mutex_lock(&kvm->slots_lock);
-		ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
-					      SZ_64K, &rd_dev->dev);
-		mutex_unlock(&kvm->slots_lock);
-
+		ret = vgic_register_redist_iodev(vcpu);
 		if (ret)
 			break;
-
-		kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
-		sgi_dev->base_addr = sgi_base;
-		sgi_dev->iodev_type = IODEV_REDIST;
-		sgi_dev->regions = vgic_v3_sgibase_registers;
-		sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
-		sgi_dev->redist_vcpu = vcpu;
-
-		mutex_lock(&kvm->slots_lock);
-		ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
-					      SZ_64K, &sgi_dev->dev);
-		mutex_unlock(&kvm->slots_lock);
-		if (ret) {
-			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
-						  &rd_dev->dev);
-			break;
-		}
 	}
 
 	if (ret) {
 		/* The current c failed, so we start with the previous one. */
 		for (c--; c >= 0; c--) {
-			struct vgic_cpu *vgic_cpu;
-
 			vcpu = kvm_get_vcpu(kvm, c);
-			vgic_cpu = &vcpu->arch.vgic_cpu;
-			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
-						  &vgic_cpu->rd_iodev.dev);
-			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
-						  &vgic_cpu->sgi_iodev.dev);
+			vgic_unregister_redist_iodev(vcpu);
 		}
 	}
 
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 54dee725da18..12e52a06e146 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -386,7 +386,7 @@ int vgic_v3_map_resources(struct kvm *kvm)
 		goto out;
 	}
 
-	ret = vgic_register_redist_iodevs(kvm, dist->vgic_redist_base);
+	ret = vgic_register_redist_iodevs(kvm);
 	if (ret) {
 		kvm_err("Unable to register VGICv3 redist MMIO regions\n");
 		goto out;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 8259f0a859ab..a2aeaa866b18 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -174,7 +174,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info);
 int vgic_v3_map_resources(struct kvm *kvm);
 int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
 int vgic_v3_save_pending_tables(struct kvm *kvm);
-int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
+int vgic_register_redist_iodevs(struct kvm *kvm);
 
 void vgic_v3_load(struct kvm_vcpu *vcpu);
 void vgic_v3_put(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From 9a746d75c06e2a68f27886d041c6e46df0aa86d8 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Mon, 8 May 2017 12:23:51 +0200
Subject: KVM: arm/arm64: Make vgic_v3_check_base more broadly usable

As we are about to fiddle with the IO device registration mechanism,
let's be a little more careful when setting base addresses as early as
possible.  When setting a base address, we can check that there's
address space enough for its scope and when the last of the two
base addresses (dist and redist) get set, we can also check if the
regions overlap at that time.

This allows us to provide error messages to the user at time when trying
to set the base address, as opposed to later when trying to run the VM.

To do this,  we make vgic_v3_check_base available in the core vgic-v3
code as well as in the other parts of the GICv3 code, namely the MMIO
config code.

We also return true for undefined base addresses so that the function
can be used before all base addresses are set; all callers already check
for uninitialized addresses before calling this function.

Signed-off-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 virt/kvm/arm/vgic/vgic-v3.c | 19 +++++++++++++++----
 virt/kvm/arm/vgic/vgic.h    |  1 +
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 12e52a06e146..2d53d7a24bda 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -329,19 +329,30 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
 	return 0;
 }
 
-/* check for overlapping regions and for regions crossing the end of memory */
-static bool vgic_v3_check_base(struct kvm *kvm)
+/*
+ * Check for overlapping regions and for regions crossing the end of memory
+ * for base addresses which have already been set.
+ */
+bool vgic_v3_check_base(struct kvm *kvm)
 {
 	struct vgic_dist *d = &kvm->arch.vgic;
 	gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE;
 
 	redist_size *= atomic_read(&kvm->online_vcpus);
 
-	if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
+	if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) &&
+	    d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
 		return false;
-	if (d->vgic_redist_base + redist_size < d->vgic_redist_base)
+
+	if (!IS_VGIC_ADDR_UNDEF(d->vgic_redist_base) &&
+	    d->vgic_redist_base + redist_size < d->vgic_redist_base)
 		return false;
 
+	/* Both base addresses must be set to check if they overlap */
+	if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) ||
+	    IS_VGIC_ADDR_UNDEF(d->vgic_redist_base))
+		return true;
+
 	if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base)
 		return true;
 	if (d->vgic_redist_base + redist_size <= d->vgic_dist_base)
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index a2aeaa866b18..89eb935c05be 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -175,6 +175,7 @@ int vgic_v3_map_resources(struct kvm *kvm);
 int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
 int vgic_v3_save_pending_tables(struct kvm *kvm);
 int vgic_register_redist_iodevs(struct kvm *kvm);
+bool vgic_v3_check_base(struct kvm *kvm);
 
 void vgic_v3_load(struct kvm_vcpu *vcpu);
 void vgic_v3_put(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From 72030536ebf5e5e512771922efa472a80f2c969f Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Mon, 8 May 2017 12:28:19 +0200
Subject: KVM: arm/arm64: Slightly rework kvm_vgic_addr

As we are about to handle setting the address for the redistributor base
region separately from some of the other base addresses, let's rework
this function to leave a little more room for being flexible in what
each type of base address does.

Signed-off-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 virt/kvm/arm/vgic/vgic-kvm-device.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index d48743cafedc..69ccfd5b2271 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -37,6 +37,14 @@ int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
 	return 0;
 }
 
+static int vgic_check_type(struct kvm *kvm, int type_needed)
+{
+	if (kvm->arch.vgic.vgic_model != type_needed)
+		return -ENODEV;
+	else
+		return 0;
+}
+
 /**
  * kvm_vgic_addr - set or get vgic VM base addresses
  * @kvm:   pointer to the vm struct
@@ -57,40 +65,36 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
 {
 	int r = 0;
 	struct vgic_dist *vgic = &kvm->arch.vgic;
-	int type_needed;
 	phys_addr_t *addr_ptr, alignment;
 
 	mutex_lock(&kvm->lock);
 	switch (type) {
 	case KVM_VGIC_V2_ADDR_TYPE_DIST:
-		type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
+		r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
 		addr_ptr = &vgic->vgic_dist_base;
 		alignment = SZ_4K;
 		break;
 	case KVM_VGIC_V2_ADDR_TYPE_CPU:
-		type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
+		r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
 		addr_ptr = &vgic->vgic_cpu_base;
 		alignment = SZ_4K;
 		break;
 	case KVM_VGIC_V3_ADDR_TYPE_DIST:
-		type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
+		r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
 		addr_ptr = &vgic->vgic_dist_base;
 		alignment = SZ_64K;
 		break;
 	case KVM_VGIC_V3_ADDR_TYPE_REDIST:
-		type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
+		r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
 		addr_ptr = &vgic->vgic_redist_base;
 		alignment = SZ_64K;
 		break;
 	default:
 		r = -ENODEV;
-		goto out;
 	}
 
-	if (vgic->vgic_model != type_needed) {
-		r = -ENODEV;
+	if (r)
 		goto out;
-	}
 
 	if (write) {
 		r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment);
-- 
cgit v1.2.3


From 1aab6f468c10a1ec3977fb6f7bc12869174d516e Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Mon, 8 May 2017 12:30:24 +0200
Subject: KVM: arm/arm64: Register iodevs when setting redist base and creating
 VCPUs

Instead of waiting with registering KVM iodevs until the first VCPU is
run, we can actually create the iodevs when the redist base address is
set.  The only downside is that we must now also check if we need to do
this for VCPUs which are created after creating the VGIC, because there
is no enforced ordering between creating the VGIC (and setting its base
addresses) and creating the VCPUs.

Signed-off-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 include/kvm/arm_vgic.h              |  1 +
 virt/kvm/arm/arm.c                  |  2 +-
 virt/kvm/arm/vgic/vgic-init.c       | 21 ++++++++++++++++++
 virt/kvm/arm/vgic/vgic-kvm-device.c |  7 +++++-
 virt/kvm/arm/vgic/vgic-mmio-v3.c    | 43 +++++++++++++++++++++++++++++++++++--
 virt/kvm/arm/vgic/vgic-v3.c         |  6 ------
 virt/kvm/arm/vgic/vgic.h            |  3 ++-
 7 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index fabcc649e2ce..4ff65efcfebd 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -286,6 +286,7 @@ extern struct static_key_false vgic_v2_cpuif_trap;
 
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 void kvm_vgic_early_init(struct kvm *kvm);
+int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
 int kvm_vgic_create(struct kvm *kvm, u32 type);
 void kvm_vgic_destroy(struct kvm *kvm);
 void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 7941699a766d..9f6f522a4bfc 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -335,7 +335,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 	kvm_arm_reset_debug_ptr(vcpu);
 
-	return 0;
+	return kvm_vgic_vcpu_init(vcpu);
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 0ea64a1beff9..962bb577a1a0 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -226,6 +226,27 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
 	return 0;
 }
 
+/**
+ * kvm_vgic_vcpu_init() - Register VCPU-specific KVM iodevs
+ * @vcpu: pointer to the VCPU being created and initialized
+ */
+int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	int ret = 0;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return 0;
+
+	/*
+	 * If we are creating a VCPU with a GICv3 we must also register the
+	 * KVM io device for the redistributor that belongs to this VCPU.
+	 */
+	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+		ret = vgic_register_redist_iodev(vcpu);
+	return ret;
+}
+
 static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
 {
 	if (kvm_vgic_global_state.type == VGIC_V2)
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index 69ccfd5b2271..10ae6f394b71 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -86,8 +86,13 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
 		break;
 	case KVM_VGIC_V3_ADDR_TYPE_REDIST:
 		r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
+		if (r)
+			break;
+		if (write) {
+			r = vgic_v3_set_redist_base(kvm, *addr);
+			goto out;
+		}
 		addr_ptr = &vgic->vgic_redist_base;
-		alignment = SZ_64K;
 		break;
 	default:
 		r = -ENODEV;
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 168269b4e28d..99da1a207c19 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -565,7 +565,7 @@ unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev)
  *
  * Return 0 on success, -ERRNO otherwise.
  */
-static int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
+int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
 {
 	struct kvm *kvm = vcpu->kvm;
 	struct vgic_dist *vgic = &kvm->arch.vgic;
@@ -574,6 +574,18 @@ static int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
 	gpa_t rd_base, sgi_base;
 	int ret;
 
+	/*
+	 * We may be creating VCPUs before having set the base address for the
+	 * redistributor region, in which case we will come back to this
+	 * function for all VCPUs when the base address is set.  Just return
+	 * without doing any work for now.
+	 */
+	if (IS_VGIC_ADDR_UNDEF(vgic->vgic_redist_base))
+		return 0;
+
+	if (!vgic_v3_check_base(kvm))
+		return -EINVAL;
+
 	rd_base = vgic->vgic_redist_base + kvm_vcpu_get_idx(vcpu) * SZ_64K * 2;
 	sgi_base = rd_base + SZ_64K;
 
@@ -619,7 +631,7 @@ static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
 	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &sgi_dev->dev);
 }
 
-int vgic_register_redist_iodevs(struct kvm *kvm)
+static int vgic_register_all_redist_iodevs(struct kvm *kvm)
 {
 	struct kvm_vcpu *vcpu;
 	int c, ret = 0;
@@ -641,6 +653,33 @@ int vgic_register_redist_iodevs(struct kvm *kvm)
 	return ret;
 }
 
+int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr)
+{
+	struct vgic_dist *vgic = &kvm->arch.vgic;
+	int ret;
+
+	/* vgic_check_ioaddr makes sure we don't do this twice */
+	ret = vgic_check_ioaddr(kvm, &vgic->vgic_redist_base, addr, SZ_64K);
+	if (ret)
+		return ret;
+
+	vgic->vgic_redist_base = addr;
+	if (!vgic_v3_check_base(kvm)) {
+		vgic->vgic_redist_base = VGIC_ADDR_UNDEF;
+		return -EINVAL;
+	}
+
+	/*
+	 * Register iodevs for each existing VCPU.  Adding more VCPUs
+	 * afterwards will register the iodevs when needed.
+	 */
+	ret = vgic_register_all_redist_iodevs(kvm);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	const struct vgic_register_region *region;
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 2d53d7a24bda..bb35078f4e40 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -397,12 +397,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
 		goto out;
 	}
 
-	ret = vgic_register_redist_iodevs(kvm);
-	if (ret) {
-		kvm_err("Unable to register VGICv3 redist MMIO regions\n");
-		goto out;
-	}
-
 	if (vgic_has_its(kvm)) {
 		ret = vgic_register_its_iodevs(kvm);
 		if (ret) {
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 89eb935c05be..5f17eace077c 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -174,7 +174,8 @@ int vgic_v3_probe(const struct gic_kvm_info *info);
 int vgic_v3_map_resources(struct kvm *kvm);
 int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
 int vgic_v3_save_pending_tables(struct kvm *kvm);
-int vgic_register_redist_iodevs(struct kvm *kvm);
+int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr);
+int vgic_register_redist_iodev(struct kvm_vcpu *vcpu);
 bool vgic_v3_check_base(struct kvm *kvm);
 
 void vgic_v3_load(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From 6cc40f273b30ef8f7b37f95cd2e6456d652808c0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 8 May 2017 18:15:40 +0100
Subject: KVM: arm/arm64: Get rid of its->initialized field

The its->initialized doesn't bring much to the table, and creates
unnecessary ordering between setting the address and initializing it
(which amounts to exactly nothing).

Let's kill it altogether, making KVM_DEV_ARM_VGIC_CTRL_INIT the no-op
it deserves to be.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 include/kvm/arm_vgic.h       | 1 -
 virt/kvm/arm/vgic/vgic-its.c | 7 +------
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 4ff65efcfebd..bfde6fb78e8a 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -148,7 +148,6 @@ struct vgic_its {
 	gpa_t			vgic_its_base;
 
 	bool			enabled;
-	bool			initialized;
 	struct vgic_io_device	iodev;
 	struct kvm_device	*dev;
 
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 9f7105c61ece..18318c67ed93 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1545,9 +1545,6 @@ static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
 	struct vgic_io_device *iodev = &its->iodev;
 	int ret;
 
-	if (!its->initialized)
-		return -EBUSY;
-
 	if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base))
 		return -ENXIO;
 
@@ -1597,7 +1594,6 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
 	INIT_LIST_HEAD(&its->collection_list);
 
 	dev->kvm->arch.vgic.has_its = true;
-	its->initialized = false;
 	its->enabled = false;
 	its->dev = dev;
 
@@ -2397,8 +2393,7 @@ static int vgic_its_set_attr(struct kvm_device *dev,
 
 		switch (attr->attr) {
 		case KVM_DEV_ARM_VGIC_CTRL_INIT:
-			its->initialized = true;
-
+			/* Nothing to do */
 			return 0;
 		case KVM_DEV_ARM_ITS_SAVE_TABLES:
 			return abi->save_tables(its);
-- 
cgit v1.2.3


From 30e1b684f0afd94745821f27ce1165226df02ba9 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Mon, 8 May 2017 13:14:57 +0200
Subject: KVM: arm/arm64: Register ITS iodev when setting base address

We have to register the ITS iodevice before running the VM, because in
migration scenarios, we may be restoring a live device that wishes to
inject MSIs before the VCPUs have started.

All we need to register the ITS io device is the base address of the
ITS, so we can simply register that when the base address of the ITS is
set.

  [ Code to fix concurrency issues when setting the ITS base address and
    to fix the undef base address check written by Marc Zyngier ]

Signed-off-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 virt/kvm/arm/vgic/vgic-its.c | 44 ++++++++++----------------------------------
 virt/kvm/arm/vgic/vgic-v3.c  |  8 --------
 virt/kvm/arm/vgic/vgic.h     |  1 -
 3 files changed, 10 insertions(+), 43 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 18318c67ed93..89acaef44965 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1540,14 +1540,19 @@ void vgic_enable_lpis(struct kvm_vcpu *vcpu)
 		its_sync_lpi_pending_table(vcpu);
 }
 
-static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
+static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its,
+				   u64 addr)
 {
 	struct vgic_io_device *iodev = &its->iodev;
 	int ret;
 
-	if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base))
-		return -ENXIO;
+	mutex_lock(&kvm->slots_lock);
+	if (!IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) {
+		ret = -EBUSY;
+		goto out;
+	}
 
+	its->vgic_its_base = addr;
 	iodev->regions = its_registers;
 	iodev->nr_regions = ARRAY_SIZE(its_registers);
 	kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops);
@@ -1555,9 +1560,9 @@ static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
 	iodev->base_addr = its->vgic_its_base;
 	iodev->iodev_type = IODEV_ITS;
 	iodev->its = its;
-	mutex_lock(&kvm->slots_lock);
 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr,
 				      KVM_VGIC_V3_ITS_SIZE, &iodev->dev);
+out:
 	mutex_unlock(&kvm->slots_lock);
 
 	return ret;
@@ -2384,9 +2389,7 @@ static int vgic_its_set_attr(struct kvm_device *dev,
 		if (ret)
 			return ret;
 
-		its->vgic_its_base = addr;
-
-		return 0;
+		return vgic_register_its_iodev(dev->kvm, its, addr);
 	}
 	case KVM_DEV_ARM_VGIC_GRP_CTRL: {
 		const struct vgic_its_abi *abi = vgic_its_get_abi(its);
@@ -2462,30 +2465,3 @@ int kvm_vgic_register_its_device(void)
 	return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
 				       KVM_DEV_TYPE_ARM_VGIC_ITS);
 }
-
-/*
- * Registers all ITSes with the kvm_io_bus framework.
- * To follow the existing VGIC initialization sequence, this has to be
- * done as late as possible, just before the first VCPU runs.
- */
-int vgic_register_its_iodevs(struct kvm *kvm)
-{
-	struct kvm_device *dev;
-	int ret = 0;
-
-	list_for_each_entry(dev, &kvm->devices, vm_node) {
-		if (dev->ops != &kvm_arm_vgic_its_ops)
-			continue;
-
-		ret = vgic_register_its_iodev(kvm, dev->private);
-		if (ret)
-			return ret;
-		/*
-		 * We don't need to care about tearing down previously
-		 * registered ITSes, as the kvm_io_bus framework removes
-		 * them for us if the VM gets destroyed.
-		 */
-	}
-
-	return ret;
-}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index bb35078f4e40..8fa737edde6f 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -397,14 +397,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
 		goto out;
 	}
 
-	if (vgic_has_its(kvm)) {
-		ret = vgic_register_its_iodevs(kvm);
-		if (ret) {
-			kvm_err("Unable to register VGIC ITS MMIO regions\n");
-			goto out;
-		}
-	}
-
 	dist->ready = true;
 
 out:
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 5f17eace077c..8ac739734874 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -181,7 +181,6 @@ bool vgic_v3_check_base(struct kvm *kvm);
 void vgic_v3_load(struct kvm_vcpu *vcpu);
 void vgic_v3_put(struct kvm_vcpu *vcpu);
 
-int vgic_register_its_iodevs(struct kvm *kvm);
 bool vgic_has_its(struct kvm *kvm);
 int kvm_vgic_register_its_device(void);
 void vgic_enable_lpis(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From 67723c25ce7fc0bd4f1b0f4bcbee5f0d114516ca Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Mon, 8 May 2017 14:43:45 +0200
Subject: KVM: arm/arm64: Don't call map_resources when restoring ITS tables

The only reason we called kvm_vgic_map_resources() when restoring the
ITS tables was because we wanted to have the KVM iodevs registered in
the KVM IO bus framework at the time when the ITS was restored such that
a restored and active device can inject MSIs prior to otherwise calling
kvm_vgic_map_resources() from the first run of a VCPU.

Since we now register the KVM iodevs for the redestributors and ITS as
soon as possible (when setting the base addresses), we no longer need
this call and kvm_vgic_map_resources() is again called only when first
running a VCPU.

Signed-off-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 virt/kvm/arm/vgic/vgic-its.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 89acaef44965..9aeaff0512d8 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -2308,20 +2308,12 @@ static int vgic_its_restore_tables_v0(struct vgic_its *its)
 		goto out;
 
 	ret = vgic_its_restore_device_tables(its);
-
 out:
 	unlock_all_vcpus(kvm);
 	mutex_unlock(&its->its_lock);
 	mutex_unlock(&kvm->lock);
 
-	if (ret)
-		return ret;
-
-	/*
-	 * On restore path, MSI injections can happen before the
-	 * first VCPU run so let's complete the GIC init here.
-	 */
-	return kvm_vgic_map_resources(its->dev->kvm);
+	return ret;
 }
 
 static int vgic_its_commit_v0(struct vgic_its *its)
-- 
cgit v1.2.3


From a2b19e6e2d4bb662a64799541c144fd94f8fb024 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Mon, 8 May 2017 13:31:12 +0200
Subject: KVM: arm/arm64: vgic-its: Cleanup after failed ITT restore

When failing to restore the ITT for a DTE, we should remove the failed
device entry from the list and free the object.

We slightly refactor vgic_its_destroy to be able to reuse the now
separate vgic_its_free_dte() function.

Signed-off-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
 virt/kvm/arm/vgic/vgic-its.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 9aeaff0512d8..2dff288b3a66 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1613,13 +1613,20 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
 	return vgic_its_set_abi(its, NR_ITS_ABIS - 1);
 }
 
+static void vgic_its_free_device(struct kvm *kvm, struct its_device *dev)
+{
+	struct its_ite *ite, *tmp;
+
+	list_for_each_entry_safe(ite, tmp, &dev->itt_head, ite_list)
+		its_free_ite(kvm, ite);
+	list_del(&dev->dev_list);
+	kfree(dev);
+}
+
 static void vgic_its_destroy(struct kvm_device *kvm_dev)
 {
 	struct kvm *kvm = kvm_dev->kvm;
 	struct vgic_its *its = kvm_dev->private;
-	struct its_device *dev;
-	struct its_ite *ite;
-	struct list_head *dev_cur, *dev_temp;
 	struct list_head *cur, *temp;
 
 	/*
@@ -1630,19 +1637,19 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
 		return;
 
 	mutex_lock(&its->its_lock);
-	list_for_each_safe(dev_cur, dev_temp, &its->device_list) {
-		dev = container_of(dev_cur, struct its_device, dev_list);
-		list_for_each_safe(cur, temp, &dev->itt_head) {
-			ite = (container_of(cur, struct its_ite, ite_list));
-			its_free_ite(kvm, ite);
-		}
-		list_del(dev_cur);
-		kfree(dev);
+	list_for_each_safe(cur, temp, &its->device_list) {
+		struct its_device *dev;
+
+		dev = list_entry(cur, struct its_device, dev_list);
+		vgic_its_free_device(kvm, dev);
 	}
 
 	list_for_each_safe(cur, temp, &its->collection_list) {
+		struct its_collection *coll;
+
+		coll = list_entry(cur, struct its_collection, coll_list);
 		list_del(cur);
-		kfree(container_of(cur, struct its_collection, coll_list));
+		kfree(coll);
 	}
 	mutex_unlock(&its->its_lock);
 
@@ -2012,8 +2019,10 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
 		return PTR_ERR(dev);
 
 	ret = vgic_its_restore_itt(its, dev);
-	if (ret)
+	if (ret) {
+		vgic_its_free_device(its->dev->kvm, dev);
 		return ret;
+	}
 
 	return offset;
 }
-- 
cgit v1.2.3


From ea47dd191d543f81e0912b5dc0471b48346b016e Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 19 Apr 2017 05:12:18 +1000
Subject: of/fdt: introduce of_scan_flat_dt_subnodes and of_get_flat_dt_phandle

Introduce primitives for FDT parsing. These will be used for powerpc
cpufeatures node scanning, which has quite complex structure but should
be processed early.

Cc: devicetree@vger.kernel.org
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/of/fdt.c       | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/of_fdt.h |  6 ++++++
 2 files changed, 44 insertions(+)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index e5ce4b59e162..961ca97072a9 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -753,6 +753,36 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
 	return rc;
 }
 
+/**
+ * of_scan_flat_dt_subnodes - scan sub-nodes of a node call callback on each.
+ * @it: callback function
+ * @data: context data pointer
+ *
+ * This function is used to scan sub-nodes of a node.
+ */
+int __init of_scan_flat_dt_subnodes(unsigned long parent,
+				    int (*it)(unsigned long node,
+					      const char *uname,
+					      void *data),
+				    void *data)
+{
+	const void *blob = initial_boot_params;
+	int node;
+
+	fdt_for_each_subnode(node, blob, parent) {
+		const char *pathp;
+		int rc;
+
+		pathp = fdt_get_name(blob, node, NULL);
+		if (*pathp == '/')
+			pathp = kbasename(pathp);
+		rc = it(node, pathp, data);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
 /**
  * of_get_flat_dt_subnode_by_name - get the subnode by given name
  *
@@ -812,6 +842,14 @@ int __init of_flat_dt_match(unsigned long node, const char *const *compat)
 	return of_fdt_match(initial_boot_params, node, compat);
 }
 
+/**
+ * of_get_flat_dt_prop - Given a node in the flat blob, return the phandle
+ */
+uint32_t __init of_get_flat_dt_phandle(unsigned long node)
+{
+	return fdt_get_phandle(initial_boot_params, node);
+}
+
 struct fdt_scan_status {
 	const char *name;
 	int namelen;
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 271b3fdf0070..1dfbfd0d8040 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -54,6 +54,11 @@ extern char __dtb_end[];
 extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname,
 				     int depth, void *data),
 			   void *data);
+extern int of_scan_flat_dt_subnodes(unsigned long node,
+				    int (*it)(unsigned long node,
+					      const char *uname,
+					      void *data),
+				    void *data);
 extern int of_get_flat_dt_subnode_by_name(unsigned long node,
 					  const char *uname);
 extern const void *of_get_flat_dt_prop(unsigned long node, const char *name,
@@ -62,6 +67,7 @@ extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern int of_flat_dt_match(unsigned long node, const char *const *matches);
 extern unsigned long of_get_flat_dt_root(void);
 extern int of_get_flat_dt_size(void);
+extern uint32_t of_get_flat_dt_phandle(unsigned long node);
 
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data);
-- 
cgit v1.2.3


From 75bda95048a4d2c5ff04036bd0181bc84085d8b2 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 9 May 2017 13:17:08 +1000
Subject: powerpc: Don't print cpu_spec->cpu_name if it's NULL

Currently we assume that if the cpu_spec has a pvr_mask then it must also have a
cpu_name. But that will change in a subsequent commit when we do CPU feature
discovery via the device tree, so check explicitly if cpu_name is NULL.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/setup-common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 5c10b5925ac2..b57df7fc9e5f 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -256,7 +256,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	seq_printf(m, "processor\t: %lu\n", cpu_id);
 	seq_printf(m, "cpu\t\t: ");
 
-	if (cur_cpu_spec->pvr_mask)
+	if (cur_cpu_spec->pvr_mask && cur_cpu_spec->cpu_name)
 		seq_printf(m, "%s", cur_cpu_spec->cpu_name);
 	else
 		seq_printf(m, "unknown (%08x)", pvr);
-- 
cgit v1.2.3


From f92ceb01c2cad7092af89c32dde5b14d4fdf8a09 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 8 May 2017 15:31:44 -0700
Subject: DECnet: Use container_of() for embedded struct

Instead of a direct cross-type cast, use conatiner_of() to locate
the embedded structure, even in the face of future struct layout
randomization.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_neigh.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 482730cd8a56..eeb5fc561f80 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -110,7 +110,7 @@ struct neigh_table dn_neigh_table = {
 static int dn_neigh_construct(struct neighbour *neigh)
 {
 	struct net_device *dev = neigh->dev;
-	struct dn_neigh *dn = (struct dn_neigh *)neigh;
+	struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n);
 	struct dn_dev *dn_db;
 	struct neigh_parms *parms;
 
@@ -339,7 +339,7 @@ int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	struct dst_entry *dst = skb_dst(skb);
 	struct dn_route *rt = (struct dn_route *) dst;
 	struct neighbour *neigh = rt->n;
-	struct dn_neigh *dn = (struct dn_neigh *)neigh;
+	struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n);
 	struct dn_dev *dn_db;
 	bool use_long;
 
@@ -391,7 +391,7 @@ int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
 
-	dn = (struct dn_neigh *)neigh;
+	dn = container_of(neigh, struct dn_neigh, n);
 
 	if (neigh) {
 		write_lock(&neigh->lock);
@@ -451,7 +451,7 @@ int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb
 
 	neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
 
-	dn = (struct dn_neigh *)neigh;
+	dn = container_of(neigh, struct dn_neigh, n);
 
 	if (neigh) {
 		write_lock(&neigh->lock);
@@ -510,7 +510,7 @@ static void neigh_elist_cb(struct neighbour *neigh, void *_info)
 	if (neigh->dev != s->dev)
 		return;
 
-	dn = (struct dn_neigh *) neigh;
+	dn = container_of(neigh, struct dn_neigh, n);
 	if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2)))
 		return;
 
@@ -549,7 +549,7 @@ int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n)
 static inline void dn_neigh_format_entry(struct seq_file *seq,
 					 struct neighbour *n)
 {
-	struct dn_neigh *dn = (struct dn_neigh *) n;
+	struct dn_neigh *dn = container_of(n, struct dn_neigh, n);
 	char buf[DN_ASCBUF_LEN];
 
 	read_lock(&n->lock);
-- 
cgit v1.2.3


From 4c19e2f2a86f2281014c1e06785715ad9864c9cc Mon Sep 17 00:00:00 2001
From: Karim Eshapa <karim.eshapa@gmail.com>
Date: Tue, 9 May 2017 02:06:50 +0200
Subject: drivers: net: wimax: i2400m: i2400m-usb: Use time_after for time
 comparison

Use time_after() for time comparison with the new fix.

Signed-off-by: Karim Eshapa <karim.eshapa@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wimax/i2400m/i2400m-usb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wimax/i2400m/i2400m-usb.h b/drivers/net/wimax/i2400m/i2400m-usb.h
index 649ecad6844c..eff4f464a23e 100644
--- a/drivers/net/wimax/i2400m/i2400m-usb.h
+++ b/drivers/net/wimax/i2400m/i2400m-usb.h
@@ -131,7 +131,7 @@ static inline int edc_inc(struct edc *edc, u16 max_err, u16 timeframe)
 	unsigned long now;
 
 	now = jiffies;
-	if (now - edc->timestart > timeframe) {
+	if (time_after(now, edc->timestart + timeframe)) {
 		edc->errorcount = 1;
 		edc->timestart = now;
 	} else if (++edc->errorcount > max_err) {
-- 
cgit v1.2.3


From 5a61ef74f269f2573f48fa53607a8911216c3326 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 9 May 2017 13:16:52 +1000
Subject: powerpc/64s: Support new device tree binding for discovering CPU
 features

The ibm,powerpc-cpu-features device tree binding describes CPU features with
ASCII names and extensible compatibility, privilege, and enablement metadata
that allows improved flexibility and compatibility with new hardware.

The interface is described in detail in ibm,powerpc-cpu-features.txt in this
patch.

Currently this code is not enabled by default, and there are no released
firmwares that provide the binding.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../bindings/powerpc/ibm,powerpc-cpu-features.txt  |  248 +++++
 arch/powerpc/Kconfig                               |   16 +
 arch/powerpc/include/asm/cpu_has_feature.h         |    6 +-
 arch/powerpc/include/asm/cputable.h                |    2 +
 arch/powerpc/include/asm/dt_cpu_ftrs.h             |   26 +
 arch/powerpc/include/asm/reg.h                     |    1 +
 arch/powerpc/include/uapi/asm/cputable.h           |    7 +
 arch/powerpc/kernel/Makefile                       |    1 +
 arch/powerpc/kernel/cputable.c                     |   37 +-
 arch/powerpc/kernel/dt_cpu_ftrs.c                  | 1031 ++++++++++++++++++++
 arch/powerpc/kernel/prom.c                         |   29 +-
 arch/powerpc/kernel/setup_64.c                     |   10 +-
 12 files changed, 1398 insertions(+), 16 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt
 create mode 100644 arch/powerpc/include/asm/dt_cpu_ftrs.h
 create mode 100644 arch/powerpc/kernel/dt_cpu_ftrs.c

diff --git a/Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt b/Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt
new file mode 100644
index 000000000000..5af426e13334
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt
@@ -0,0 +1,248 @@
+*** NOTE ***
+This document is copied from OPAL firmware
+(skiboot/doc/device-tree/ibm,powerpc-cpu-features/binding.txt)
+
+There is more complete overview and documentation of features in that
+source tree.  All patches and modifications should go there.
+************
+
+ibm,powerpc-cpu-features binding
+================================
+
+This device tree binding describes CPU features available to software, with
+enablement, privilege, and compatibility metadata.
+
+More general description of design and implementation of this binding is
+found in design.txt, which also points to documentation of specific features.
+
+
+/cpus/ibm,powerpc-cpu-features node binding
+-------------------------------------------
+
+Node: ibm,powerpc-cpu-features
+
+Description: Container of CPU feature nodes.
+
+The node name must be "ibm,powerpc-cpu-features".
+
+It is implemented as a child of the node "/cpus", but this must not be
+assumed by parsers.
+
+The node is optional but should be provided by new OPAL firmware.
+
+Properties:
+
+- compatible
+  Usage: required
+  Value type: string
+  Definition: "ibm,powerpc-cpu-features"
+
+  This compatibility refers to backwards compatibility of the overall
+  design with parsers that behave according to these guidelines. This can
+  be extended in a backward compatible manner which would not warrant a
+  revision of the compatible property.
+
+- isa
+  Usage: required
+  Value type: <u32>
+  Definition:
+
+  isa that the CPU is currently running in. This provides instruction set
+  compatibility, less the individual feature nodes. For example, an ISA v3.0
+  implementation that lacks the "transactional-memory" cpufeature node
+  should not use transactional memory facilities.
+
+  Value corresponds to the "Power ISA Version" multiplied by 1000.
+  For example, <3000> corresponds to Version 3.0, <2070> to Version 2.07.
+  The minor digit is available for revisions.
+
+- display-name
+  Usage: optional
+  Value type: string
+  Definition:
+
+  A human readable name for the CPU.
+
+/cpus/ibm,powerpc-cpu-features/example-feature node bindings
+----------------------------------------------------------------
+
+Each child node of cpu-features represents a CPU feature / capability.
+
+Node: A string describing an architected CPU feature, e.g., "floating-point".
+
+Description: A feature or capability supported by the CPUs.
+
+The name of the node is a human readable string that forms the interface
+used to describe features to software. Features are currently documented
+in the code where they are implemented in skiboot/core/cpufeatures.c
+
+Presence of the node indicates the feature is available.
+
+Properties:
+
+- isa
+  Usage: required
+  Value type: <u32>
+  Definition:
+
+  First level of the Power ISA that the feature appears in.
+  Software should filter out features when constraining the
+  environment to a particular ISA version.
+
+  Value is defined similarly to /cpus/features/isa
+
+- usable-privilege
+  Usage: required
+  Value type: <u32> bit mask
+  Definition:
+              Bit numbers are LSB0
+              bit 0 - PR (problem state / user mode)
+              bit 1 - OS (privileged state)
+              bit 2 - HV (hypervisor state)
+              All other bits reserved and should be zero.
+
+  This property describes the privilege levels and/or software components
+  that can use the feature.
+
+  If bit 0 is set, then the hwcap-bit-nr property will exist.
+
+
+- hv-support
+  Usage: optional
+  Value type: <u32> bit mask
+  Definition:
+              Bit numbers are LSB0
+              bit 0 -  HFSCR
+              All other bits reserved and should be zero.
+
+  This property describes the HV privilege support required to enable the
+  feature to lesser privilege levels. If the property does not exist then no
+  support is required.
+
+  If no bits are set, the hypervisor must have explicit/custom support for
+  this feature.
+
+  If the HFSCR bit is set, then the hfscr-bit-nr property will exist and
+  the feature may be enabled by setting this bit in the HFSCR register.
+
+
+- os-support
+  Usage: optional
+  Value type: <u32> bit mask
+  Definition:
+              Bit numbers are LSB0
+              bit 0 -  FSCR
+              All other bits reserved and should be zero.
+
+  This property describes the OS privilege support required to enable the
+  feature to lesser privilege levels. If the property does not exist then no
+  support is required.
+
+  If no bits are set, the operating system must have explicit/custom support
+  for this feature.
+
+  If the FSCR bit is set, then the fscr-bit-nr property will exist and
+  the feature may be enabled by setting this bit in the FSCR register.
+
+
+- hfscr-bit-nr
+  Usage: optional
+  Value type: <u32>
+  Definition: HFSCR bit position (LSB0)
+
+  This property exists when the hv-support property HFSCR bit is set. This
+  property describes the bit number in the HFSCR register that the
+  hypervisor must set in order to enable this feature.
+
+  This property also exists if an HFSCR bit corresponds with this feature.
+  This makes CPU feature parsing slightly simpler.
+
+
+- fscr-bit-nr
+  Usage: optional
+  Value type: <u32>
+  Definition: FSCR bit position (LSB0)
+
+  This property exists when the os-support property FSCR bit is set. This
+  property describes the bit number in the FSCR register that the
+  operating system must set in order to enable this feature.
+
+  This property also exists if an FSCR bit corresponds with this feature.
+  This makes CPU feature parsing slightly simpler.
+
+
+- hwcap-bit-nr
+  Usage: optional
+  Value type: <u32>
+  Definition: Linux ELF AUX vector bit position (LSB0)
+
+  This property may exist when the usable-privilege property value has PR bit set.
+  This property describes the bit number that should be set in the ELF AUX
+  hardware capability vectors in order to advertise this feature to userspace.
+  Bits 0-31 correspond to bits 0-31 in AT_HWCAP vector. Bits 32-63 correspond
+  to 0-31 in AT_HWCAP2 vector, and so on.  Missing AT_HWCAPx vectors implies
+  that the feature is not enabled or can not be advertised. Operating systems
+  may provide a number of unassigned hardware capability bits to allow for new
+  features to be advertised.
+
+  Some properties representing features created before this binding are
+  advertised to userspace without a one-to-one hwcap bit number may not specify
+  this bit. Operating system will handle those bits specifically.  All new
+  features usable by userspace will have a hwcap-bit-nr property.
+
+
+- dependencies
+  Usage: optional
+  Value type: <prop-encoded-array>
+  Definition:
+
+  If this property exists then it is a list of phandles to cpu feature
+  nodes that must be enabled for this feature to be enabled.
+
+
+Example
+-------
+
+	/cpus/ibm,powerpc-cpu-features {
+		compatible = "ibm,powerpc-cpu-features";
+
+		isa = <3020>;
+
+		darn {
+			isa = <3000>;
+			usable-privilege = <1 | 2 | 4>;
+			hwcap-bit-nr = <xx>;
+		};
+
+		scv {
+			isa = <3000>;
+			usable-privilege = <1 | 2>;
+			os-support = <0>;
+			hwcap-bit-nr = <xx>;
+		};
+
+		stop {
+			isa = <3000>;
+			usable-privilege = <2 | 4>;
+			hv-support = <0>;
+			os-support = <0>;
+		};
+
+		vsx2 (hypothetical) {
+			isa = <3010>;
+			usable-privilege = <1 | 2 | 4>;
+			hv-support = <0>;
+			os-support = <0>;
+			hwcap-bit-nr = <xx>;
+		};
+
+		vsx2-newinsns {
+			isa = <3020>;
+			usable-privilege = <1 | 2 | 4>;
+			os-support = <1>;
+			fscr-bit-nr = <xx>;
+			hwcap-bit-nr = <xx>;
+			dependencies = <&vsx2>;
+		};
+
+	};
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 67ee6731f4e9..5bd868f2e813 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -380,6 +380,22 @@ source "arch/powerpc/platforms/Kconfig"
 
 menu "Kernel options"
 
+config PPC_DT_CPU_FTRS
+	bool "Device-tree based CPU feature discovery & setup"
+	depends on PPC_BOOK3S_64
+	default n
+	help
+	  This enables code to use a new device tree binding for describing CPU
+	  compatibility and features. Saying Y here will attempt to use the new
+	  binding if the firmware provides it. Currently only the skiboot
+	  firmware provides this binding.
+	  If you're not sure say Y.
+
+config PPC_CPUFEATURES_ENABLE_UNKNOWN
+	bool "cpufeatures pass through unknown features to guest/userspace"
+	depends on PPC_DT_CPU_FTRS
+	default y
+
 config HIGHMEM
 	bool "High memory support"
 	depends on PPC32
diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h
index 6e834caa3720..0d1df02bf99d 100644
--- a/arch/powerpc/include/asm/cpu_has_feature.h
+++ b/arch/powerpc/include/asm/cpu_has_feature.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_POWERPC_CPUFEATURES_H
-#define __ASM_POWERPC_CPUFEATURES_H
+#ifndef __ASM_POWERPC_CPU_HAS_FEATURE_H
+#define __ASM_POWERPC_CPU_HAS_FEATURE_H
 
 #ifndef __ASSEMBLY__
 
@@ -52,4 +52,4 @@ static inline bool cpu_has_feature(unsigned long feature)
 #endif
 
 #endif /* __ASSEMBLY__ */
-#endif /* __ASM_POWERPC_CPUFEATURE_H */
+#endif /* __ASM_POWERPC_CPU_HAS_FEATURE_H */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 1f6847b107e4..c2d509584a98 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -118,7 +118,9 @@ extern struct cpu_spec		*cur_cpu_spec;
 
 extern unsigned int __start___ftr_fixup, __stop___ftr_fixup;
 
+extern void set_cur_cpu_spec(struct cpu_spec *s);
 extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr);
+extern void identify_cpu_name(unsigned int pvr);
 extern void do_feature_fixups(unsigned long value, void *fixup_start,
 			      void *fixup_end);
 
diff --git a/arch/powerpc/include/asm/dt_cpu_ftrs.h b/arch/powerpc/include/asm/dt_cpu_ftrs.h
new file mode 100644
index 000000000000..7a34fc11bf63
--- /dev/null
+++ b/arch/powerpc/include/asm/dt_cpu_ftrs.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_POWERPC_DT_CPU_FTRS_H
+#define __ASM_POWERPC_DT_CPU_FTRS_H
+
+/*
+ *  Copyright 2017, IBM Corporation
+ *  cpufeatures is the new way to discover CPU features with /cpus/features
+ *  devicetree. This supersedes PVR based discovery ("cputable"), and older
+ *  device tree feature advertisement.
+ */
+
+#include <linux/types.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+#include <uapi/asm/cputable.h>
+
+#ifdef CONFIG_PPC_DT_CPU_FTRS
+bool dt_cpu_ftrs_init(void *fdt);
+void dt_cpu_ftrs_scan(void);
+bool dt_cpu_ftrs_in_use(void);
+#else
+static inline bool dt_cpu_ftrs_init(void *fdt) { return false; }
+static inline void dt_cpu_ftrs_scan(void) { }
+static inline bool dt_cpu_ftrs_in_use(void) { return false; }
+#endif
+
+#endif /* __ASM_POWERPC_DT_CPU_FTRS_H */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index d4f653c9259a..7e50e47375d6 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1229,6 +1229,7 @@
 #define PVR_POWER8E	0x004B
 #define PVR_POWER8NVL	0x004C
 #define PVR_POWER8	0x004D
+#define PVR_POWER9	0x004E
 #define PVR_BE		0x0070
 #define PVR_PA6T	0x0090
 
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index f63c96cd3608..3e7ce86d5c13 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -47,4 +47,11 @@
 #define PPC_FEATURE2_ARCH_3_00		0x00800000 /* ISA 3.00 */
 #define PPC_FEATURE2_HAS_IEEE128	0x00400000 /* VSX IEEE Binary Float 128-bit */
 
+/*
+ * IMPORTANT!
+ * All future PPC_FEATURE definitions should be allocated in cooperation with
+ * OPAL / skiboot firmware, in accordance with the ibm,powerpc-cpu-features
+ * device tree binding.
+ */
+
 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b9db46ae545b..e132902e1f14 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_PPC_RTAS)		+= rtas.o rtas-rtc.o $(rtaspci-y-y)
 obj-$(CONFIG_PPC_RTAS_DAEMON)	+= rtasd.o
 obj-$(CONFIG_RTAS_FLASH)	+= rtas_flash.o
 obj-$(CONFIG_RTAS_PROC)		+= rtas-proc.o
+obj-$(CONFIG_PPC_DT_CPU_FTRS)	+= dt_cpu_ftrs.o
 obj-$(CONFIG_EEH)              += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
 				  eeh_driver.o eeh_event.o eeh_sysfs.o
 obj-$(CONFIG_GENERIC_TBSYNC)	+= smp-tbsync.o
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index e79b9daa873c..9b3e88b1a9c8 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -23,7 +23,9 @@
 #include <asm/mmu.h>
 #include <asm/setup.h>
 
-struct cpu_spec* cur_cpu_spec = NULL;
+static struct cpu_spec the_cpu_spec __read_mostly;
+
+struct cpu_spec* cur_cpu_spec __read_mostly = NULL;
 EXPORT_SYMBOL(cur_cpu_spec);
 
 /* The platform string corresponding to the real PVR */
@@ -2179,7 +2181,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
 #endif /* CONFIG_E500 */
 };
 
-static struct cpu_spec the_cpu_spec;
+void __init set_cur_cpu_spec(struct cpu_spec *s)
+{
+	struct cpu_spec *t = &the_cpu_spec;
+
+	t = PTRRELOC(t);
+	*t = *s;
+
+	*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
+}
 
 static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
 					       struct cpu_spec *s)
@@ -2266,6 +2276,29 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
 	return NULL;
 }
 
+/*
+ * Used by cpufeatures to get the name for CPUs with a PVR table.
+ * If they don't hae a PVR table, cpufeatures gets the name from
+ * cpu device-tree node.
+ */
+void __init identify_cpu_name(unsigned int pvr)
+{
+	struct cpu_spec *s = cpu_specs;
+	struct cpu_spec *t = &the_cpu_spec;
+	int i;
+
+	s = PTRRELOC(s);
+	t = PTRRELOC(t);
+
+	for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) {
+		if ((pvr & s->pvr_mask) == s->pvr_value) {
+			t->cpu_name = s->cpu_name;
+			return;
+		}
+	}
+}
+
+
 #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
 struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS] = {
 			[0 ... NUM_CPU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
new file mode 100644
index 000000000000..fcc7588a96d6
--- /dev/null
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -0,0 +1,1031 @@
+/*
+ * Copyright 2017, Nicholas Piggin, IBM Corporation
+ * Licensed under GPLv2.
+ */
+
+#define pr_fmt(fmt) "dt-cpu-ftrs: " fmt
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <linux/memblock.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/threads.h>
+
+#include <asm/cputable.h>
+#include <asm/dt_cpu_ftrs.h>
+#include <asm/mmu.h>
+#include <asm/oprofile_impl.h>
+#include <asm/prom.h>
+#include <asm/setup.h>
+
+
+/* Device-tree visible constants follow */
+#define ISA_V2_07B      2070
+#define ISA_V3_0B       3000
+
+#define USABLE_PR               (1U << 0)
+#define USABLE_OS               (1U << 1)
+#define USABLE_HV               (1U << 2)
+
+#define HV_SUPPORT_HFSCR        (1U << 0)
+#define OS_SUPPORT_FSCR         (1U << 0)
+
+/* For parsing, we define all bits set as "NONE" case */
+#define HV_SUPPORT_NONE		0xffffffffU
+#define OS_SUPPORT_NONE		0xffffffffU
+
+struct dt_cpu_feature {
+	const char *name;
+	uint32_t isa;
+	uint32_t usable_privilege;
+	uint32_t hv_support;
+	uint32_t os_support;
+	uint32_t hfscr_bit_nr;
+	uint32_t fscr_bit_nr;
+	uint32_t hwcap_bit_nr;
+	/* fdt parsing */
+	unsigned long node;
+	int enabled;
+	int disabled;
+};
+
+#define CPU_FTRS_BASE \
+	   (CPU_FTR_USE_TB | \
+	    CPU_FTR_LWSYNC | \
+	    CPU_FTR_FPU_UNAVAILABLE |\
+	    CPU_FTR_NODSISRALIGN |\
+	    CPU_FTR_NOEXECUTE |\
+	    CPU_FTR_COHERENT_ICACHE | \
+	    CPU_FTR_STCX_CHECKS_ADDRESS |\
+	    CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_DAWR | \
+	    CPU_FTR_ARCH_206 |\
+	    CPU_FTR_ARCH_207S)
+
+#define MMU_FTRS_HASH_BASE (MMU_FTRS_POWER8)
+
+#define COMMON_USER_BASE	(PPC_FEATURE_32 | PPC_FEATURE_64 | \
+				 PPC_FEATURE_ARCH_2_06 |\
+				 PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER2_BASE	(PPC_FEATURE2_ARCH_2_07 | \
+				 PPC_FEATURE2_ISEL)
+/*
+ * Set up the base CPU
+ */
+
+extern void __flush_tlb_power8(unsigned int action);
+extern void __flush_tlb_power9(unsigned int action);
+extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
+
+static int hv_mode;
+
+static struct {
+	u64	lpcr;
+	u64	hfscr;
+	u64	fscr;
+} system_registers;
+
+static void (*init_pmu_registers)(void);
+
+static void cpufeatures_flush_tlb(void)
+{
+	unsigned long rb;
+	unsigned int i, num_sets;
+
+	/*
+	 * This is a temporary measure to keep equivalent TLB flush as the
+	 * cputable based setup code.
+	 */
+	switch (PVR_VER(mfspr(SPRN_PVR))) {
+	case PVR_POWER8:
+	case PVR_POWER8E:
+	case PVR_POWER8NVL:
+		num_sets = POWER8_TLB_SETS;
+		break;
+	case PVR_POWER9:
+		num_sets = POWER9_TLB_SETS_HASH;
+		break;
+	default:
+		num_sets = 1;
+		pr_err("unknown CPU version for boot TLB flush\n");
+		break;
+	}
+
+	asm volatile("ptesync" : : : "memory");
+	rb = TLBIEL_INVAL_SET;
+	for (i = 0; i < num_sets; i++) {
+		asm volatile("tlbiel %0" : : "r" (rb));
+		rb += 1 << TLBIEL_INVAL_SET_SHIFT;
+	}
+	asm volatile("ptesync" : : : "memory");
+}
+
+static void __restore_cpu_cpufeatures(void)
+{
+	/*
+	 * LPCR is restored by the power on engine already. It can be changed
+	 * after early init e.g., by radix enable, and we have no unified API
+	 * for saving and restoring such SPRs.
+	 *
+	 * This ->restore hook should really be removed from idle and register
+	 * restore moved directly into the idle restore code, because this code
+	 * doesn't know how idle is implemented or what it needs restored here.
+	 *
+	 * The best we can do to accommodate secondary boot and idle restore
+	 * for now is "or" LPCR with existing.
+	 */
+
+	mtspr(SPRN_LPCR, system_registers.lpcr | mfspr(SPRN_LPCR));
+	if (hv_mode) {
+		mtspr(SPRN_LPID, 0);
+		mtspr(SPRN_HFSCR, system_registers.hfscr);
+	}
+	mtspr(SPRN_FSCR, system_registers.fscr);
+
+	if (init_pmu_registers)
+		init_pmu_registers();
+
+	cpufeatures_flush_tlb();
+}
+
+static char dt_cpu_name[64];
+
+static struct cpu_spec __initdata base_cpu_spec = {
+	.cpu_name		= NULL,
+	.cpu_features		= CPU_FTRS_BASE,
+	.cpu_user_features	= COMMON_USER_BASE,
+	.cpu_user_features2	= COMMON_USER2_BASE,
+	.mmu_features		= 0,
+	.icache_bsize		= 32, /* minimum block size, fixed by */
+	.dcache_bsize		= 32, /* cache info init.             */
+	.num_pmcs		= 0,
+	.pmc_type		= PPC_PMC_DEFAULT,
+	.oprofile_cpu_type	= NULL,
+	.oprofile_type		= PPC_OPROFILE_INVALID,
+	.cpu_setup		= NULL,
+	.cpu_restore		= __restore_cpu_cpufeatures,
+	.flush_tlb		= NULL,
+	.machine_check_early	= NULL,
+	.platform		= NULL,
+};
+
+static void __init cpufeatures_setup_cpu(void)
+{
+	set_cur_cpu_spec(&base_cpu_spec);
+
+	cur_cpu_spec->pvr_mask = -1;
+	cur_cpu_spec->pvr_value = mfspr(SPRN_PVR);
+
+	/* Initialize the base environment -- clear FSCR/HFSCR.  */
+	hv_mode = !!(mfmsr() & MSR_HV);
+	if (hv_mode) {
+		/* CPU_FTR_HVMODE is used early in PACA setup */
+		cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+		mtspr(SPRN_HFSCR, 0);
+	}
+	mtspr(SPRN_FSCR, 0);
+
+	/*
+	 * LPCR does not get cleared, to match behaviour with secondaries
+	 * in __restore_cpu_cpufeatures. Once the idle code is fixed, this
+	 * could clear LPCR too.
+	 */
+}
+
+static int __init feat_try_enable_unknown(struct dt_cpu_feature *f)
+{
+	if (f->hv_support == HV_SUPPORT_NONE) {
+	} else if (f->hv_support & HV_SUPPORT_HFSCR) {
+		u64 hfscr = mfspr(SPRN_HFSCR);
+		hfscr |= 1UL << f->hfscr_bit_nr;
+		mtspr(SPRN_HFSCR, hfscr);
+	} else {
+		/* Does not have a known recipe */
+		return 0;
+	}
+
+	if (f->os_support == OS_SUPPORT_NONE) {
+	} else if (f->os_support & OS_SUPPORT_FSCR) {
+		u64 fscr = mfspr(SPRN_FSCR);
+		fscr |= 1UL << f->fscr_bit_nr;
+		mtspr(SPRN_FSCR, fscr);
+	} else {
+		/* Does not have a known recipe */
+		return 0;
+	}
+
+	if ((f->usable_privilege & USABLE_PR) && (f->hwcap_bit_nr != -1)) {
+		uint32_t word = f->hwcap_bit_nr / 32;
+		uint32_t bit = f->hwcap_bit_nr % 32;
+
+		if (word == 0)
+			cur_cpu_spec->cpu_user_features |= 1U << bit;
+		else if (word == 1)
+			cur_cpu_spec->cpu_user_features2 |= 1U << bit;
+		else
+			pr_err("%s could not advertise to user (no hwcap bits)\n", f->name);
+	}
+
+	return 1;
+}
+
+static int __init feat_enable(struct dt_cpu_feature *f)
+{
+	if (f->hv_support != HV_SUPPORT_NONE) {
+		if (f->hfscr_bit_nr != -1) {
+			u64 hfscr = mfspr(SPRN_HFSCR);
+			hfscr |= 1UL << f->hfscr_bit_nr;
+			mtspr(SPRN_HFSCR, hfscr);
+		}
+	}
+
+	if (f->os_support != OS_SUPPORT_NONE) {
+		if (f->fscr_bit_nr != -1) {
+			u64 fscr = mfspr(SPRN_FSCR);
+			fscr |= 1UL << f->fscr_bit_nr;
+			mtspr(SPRN_FSCR, fscr);
+		}
+	}
+
+	if ((f->usable_privilege & USABLE_PR) && (f->hwcap_bit_nr != -1)) {
+		uint32_t word = f->hwcap_bit_nr / 32;
+		uint32_t bit = f->hwcap_bit_nr % 32;
+
+		if (word == 0)
+			cur_cpu_spec->cpu_user_features |= 1U << bit;
+		else if (word == 1)
+			cur_cpu_spec->cpu_user_features2 |= 1U << bit;
+		else
+			pr_err("CPU feature: %s could not advertise to user (no hwcap bits)\n", f->name);
+	}
+
+	return 1;
+}
+
+static int __init feat_disable(struct dt_cpu_feature *f)
+{
+	return 0;
+}
+
+static int __init feat_enable_hv(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	if (!hv_mode) {
+		pr_err("CPU feature hypervisor present in device tree but HV mode not enabled in the CPU. Ignoring.\n");
+		return 0;
+	}
+
+	mtspr(SPRN_LPID, 0);
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr &=  ~LPCR_LPES0; /* HV external interrupts */
+	mtspr(SPRN_LPCR, lpcr);
+
+	cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+
+	return 1;
+}
+
+static int __init feat_enable_le(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_TRUE_LE;
+	return 1;
+}
+
+static int __init feat_enable_smt(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_SMT;
+	return 1;
+}
+
+static int __init feat_enable_idle_nap(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	/* Set PECE wakeup modes for ISA 207 */
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |=  LPCR_PECE0;
+	lpcr |=  LPCR_PECE1;
+	lpcr |=  LPCR_PECE2;
+	mtspr(SPRN_LPCR, lpcr);
+
+	return 1;
+}
+
+static int __init feat_enable_align_dsisr(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->cpu_features &= ~CPU_FTR_NODSISRALIGN;
+
+	return 1;
+}
+
+static int __init feat_enable_idle_stop(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	/* Set PECE wakeup modes for ISAv3.0B */
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |=  LPCR_PECE0;
+	lpcr |=  LPCR_PECE1;
+	lpcr |=  LPCR_PECE2;
+	mtspr(SPRN_LPCR, lpcr);
+
+	return 1;
+}
+
+static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr &= ~LPCR_ISL;
+
+	/* VRMASD */
+	lpcr |= LPCR_VPM0;
+	lpcr &= ~LPCR_VPM1;
+	lpcr |= 0x10UL << LPCR_VRMASD_SH; /* L=1 LP=00 */
+	mtspr(SPRN_LPCR, lpcr);
+
+	cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+	return 1;
+}
+
+static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr &= ~LPCR_ISL;
+	mtspr(SPRN_LPCR, lpcr);
+
+	cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+	return 1;
+}
+
+
+static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_PPC_RADIX_MMU
+	cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
+	cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+	return 1;
+#endif
+	return 0;
+}
+
+static int __init feat_enable_dscr(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	feat_enable(f);
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr &= ~LPCR_DPFD;
+	lpcr |=  (4UL << LPCR_DPFD_SH);
+	mtspr(SPRN_LPCR, lpcr);
+
+	return 1;
+}
+
+static void hfscr_pmu_enable(void)
+{
+	u64 hfscr = mfspr(SPRN_HFSCR);
+	hfscr |= PPC_BIT(60);
+	mtspr(SPRN_HFSCR, hfscr);
+}
+
+static void init_pmu_power8(void)
+{
+	if (hv_mode) {
+		mtspr(SPRN_MMCRC, 0);
+		mtspr(SPRN_MMCRH, 0);
+	}
+
+	mtspr(SPRN_MMCRA, 0);
+	mtspr(SPRN_MMCR0, 0);
+	mtspr(SPRN_MMCR1, 0);
+	mtspr(SPRN_MMCR2, 0);
+	mtspr(SPRN_MMCRS, 0);
+}
+
+static int __init feat_enable_mce_power8(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->platform = "power8";
+	cur_cpu_spec->flush_tlb = __flush_tlb_power8;
+	cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p8;
+
+	return 1;
+}
+
+static int __init feat_enable_pmu_power8(struct dt_cpu_feature *f)
+{
+	hfscr_pmu_enable();
+
+	init_pmu_power8();
+	init_pmu_registers = init_pmu_power8;
+
+	cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+	if (pvr_version_is(PVR_POWER8E))
+		cur_cpu_spec->cpu_features |= CPU_FTR_PMAO_BUG;
+
+	cur_cpu_spec->num_pmcs		= 6;
+	cur_cpu_spec->pmc_type		= PPC_PMC_IBM;
+	cur_cpu_spec->oprofile_cpu_type	= "ppc64/power8";
+
+	return 1;
+}
+
+static void init_pmu_power9(void)
+{
+	if (hv_mode)
+		mtspr(SPRN_MMCRC, 0);
+
+	mtspr(SPRN_MMCRA, 0);
+	mtspr(SPRN_MMCR0, 0);
+	mtspr(SPRN_MMCR1, 0);
+	mtspr(SPRN_MMCR2, 0);
+}
+
+static int __init feat_enable_mce_power9(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->platform = "power9";
+	cur_cpu_spec->flush_tlb = __flush_tlb_power9;
+	cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p9;
+
+	return 1;
+}
+
+static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f)
+{
+	hfscr_pmu_enable();
+
+	init_pmu_power9();
+	init_pmu_registers = init_pmu_power9;
+
+	cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+
+	cur_cpu_spec->num_pmcs		= 6;
+	cur_cpu_spec->pmc_type		= PPC_PMC_IBM;
+	cur_cpu_spec->oprofile_cpu_type	= "ppc64/power9";
+
+	return 1;
+}
+
+static int __init feat_enable_tm(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	feat_enable(f);
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NOSC;
+	return 1;
+#endif
+	return 0;
+}
+
+static int __init feat_enable_fp(struct dt_cpu_feature *f)
+{
+	feat_enable(f);
+	cur_cpu_spec->cpu_features &= ~CPU_FTR_FPU_UNAVAILABLE;
+
+	return 1;
+}
+
+static int __init feat_enable_vector(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_ALTIVEC
+	feat_enable(f);
+	cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
+	cur_cpu_spec->cpu_features |= CPU_FTR_VMX_COPY;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
+
+	return 1;
+#endif
+	return 0;
+}
+
+static int __init feat_enable_vsx(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_VSX
+	feat_enable(f);
+	cur_cpu_spec->cpu_features |= CPU_FTR_VSX;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_VSX;
+
+	return 1;
+#endif
+	return 0;
+}
+
+static int __init feat_enable_purr(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->cpu_features |= CPU_FTR_PURR | CPU_FTR_SPURR;
+
+	return 1;
+}
+
+static int __init feat_enable_ebb(struct dt_cpu_feature *f)
+{
+	/*
+	 * PPC_FEATURE2_EBB is enabled in PMU init code because it has
+	 * historically been related to the PMU facility. This may have
+	 * to be decoupled if EBB becomes more generic. For now, follow
+	 * existing convention.
+	 */
+	f->hwcap_bit_nr = -1;
+	feat_enable(f);
+
+	return 1;
+}
+
+static int __init feat_enable_dbell(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	/* P9 has an HFSCR for privileged state */
+	feat_enable(f);
+
+	cur_cpu_spec->cpu_features |= CPU_FTR_DBELL;
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |=  LPCR_PECEDH; /* hyp doorbell wakeup */
+	mtspr(SPRN_LPCR, lpcr);
+
+	return 1;
+}
+
+static int __init feat_enable_hvi(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	/*
+	 * POWER9 XIVE interrupts including in OPAL XICS compatibility
+	 * are always delivered as hypervisor virtualization interrupts (HVI)
+	 * rather than EE.
+	 *
+	 * However LPES0 is not set here, in the chance that an EE does get
+	 * delivered to the host somehow, the EE handler would not expect it
+	 * to be delivered in LPES0 mode (e.g., using SRR[01]). This could
+	 * happen if there is a bug in interrupt controller code, or IC is
+	 * misconfigured in systemsim.
+	 */
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |= LPCR_HVICE;	/* enable hvi interrupts */
+	lpcr |= LPCR_HEIC;	/* disable ee interrupts when MSR_HV */
+	lpcr |= LPCR_PECE_HVEE; /* hvi can wake from stop */
+	mtspr(SPRN_LPCR, lpcr);
+
+	return 1;
+}
+
+static int __init feat_enable_large_ci(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->mmu_features |= MMU_FTR_CI_LARGE_PAGE;
+
+	return 1;
+}
+
+struct dt_cpu_feature_match {
+	const char *name;
+	int (*enable)(struct dt_cpu_feature *f);
+	u64 cpu_ftr_bit_mask;
+};
+
+static struct dt_cpu_feature_match __initdata
+		dt_cpu_feature_match_table[] = {
+	{"hypervisor", feat_enable_hv, 0},
+	{"big-endian", feat_enable, 0},
+	{"little-endian", feat_enable_le, CPU_FTR_REAL_LE},
+	{"smt", feat_enable_smt, 0},
+	{"interrupt-facilities", feat_enable, 0},
+	{"timer-facilities", feat_enable, 0},
+	{"timer-facilities-v3", feat_enable, 0},
+	{"debug-facilities", feat_enable, 0},
+	{"come-from-address-register", feat_enable, CPU_FTR_CFAR},
+	{"branch-tracing", feat_enable, 0},
+	{"floating-point", feat_enable_fp, 0},
+	{"vector", feat_enable_vector, 0},
+	{"vector-scalar", feat_enable_vsx, 0},
+	{"vector-scalar-v3", feat_enable, 0},
+	{"decimal-floating-point", feat_enable, 0},
+	{"decimal-integer", feat_enable, 0},
+	{"quadword-load-store", feat_enable, 0},
+	{"vector-crypto", feat_enable, 0},
+	{"mmu-hash", feat_enable_mmu_hash, 0},
+	{"mmu-radix", feat_enable_mmu_radix, 0},
+	{"mmu-hash-v3", feat_enable_mmu_hash_v3, 0},
+	{"virtual-page-class-key-protection", feat_enable, 0},
+	{"transactional-memory", feat_enable_tm, CPU_FTR_TM},
+	{"transactional-memory-v3", feat_enable_tm, 0},
+	{"idle-nap", feat_enable_idle_nap, 0},
+	{"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0},
+	{"idle-stop", feat_enable_idle_stop, 0},
+	{"machine-check-power8", feat_enable_mce_power8, 0},
+	{"performance-monitor-power8", feat_enable_pmu_power8, 0},
+	{"data-stream-control-register", feat_enable_dscr, CPU_FTR_DSCR},
+	{"event-based-branch", feat_enable_ebb, 0},
+	{"target-address-register", feat_enable, 0},
+	{"branch-history-rolling-buffer", feat_enable, 0},
+	{"control-register", feat_enable, CPU_FTR_CTRL},
+	{"processor-control-facility", feat_enable_dbell, CPU_FTR_DBELL},
+	{"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
+	{"processor-utilization-of-resources-register", feat_enable_purr, 0},
+	{"subcore", feat_enable, CPU_FTR_SUBCORE},
+	{"no-execute", feat_enable, 0},
+	{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
+	{"cache-inhibited-large-page", feat_enable_large_ci, 0},
+	{"coprocessor-icswx", feat_enable, CPU_FTR_ICSWX},
+	{"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
+	{"program-priority-register", feat_enable, CPU_FTR_HAS_PPR},
+	{"wait", feat_enable, 0},
+	{"atomic-memory-operations", feat_enable, 0},
+	{"branch-v3", feat_enable, 0},
+	{"copy-paste", feat_enable, 0},
+	{"decimal-floating-point-v3", feat_enable, 0},
+	{"decimal-integer-v3", feat_enable, 0},
+	{"fixed-point-v3", feat_enable, 0},
+	{"floating-point-v3", feat_enable, 0},
+	{"group-start-register", feat_enable, 0},
+	{"pc-relative-addressing", feat_enable, 0},
+	{"machine-check-power9", feat_enable_mce_power9, 0},
+	{"performance-monitor-power9", feat_enable_pmu_power9, 0},
+	{"event-based-branch-v3", feat_enable, 0},
+	{"random-number-generator", feat_enable, 0},
+	{"system-call-vectored", feat_disable, 0},
+	{"trace-interrupt-v3", feat_enable, 0},
+	{"vector-v3", feat_enable, 0},
+	{"vector-binary128", feat_enable, 0},
+	{"vector-binary16", feat_enable, 0},
+	{"wait-v3", feat_enable, 0},
+};
+
+/* XXX: how to configure this? Default + boot time? */
+#ifdef CONFIG_PPC_CPUFEATURES_ENABLE_UNKNOWN
+#define CPU_FEATURE_ENABLE_UNKNOWN 1
+#else
+#define CPU_FEATURE_ENABLE_UNKNOWN 0
+#endif
+
+static void __init cpufeatures_setup_start(u32 isa)
+{
+	pr_info("setup for ISA %d\n", isa);
+
+	if (isa >= 3000) {
+		cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300;
+		cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00;
+	}
+}
+
+static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
+{
+	const struct dt_cpu_feature_match *m;
+	bool known = false;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dt_cpu_feature_match_table); i++) {
+		m = &dt_cpu_feature_match_table[i];
+		if (!strcmp(f->name, m->name)) {
+			known = true;
+			if (m->enable(f))
+				break;
+
+			pr_info("not enabling: %s (disabled or unsupported by kernel)\n",
+				f->name);
+			return false;
+		}
+	}
+
+	if (!known && CPU_FEATURE_ENABLE_UNKNOWN) {
+		if (!feat_try_enable_unknown(f)) {
+			pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
+				f->name);
+			return false;
+		}
+	}
+
+	if (m->cpu_ftr_bit_mask)
+		cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask;
+
+	if (known)
+		pr_debug("enabling: %s\n", f->name);
+	else
+		pr_debug("enabling: %s (unknown)\n", f->name);
+
+	return true;
+}
+
+static __init void cpufeatures_cpu_quirks(void)
+{
+	int version = mfspr(SPRN_PVR);
+
+	/*
+	 * Not all quirks can be derived from the cpufeatures device tree.
+	 */
+	if ((version & 0xffffff00) == 0x004e0100)
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
+}
+
+static void __init cpufeatures_setup_finished(void)
+{
+	cpufeatures_cpu_quirks();
+
+	if (hv_mode && !(cur_cpu_spec->cpu_features & CPU_FTR_HVMODE)) {
+		pr_err("hypervisor not present in device tree but HV mode is enabled in the CPU. Enabling.\n");
+		cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+	}
+
+	system_registers.lpcr = mfspr(SPRN_LPCR);
+	system_registers.hfscr = mfspr(SPRN_HFSCR);
+	system_registers.fscr = mfspr(SPRN_FSCR);
+
+	cpufeatures_flush_tlb();
+
+	pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n",
+		cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
+}
+
+static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
+					int depth, void *data)
+{
+	if (of_flat_dt_is_compatible(node, "ibm,powerpc-cpu-features")
+	    && of_get_flat_dt_prop(node, "isa", NULL))
+		return 1;
+
+	return 0;
+}
+
+static bool __initdata using_dt_cpu_ftrs = false;
+
+bool __init dt_cpu_ftrs_in_use(void)
+{
+	return using_dt_cpu_ftrs;
+}
+
+bool __init dt_cpu_ftrs_init(void *fdt)
+{
+	/* Setup and verify the FDT, if it fails we just bail */
+	if (!early_init_dt_verify(fdt))
+		return false;
+
+	if (!of_scan_flat_dt(fdt_find_cpu_features, NULL))
+		return false;
+
+	cpufeatures_setup_cpu();
+
+	using_dt_cpu_ftrs = true;
+	return true;
+}
+
+static int nr_dt_cpu_features;
+static struct dt_cpu_feature *dt_cpu_features;
+
+static int __init process_cpufeatures_node(unsigned long node,
+					  const char *uname, int i)
+{
+	const __be32 *prop;
+	struct dt_cpu_feature *f;
+	int len;
+
+	f = &dt_cpu_features[i];
+	memset(f, 0, sizeof(struct dt_cpu_feature));
+
+	f->node = node;
+
+	f->name = uname;
+
+	prop = of_get_flat_dt_prop(node, "isa", &len);
+	if (!prop) {
+		pr_warn("%s: missing isa property\n", uname);
+		return 0;
+	}
+	f->isa = be32_to_cpup(prop);
+
+	prop = of_get_flat_dt_prop(node, "usable-privilege", &len);
+	if (!prop) {
+		pr_warn("%s: missing usable-privilege property", uname);
+		return 0;
+	}
+	f->usable_privilege = be32_to_cpup(prop);
+
+	prop = of_get_flat_dt_prop(node, "hv-support", &len);
+	if (prop)
+		f->hv_support = be32_to_cpup(prop);
+	else
+		f->hv_support = HV_SUPPORT_NONE;
+
+	prop = of_get_flat_dt_prop(node, "os-support", &len);
+	if (prop)
+		f->os_support = be32_to_cpup(prop);
+	else
+		f->os_support = OS_SUPPORT_NONE;
+
+	prop = of_get_flat_dt_prop(node, "hfscr-bit-nr", &len);
+	if (prop)
+		f->hfscr_bit_nr = be32_to_cpup(prop);
+	else
+		f->hfscr_bit_nr = -1;
+	prop = of_get_flat_dt_prop(node, "fscr-bit-nr", &len);
+	if (prop)
+		f->fscr_bit_nr = be32_to_cpup(prop);
+	else
+		f->fscr_bit_nr = -1;
+	prop = of_get_flat_dt_prop(node, "hwcap-bit-nr", &len);
+	if (prop)
+		f->hwcap_bit_nr = be32_to_cpup(prop);
+	else
+		f->hwcap_bit_nr = -1;
+
+	if (f->usable_privilege & USABLE_HV) {
+		if (!(mfmsr() & MSR_HV)) {
+			pr_warn("%s: HV feature passed to guest\n", uname);
+			return 0;
+		}
+
+		if (f->hv_support == HV_SUPPORT_NONE && f->hfscr_bit_nr != -1) {
+			pr_warn("%s: unwanted hfscr_bit_nr\n", uname);
+			return 0;
+		}
+
+		if (f->hv_support == HV_SUPPORT_HFSCR) {
+			if (f->hfscr_bit_nr == -1) {
+				pr_warn("%s: missing hfscr_bit_nr\n", uname);
+				return 0;
+			}
+		}
+	} else {
+		if (f->hv_support != HV_SUPPORT_NONE || f->hfscr_bit_nr != -1) {
+			pr_warn("%s: unwanted hv_support/hfscr_bit_nr\n", uname);
+			return 0;
+		}
+	}
+
+	if (f->usable_privilege & USABLE_OS) {
+		if (f->os_support == OS_SUPPORT_NONE && f->fscr_bit_nr != -1) {
+			pr_warn("%s: unwanted fscr_bit_nr\n", uname);
+			return 0;
+		}
+
+		if (f->os_support == OS_SUPPORT_FSCR) {
+			if (f->fscr_bit_nr == -1) {
+				pr_warn("%s: missing fscr_bit_nr\n", uname);
+				return 0;
+			}
+		}
+	} else {
+		if (f->os_support != OS_SUPPORT_NONE || f->fscr_bit_nr != -1) {
+			pr_warn("%s: unwanted os_support/fscr_bit_nr\n", uname);
+			return 0;
+		}
+	}
+
+	if (!(f->usable_privilege & USABLE_PR)) {
+		if (f->hwcap_bit_nr != -1) {
+			pr_warn("%s: unwanted hwcap_bit_nr\n", uname);
+			return 0;
+		}
+	}
+
+	/* Do all the independent features in the first pass */
+	if (!of_get_flat_dt_prop(node, "dependencies", &len)) {
+		if (cpufeatures_process_feature(f))
+			f->enabled = 1;
+		else
+			f->disabled = 1;
+	}
+
+	return 0;
+}
+
+static void __init cpufeatures_deps_enable(struct dt_cpu_feature *f)
+{
+	const __be32 *prop;
+	int len;
+	int nr_deps;
+	int i;
+
+	if (f->enabled || f->disabled)
+		return;
+
+	prop = of_get_flat_dt_prop(f->node, "dependencies", &len);
+	if (!prop) {
+		pr_warn("%s: missing dependencies property", f->name);
+		return;
+	}
+
+	nr_deps = len / sizeof(int);
+
+	for (i = 0; i < nr_deps; i++) {
+		unsigned long phandle = be32_to_cpu(prop[i]);
+		int j;
+
+		for (j = 0; j < nr_dt_cpu_features; j++) {
+			struct dt_cpu_feature *d = &dt_cpu_features[j];
+
+			if (of_get_flat_dt_phandle(d->node) == phandle) {
+				cpufeatures_deps_enable(d);
+				if (d->disabled) {
+					f->disabled = 1;
+					return;
+				}
+			}
+		}
+	}
+
+	if (cpufeatures_process_feature(f))
+		f->enabled = 1;
+	else
+		f->disabled = 1;
+}
+
+static int __init scan_cpufeatures_subnodes(unsigned long node,
+					  const char *uname,
+					  void *data)
+{
+	int *count = data;
+
+	process_cpufeatures_node(node, uname, *count);
+
+	(*count)++;
+
+	return 0;
+}
+
+static int __init count_cpufeatures_subnodes(unsigned long node,
+					  const char *uname,
+					  void *data)
+{
+	int *count = data;
+
+	(*count)++;
+
+	return 0;
+}
+
+static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
+					    *uname, int depth, void *data)
+{
+	const __be32 *prop;
+	int count, i;
+	u32 isa;
+
+	/* We are scanning "ibm,powerpc-cpu-features" nodes only */
+	if (!of_flat_dt_is_compatible(node, "ibm,powerpc-cpu-features"))
+		return 0;
+
+	prop = of_get_flat_dt_prop(node, "isa", NULL);
+	if (!prop)
+		/* We checked before, "can't happen" */
+		return 0;
+
+	isa = be32_to_cpup(prop);
+
+	/* Count and allocate space for cpu features */
+	of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes,
+						&nr_dt_cpu_features);
+	dt_cpu_features = __va(
+		memblock_alloc(sizeof(struct dt_cpu_feature)*
+				nr_dt_cpu_features, PAGE_SIZE));
+
+	cpufeatures_setup_start(isa);
+
+	/* Scan nodes into dt_cpu_features and enable those without deps  */
+	count = 0;
+	of_scan_flat_dt_subnodes(node, scan_cpufeatures_subnodes, &count);
+
+	/* Recursive enable remaining features with dependencies */
+	for (i = 0; i < nr_dt_cpu_features; i++) {
+		struct dt_cpu_feature *f = &dt_cpu_features[i];
+
+		cpufeatures_deps_enable(f);
+	}
+
+	prop = of_get_flat_dt_prop(node, "display-name", NULL);
+	if (prop && strlen((char *)prop) != 0) {
+		strlcpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name));
+		cur_cpu_spec->cpu_name = dt_cpu_name;
+	}
+
+	cpufeatures_setup_finished();
+
+	memblock_free(__pa(dt_cpu_features),
+			sizeof(struct dt_cpu_feature)*nr_dt_cpu_features);
+
+	return 0;
+}
+
+void __init dt_cpu_ftrs_scan(void)
+{
+	of_scan_flat_dt(dt_cpu_ftrs_scan_callback, NULL);
+}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index d2f0afeae5a0..40c4887c27b6 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -57,6 +57,7 @@
 #include <asm/fadump.h>
 #include <asm/epapr_hcalls.h>
 #include <asm/firmware.h>
+#include <asm/dt_cpu_ftrs.h>
 
 #include <mm/mmu_decl.h>
 
@@ -375,23 +376,31 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	 * A POWER6 partition in "POWER6 architected" mode
 	 * uses the 0x0f000002 PVR value; in POWER5+ mode
 	 * it uses 0x0f000001.
+	 *
+	 * If we're using device tree CPU feature discovery then we don't
+	 * support the cpu-version property, and it's the responsibility of the
+	 * firmware/hypervisor to provide the correct feature set for the
+	 * architecture level via the ibm,powerpc-cpu-features binding.
 	 */
-	prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
-	if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
-		identify_cpu(0, be32_to_cpup(prop));
+	if (!dt_cpu_ftrs_in_use()) {
+		prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
+		if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
+			identify_cpu(0, be32_to_cpup(prop));
 
-	identical_pvr_fixup(node);
+		check_cpu_feature_properties(node);
+		check_cpu_pa_features(node);
+	}
 
-	check_cpu_feature_properties(node);
-	check_cpu_pa_features(node);
+	identical_pvr_fixup(node);
 	init_mmu_slb_size(node);
 
 #ifdef CONFIG_PPC64
-	if (nthreads > 1)
-		cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
-	else
+	if (nthreads == 1)
 		cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
+	else if (!dt_cpu_ftrs_in_use())
+		cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
 #endif
+
 	return 0;
 }
 
@@ -721,6 +730,8 @@ void __init early_init_devtree(void *params)
 
 	DBG("Scanning CPUs ...\n");
 
+	dt_cpu_ftrs_scan();
+
 	/* Retrieve CPU related informations from the flat tree
 	 * (altivec support, boot CPU ID, ...)
 	 */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 0f7b15860a06..1bf8978ec8da 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -49,6 +49,7 @@
 #include <asm/paca.h>
 #include <asm/time.h>
 #include <asm/cputable.h>
+#include <asm/dt_cpu_ftrs.h>
 #include <asm/sections.h>
 #include <asm/btext.h>
 #include <asm/nvram.h>
@@ -265,8 +266,10 @@ void __init early_setup(unsigned long dt_ptr)
 
 	/* -------- printk is _NOT_ safe to use here ! ------- */
 
-	/* Identify CPU type */
-	identify_cpu(0, mfspr(SPRN_PVR));
+	/* Try new device tree based feature discovery ... */
+	if (!dt_cpu_ftrs_init(__va(dt_ptr)))
+		/* Otherwise use the old style CPU table */
+		identify_cpu(0, mfspr(SPRN_PVR));
 
 	/* Assume we're on cpu 0 for now. Don't write to the paca yet! */
 	initialise_paca(&boot_paca, 0);
@@ -532,6 +535,9 @@ void __init initialize_cache_info(void)
 	dcache_bsize = ppc64_caches.l1d.block_size;
 	icache_bsize = ppc64_caches.l1i.block_size;
 
+	cur_cpu_spec->dcache_bsize = dcache_bsize;
+	cur_cpu_spec->icache_bsize = icache_bsize;
+
 	DBG(" <- initialize_cache_info()\n");
 }
 
-- 
cgit v1.2.3


From d45b897b11eaf9847c4b9bc99a91279041b1bbf7 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 6 Mar 2017 20:31:21 +0200
Subject: virtio_net: allow specifying context for rx

With mergeable buffers we never use s/g for rx,
so allow specifying context in that case.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 71f447ab440e..108f923f8a69 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2044,6 +2044,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 	int ret = -ENOMEM;
 	int i, total_vqs;
 	const char **names;
+	bool *ctx;
 
 	/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
 	 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
@@ -2062,6 +2063,13 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 	names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
 	if (!names)
 		goto err_names;
+	if (vi->mergeable_rx_bufs) {
+		ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
+		if (!ctx)
+			goto err_ctx;
+	} else {
+		ctx = NULL;
+	}
 
 	/* Parameters for control virtqueue, if any */
 	if (vi->has_cvq) {
@@ -2077,9 +2085,12 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 		sprintf(vi->sq[i].name, "output.%d", i);
 		names[rxq2vq(i)] = vi->rq[i].name;
 		names[txq2vq(i)] = vi->sq[i].name;
+		if (ctx)
+			ctx[rxq2vq(i)] = true;
 	}
 
-	ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, callbacks, names, NULL);
+	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
+					 names, ctx, NULL);
 	if (ret)
 		goto err_find;
 
@@ -2101,6 +2112,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 	return 0;
 
 err_find:
+	kfree(ctx);
+err_ctx:
 	kfree(names);
 err_names:
 	kfree(callbacks);
-- 
cgit v1.2.3


From 680557cf79f82623e2c4fd42733077d60a843513 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 6 Mar 2017 21:29:47 +0200
Subject: virtio_net: rework mergeable buffer handling

Use the new _ctx virtio API to maintain true length for each buffer.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 89 +++++++++++++++++++++++-------------------------
 1 file changed, 43 insertions(+), 46 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 108f923f8a69..2170a5caf68d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -250,24 +250,6 @@ static void skb_xmit_done(struct virtqueue *vq)
 	netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
-static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
-{
-	unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1);
-	return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
-}
-
-static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx)
-{
-	return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN);
-
-}
-
-static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize)
-{
-	unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
-	return (unsigned long)buf | (size - 1);
-}
-
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 				   struct receive_queue *rq,
@@ -511,15 +493,13 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 
 	while (--*num_buf) {
 		unsigned int buflen;
-		unsigned long ctx;
 		void *buf;
 		int off;
 
-		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &buflen);
-		if (unlikely(!ctx))
+		buf = virtqueue_get_buf(rq->vq, &buflen);
+		if (unlikely(!buf))
 			goto err_buf;
 
-		buf = mergeable_ctx_to_buf_address(ctx);
 		p = virt_to_head_page(buf);
 		off = buf - page_address(p);
 
@@ -548,10 +528,10 @@ err_buf:
 static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 struct virtnet_info *vi,
 					 struct receive_queue *rq,
-					 unsigned long ctx,
+					 void *buf,
+					 void *ctx,
 					 unsigned int len)
 {
-	void *buf = mergeable_ctx_to_buf_address(ctx);
 	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
 	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
 	struct page *page = virt_to_head_page(buf);
@@ -639,7 +619,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	}
 	rcu_read_unlock();
 
-	truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+	if (unlikely(len > (unsigned long)ctx)) {
+		pr_debug("%s: rx error: len %u exceeds truesize 0x%lu\n",
+			 dev->name, len, (unsigned long)ctx);
+		dev->stats.rx_length_errors++;
+		goto err_skb;
+	}
+	truesize = (unsigned long)ctx;
 	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
 	curr_skb = head_skb;
 
@@ -648,7 +634,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	while (--num_buf) {
 		int num_skb_frags;
 
-		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+		buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
 		if (unlikely(!ctx)) {
 			pr_debug("%s: rx error: %d buffers out of %d missing\n",
 				 dev->name, num_buf,
@@ -658,8 +644,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			goto err_buf;
 		}
 
-		buf = mergeable_ctx_to_buf_address(ctx);
 		page = virt_to_head_page(buf);
+		if (unlikely(len > (unsigned long)ctx)) {
+			pr_debug("%s: rx error: len %u exceeds truesize 0x%lu\n",
+				 dev->name, len, (unsigned long)ctx);
+			dev->stats.rx_length_errors++;
+			goto err_skb;
+		}
+		truesize = (unsigned long)ctx;
 
 		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
 		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
@@ -675,7 +667,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			head_skb->truesize += nskb->truesize;
 			num_skb_frags = 0;
 		}
-		truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
 		if (curr_skb != head_skb) {
 			head_skb->data_len += len;
 			head_skb->len += len;
@@ -700,14 +691,14 @@ err_xdp:
 err_skb:
 	put_page(page);
 	while (--num_buf) {
-		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
-		if (unlikely(!ctx)) {
+		buf = virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!buf)) {
 			pr_debug("%s: rx error: %d buffers missing\n",
 				 dev->name, num_buf);
 			dev->stats.rx_length_errors++;
 			break;
 		}
-		page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx));
+		page = virt_to_head_page(buf);
 		put_page(page);
 	}
 err_buf:
@@ -718,7 +709,7 @@ xdp_xmit:
 }
 
 static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
-		       void *buf, unsigned int len)
+		       void *buf, unsigned int len, void **ctx)
 {
 	struct net_device *dev = vi->dev;
 	struct sk_buff *skb;
@@ -729,9 +720,7 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
 		if (vi->mergeable_rx_bufs) {
-			unsigned long ctx = (unsigned long)buf;
-			void *base = mergeable_ctx_to_buf_address(ctx);
-			put_page(virt_to_head_page(base));
+			put_page(virt_to_head_page(buf));
 		} else if (vi->big_packets) {
 			give_pages(rq, buf);
 		} else {
@@ -741,7 +730,7 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 	}
 
 	if (vi->mergeable_rx_bufs)
-		skb = receive_mergeable(dev, vi, rq, (unsigned long)buf, len);
+		skb = receive_mergeable(dev, vi, rq, buf, ctx, len);
 	else if (vi->big_packets)
 		skb = receive_big(dev, vi, rq, buf, len);
 	else
@@ -869,7 +858,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 	struct page_frag *alloc_frag = &rq->alloc_frag;
 	unsigned int headroom = virtnet_get_headroom(vi);
 	char *buf;
-	unsigned long ctx;
+	void *ctx;
 	int err;
 	unsigned int len, hole;
 
@@ -879,7 +868,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 
 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
 	buf += headroom; /* advance address leaving hole at front of pkt */
-	ctx = mergeable_buf_to_ctx(buf, len);
+	ctx = (void *)(unsigned long)len;
 	get_page(alloc_frag->page);
 	alloc_frag->offset += len + headroom;
 	hole = alloc_frag->size - alloc_frag->offset;
@@ -894,7 +883,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 	}
 
 	sg_init_one(rq->sg, buf, len);
-	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
+	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
 	if (err < 0)
 		put_page(virt_to_head_page(buf));
 
@@ -988,10 +977,20 @@ static int virtnet_receive(struct receive_queue *rq, int budget)
 	void *buf;
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 
-	while (received < budget &&
-	       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
-		bytes += receive_buf(vi, rq, buf, len);
-		received++;
+	if (vi->mergeable_rx_bufs) {
+		void *ctx;
+
+		while (received < budget &&
+		       (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
+			bytes += receive_buf(vi, rq, buf, len, ctx);
+			received++;
+		}
+	} else {
+		while (received < budget &&
+		       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
+			bytes += receive_buf(vi, rq, buf, len, NULL);
+			received++;
+		}
 	}
 
 	if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
@@ -2014,9 +2013,7 @@ static void free_unused_bufs(struct virtnet_info *vi)
 
 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
 			if (vi->mergeable_rx_bufs) {
-				unsigned long ctx = (unsigned long)buf;
-				void *base = mergeable_ctx_to_buf_address(ctx);
-				put_page(virt_to_head_page(base));
+				put_page(virt_to_head_page(buf));
 			} else if (vi->big_packets) {
 				give_pages(&vi->rq[i], buf);
 			} else {
-- 
cgit v1.2.3


From e377fcc8486d40867c6c217077ad0fa40977e060 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 6 Mar 2017 22:21:35 +0200
Subject: virtio_net: reduce alignment for buffers

We don't need to align length to any particular
value anymore. Aligning to L1 cache size probably
sill makes sense to reduce false sharing.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 2170a5caf68d..6e47e653a05d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -53,17 +53,6 @@ module_param(gso, bool, 0444);
  */
 DECLARE_EWMA(pkt_len, 0, 64)
 
-/* With mergeable buffers we align buffer address and use the low bits to
- * encode its true size. Buffer size is up to 1 page so we need to align to
- * square root of page size to ensure we reserve enough bits to encode the true
- * size.
- */
-#define MERGEABLE_BUFFER_MIN_ALIGN_SHIFT ((PAGE_SHIFT + 1) / 2)
-
-/* Minimum alignment for mergeable packet buffers. */
-#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, \
-				   1 << MERGEABLE_BUFFER_MIN_ALIGN_SHIFT)
-
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
 struct virtnet_stats {
@@ -849,7 +838,7 @@ static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len)
 
 	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
 			GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
-	return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
+	return ALIGN(len, L1_CACHE_BYTES);
 }
 
 static int add_recvbuf_mergeable(struct virtnet_info *vi,
-- 
cgit v1.2.3


From d85b758f72b05a774045545f24d70980e3e9aac4 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 9 Mar 2017 02:21:21 +0200
Subject: virtio_net: fix support for small rings

When ring size is small (<32 entries) making buffers smaller means a
full ring might not be able to hold enough buffers to fit a single large
packet.

Make sure a ring full of buffers is large enough to allow at least one
packet of max size.

Fixes: 2613af0ed18a ("virtio_net: migrate mergeable rx buffers to page frag allocators")
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6e47e653a05d..f9381e1f365f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/average.h>
+#include <net/route.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -98,6 +99,9 @@ struct receive_queue {
 	/* RX: fragments + linear part + virtio header */
 	struct scatterlist sg[MAX_SKB_FRAGS + 2];
 
+	/* Min single buffer size for mergeable buffers case. */
+	unsigned int min_buf_len;
+
 	/* Name of this receive queue: input.$index */
 	char name[40];
 };
@@ -831,13 +835,14 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 	return err;
 }
 
-static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len)
+static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
+					  struct ewma_pkt_len *avg_pkt_len)
 {
 	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	unsigned int len;
 
 	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
-			GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
+				rq->min_buf_len - hdr_len, PAGE_SIZE - hdr_len);
 	return ALIGN(len, L1_CACHE_BYTES);
 }
 
@@ -851,7 +856,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 	int err;
 	unsigned int len, hole;
 
-	len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
+	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
 	if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
 		return -ENOMEM;
 
@@ -2023,6 +2028,21 @@ static void virtnet_del_vqs(struct virtnet_info *vi)
 	virtnet_free_queues(vi);
 }
 
+/* How large should a single buffer be so a queue full of these can fit at
+ * least one full packet?
+ * Logic below assumes the mergeable buffer header is used.
+ */
+static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
+{
+	const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	unsigned int rq_size = virtqueue_get_vring_size(vq);
+	unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
+	unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
+	unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
+
+	return max(min_buf_len, hdr_len);
+}
+
 static int virtnet_find_vqs(struct virtnet_info *vi)
 {
 	vq_callback_t **callbacks;
@@ -2088,6 +2108,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		vi->rq[i].vq = vqs[rxq2vq(i)];
+		vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
 		vi->sq[i].vq = vqs[txq2vq(i)];
 	}
 
@@ -2174,7 +2195,8 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 
 	BUG_ON(queue_index >= vi->max_queue_pairs);
 	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
-	return sprintf(buf, "%u\n", get_mergeable_buf_len(avg));
+	return sprintf(buf, "%u\n",
+		       get_mergeable_buf_len(&vi->rq[queue_index], avg));
 }
 
 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
-- 
cgit v1.2.3


From 5f24df0945b3ce3be71602c2655f689d42a24181 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 29 Mar 2017 23:12:23 +0300
Subject: virtio_net: don't reset twice on XDP on/off

We already do a reset once in remove_vq_common -
there appears to be no point in doing another one
when we add/remove XDP.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f9381e1f365f..fd5145bdfb77 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1747,7 +1747,6 @@ static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp)
 	virtnet_freeze_down(dev);
 	_remove_vq_common(vi);
 
-	dev->config->reset(dev);
 	virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
-- 
cgit v1.2.3


From 9ea762a5ae45235eb3e5ec9c05c33cf37db78d70 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Thu, 30 Mar 2017 13:13:33 +0200
Subject: virtio: virtio_driver doc

Add comments for the virtio_driver members that were not documented.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index ed04753278d4..28b0e965360f 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -165,9 +165,13 @@ int virtio_device_restore(struct virtio_device *dev);
  * @feature_table_legacy: same as feature_table but when working in legacy mode.
  * @feature_table_size_legacy: number of entries in feature table legacy array.
  * @probe: the function to call when a device is found.  Returns 0 or -errno.
+ * @scan: optional function to call after successful probe; intended
+ *    for virtio-scsi to invoke a scan.
  * @remove: the function to call when a device is removed.
  * @config_changed: optional function to call when the device configuration
  *    changes; may be called in interrupt context.
+ * @freeze: optional function to call during suspend/hibernation.
+ * @restore: optional function to call on resume.
  */
 struct virtio_driver {
 	struct device_driver driver;
-- 
cgit v1.2.3


From fb9de9704775d6190c204f4ddf8da4cfdac26be1 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 7 Apr 2017 08:25:09 +0300
Subject: ptr_ring: batch ring zeroing

A known weakness in ptr_ring design is that it does not handle well the
situation when ring is almost full: as entries are consumed they are
immediately used again by the producer, so consumer and producer are
writing to a shared cache line.

To fix this, add batching to consume calls: as entries are
consumed do not write NULL into the ring until we get
a multiple (in current implementation 2x) of cache lines
away from the producer. At that point, write them all out.

We do the write out in the reverse order to keep
producer from sharing cache with consumer for as long
as possible.

Writeout also triggers when ring wraps around - there's
no special reason to do this but it helps keep the code
a bit simpler.

What should we do if getting away from producer by 2 cache lines
would mean we are keeping the ring moe than half empty?
Maybe we should reduce the batching in this case,
current patch simply reduces the batching.

Notes:
- it is no longer true that a call to consume guarantees
  that the following call to produce will succeed.
  No users seem to assume that.
- batching can also in theory reduce the signalling rate:
  users that would previously send interrups to the producer
  to wake it up after consuming each entry would now only
  need to do this once in a batch.
  Doing this would be easy by returning a flag to the caller.
  No users seem to do signalling on consume yet so this was not
  implemented yet.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 include/linux/ptr_ring.h | 63 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 6c70444da3b9..6b2e0dd88569 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -34,11 +34,13 @@
 struct ptr_ring {
 	int producer ____cacheline_aligned_in_smp;
 	spinlock_t producer_lock;
-	int consumer ____cacheline_aligned_in_smp;
+	int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
+	int consumer_tail; /* next entry to invalidate */
 	spinlock_t consumer_lock;
 	/* Shared consumer/producer data */
 	/* Read-only by both the producer and the consumer */
 	int size ____cacheline_aligned_in_smp; /* max entries in queue */
+	int batch; /* number of entries to consume in a batch */
 	void **queue;
 };
 
@@ -170,7 +172,7 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
 static inline void *__ptr_ring_peek(struct ptr_ring *r)
 {
 	if (likely(r->size))
-		return r->queue[r->consumer];
+		return r->queue[r->consumer_head];
 	return NULL;
 }
 
@@ -231,9 +233,38 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
 /* Must only be called after __ptr_ring_peek returned !NULL */
 static inline void __ptr_ring_discard_one(struct ptr_ring *r)
 {
-	r->queue[r->consumer++] = NULL;
-	if (unlikely(r->consumer >= r->size))
-		r->consumer = 0;
+	/* Fundamentally, what we want to do is update consumer
+	 * index and zero out the entry so producer can reuse it.
+	 * Doing it naively at each consume would be as simple as:
+	 *       r->queue[r->consumer++] = NULL;
+	 *       if (unlikely(r->consumer >= r->size))
+	 *               r->consumer = 0;
+	 * but that is suboptimal when the ring is full as producer is writing
+	 * out new entries in the same cache line.  Defer these updates until a
+	 * batch of entries has been consumed.
+	 */
+	int head = r->consumer_head++;
+
+	/* Once we have processed enough entries invalidate them in
+	 * the ring all at once so producer can reuse their space in the ring.
+	 * We also do this when we reach end of the ring - not mandatory
+	 * but helps keep the implementation simple.
+	 */
+	if (unlikely(r->consumer_head - r->consumer_tail >= r->batch ||
+		     r->consumer_head >= r->size)) {
+		/* Zero out entries in the reverse order: this way we touch the
+		 * cache line that producer might currently be reading the last;
+		 * producer won't make progress and touch other cache lines
+		 * besides the first one until we write out all entries.
+		 */
+		while (likely(head >= r->consumer_tail))
+			r->queue[head--] = NULL;
+		r->consumer_tail = r->consumer_head;
+	}
+	if (unlikely(r->consumer_head >= r->size)) {
+		r->consumer_head = 0;
+		r->consumer_tail = 0;
+	}
 }
 
 static inline void *__ptr_ring_consume(struct ptr_ring *r)
@@ -345,14 +376,27 @@ static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp)
 	return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp);
 }
 
+static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
+{
+	r->size = size;
+	r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
+	/* We need to set batch at least to 1 to make logic
+	 * in __ptr_ring_discard_one work correctly.
+	 * Batching too much (because ring is small) would cause a lot of
+	 * burstiness. Needs tuning, for now disable batching.
+	 */
+	if (r->batch > r->size / 2 || !r->batch)
+		r->batch = 1;
+}
+
 static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
 {
 	r->queue = __ptr_ring_init_queue_alloc(size, gfp);
 	if (!r->queue)
 		return -ENOMEM;
 
-	r->size = size;
-	r->producer = r->consumer = 0;
+	__ptr_ring_set_size(r, size);
+	r->producer = r->consumer_head = r->consumer_tail = 0;
 	spin_lock_init(&r->producer_lock);
 	spin_lock_init(&r->consumer_lock);
 
@@ -373,9 +417,10 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
 		else if (destroy)
 			destroy(ptr);
 
-	r->size = size;
+	__ptr_ring_set_size(r, size);
 	r->producer = producer;
-	r->consumer = 0;
+	r->consumer_head = 0;
+	r->consumer_tail = 0;
 	old = r->queue;
 	r->queue = queue;
 
-- 
cgit v1.2.3


From a49795054a0b2034ac50a2a894864fdb353addb5 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 7 Apr 2017 08:44:23 +0300
Subject: ringtest: support test specific parameters

Add a new flag for passing test-specific parameters.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tools/virtio/ringtest/main.c | 13 +++++++++++++
 tools/virtio/ringtest/main.h |  2 ++
 2 files changed, 15 insertions(+)

diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
index 2801ab7547e2..453ca3c21193 100644
--- a/tools/virtio/ringtest/main.c
+++ b/tools/virtio/ringtest/main.c
@@ -20,6 +20,7 @@
 int runcycles = 10000000;
 int max_outstanding = INT_MAX;
 int batch = 1;
+int param = 0;
 
 bool do_sleep = false;
 bool do_relax = false;
@@ -246,6 +247,11 @@ static const struct option longopts[] = {
 		.has_arg = required_argument,
 		.val = 'b',
 	},
+	{
+		.name = "param",
+		.has_arg = required_argument,
+		.val = 'p',
+	},
 	{
 		.name = "sleep",
 		.has_arg = no_argument,
@@ -274,6 +280,7 @@ static void help(void)
 		" [--run-cycles C (default: %d)]"
 		" [--batch b]"
 		" [--outstanding o]"
+		" [--param p]"
 		" [--sleep]"
 		" [--relax]"
 		" [--exit]"
@@ -328,6 +335,12 @@ int main(int argc, char **argv)
 			assert(c > 0 && c < INT_MAX);
 			max_outstanding = c;
 			break;
+		case 'p':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			param = c;
+			break;
 		case 'b':
 			c = strtol(optarg, &endptr, 0);
 			assert(!*endptr);
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
index 14142faf040b..90b0133004e1 100644
--- a/tools/virtio/ringtest/main.h
+++ b/tools/virtio/ringtest/main.h
@@ -10,6 +10,8 @@
 
 #include <stdbool.h>
 
+extern int param;
+
 extern bool do_exit;
 
 #if defined(__x86_64__) || defined(__i386__)
-- 
cgit v1.2.3


From 3008a206209fba670ab1da5590694a616b705efb Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 7 Apr 2017 08:45:32 +0300
Subject: ptr_ring: support testing different batching sizes

Use the param flag for that.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tools/virtio/ringtest/ptr_ring.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index 635b07b4fdd3..7b22f1b20652 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -97,6 +97,9 @@ void alloc_ring(void)
 {
 	int ret = ptr_ring_init(&array, ring_size, 0);
 	assert(!ret);
+	/* Hacky way to poke at ring internals. Useful for testing though. */
+	if (param)
+		array.batch = param;
 }
 
 /* guest side */
-- 
cgit v1.2.3


From 56da5fd04e3d51d0d876c685b0107ca357b9fd9d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 6 Apr 2017 12:04:47 +0300
Subject: virtio_net: tidy a couple debug statements

We are printing a decimal value for truesize so we shouldn't use an "0x"
prefix.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fd5145bdfb77..87b5c2046341 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -613,7 +613,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	rcu_read_unlock();
 
 	if (unlikely(len > (unsigned long)ctx)) {
-		pr_debug("%s: rx error: len %u exceeds truesize 0x%lu\n",
+		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
 			 dev->name, len, (unsigned long)ctx);
 		dev->stats.rx_length_errors++;
 		goto err_skb;
@@ -639,7 +639,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
 		page = virt_to_head_page(buf);
 		if (unlikely(len > (unsigned long)ctx)) {
-			pr_debug("%s: rx error: len %u exceeds truesize 0x%lu\n",
+			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
 				 dev->name, len, (unsigned long)ctx);
 			dev->stats.rx_length_errors++;
 			goto err_skb;
-- 
cgit v1.2.3


From 96fb20c343be46afca37d6c501d4cf13d58e110a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 29 Apr 2017 23:14:21 +0100
Subject: tools/virtio: fix spelling mistake: "wakeus" -> "wakeups"

trivial fix to spelling mistake in an error message.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 tools/virtio/virtio_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 76d6f583c249..0fecaec90d0d 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -202,7 +202,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq,
 	test = 0;
 	r = ioctl(dev->control, VHOST_TEST_RUN, &test);
 	assert(r >= 0);
-	fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious);
+	fprintf(stderr, "spurious wakeups: 0x%llx\n", spurious);
 }
 
 const char optstring[] = "h";
-- 
cgit v1.2.3


From c8b0d7290657996a29f318b948d2397d30e70c36 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 2 Mar 2017 17:08:23 +0100
Subject: s390/virtio: change maintainership

Halil is doing a lot more work in the virtio area on s390 than I
do. Let's reflect the reality in the maintainers file.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 38d3e4ed7208..a1f6e1954804 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13324,8 +13324,8 @@ F:	include/uapi/linux/virtio_*.h
 F:	drivers/crypto/virtio/
 
 VIRTIO DRIVERS FOR S390
-M:	Christian Borntraeger <borntraeger@de.ibm.com>
 M:	Cornelia Huck <cornelia.huck@de.ibm.com>
+M:	Halil Pasic <pasic@linux.vnet.ibm.com>
 L:	linux-s390@vger.kernel.org
 L:	virtualization@lists.linux-foundation.org
 L:	kvm@vger.kernel.org
-- 
cgit v1.2.3


From 505a9249c23988768983aaa0766ce54e0dbe037d Mon Sep 17 00:00:00 2001
From: Kamal Heib <kamalh@mellanox.com>
Date: Tue, 9 May 2017 14:45:22 +0300
Subject: net/mlx4_en: Change the error print to debug print

The error print within mlx4_en_calc_rx_buf() should be a debug print.

Fixes: 51151a16a60f ('mlx4: allow order-0 memory allocations in RX path')
Signed-off-by: Kamal Heib <kamalh@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index aa074e57ce06..77abd1813047 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -997,7 +997,8 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
 	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d num_frags:%d):\n",
 	       eff_mtu, priv->num_frags);
 	for (i = 0; i < priv->num_frags; i++) {
-		en_err(priv,
+		en_dbg(DRV,
+		       priv,
 		       "  frag:%d - size:%d stride:%d\n",
 		       i,
 		       priv->frag_info[i].frag_size,
-- 
cgit v1.2.3


From 89c557687a32c294e9d25670a96e9287c09f2d5f Mon Sep 17 00:00:00 2001
From: Talat Batheesh <talatb@mellanox.com>
Date: Tue, 9 May 2017 14:45:23 +0300
Subject: net/mlx4_en: Avoid adding steering rules with invalid ring

Inserting steering rules with illegal ring is an invalid operation,
block it.

Fixes: 820672812f82 ('net/mlx4_en: Manage flow steering rules with ethtool')
Signed-off-by: Talat Batheesh <talatb@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index ffbcb27c05e5..ae5fdc2df654 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1562,6 +1562,11 @@ static int mlx4_en_flow_replace(struct net_device *dev,
 		qpn = priv->drop_qp.qpn;
 	else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) {
 		qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1);
+		if (qpn < priv->rss_map.base_qpn ||
+		    qpn >= priv->rss_map.base_qpn + priv->rx_ring_num) {
+			en_warn(priv, "rxnfc: QP (0x%x) doesn't exist\n", qpn);
+			return -EINVAL;
+		}
 	} else {
 		if (cmd->fs.ring_cookie >= priv->rx_ring_num) {
 			en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist\n",
-- 
cgit v1.2.3


From 83bd5118a119cabba47e194bba96641e303aadff Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 9 May 2017 14:45:24 +0300
Subject: net/mlx4_core: Reduce harmless SRIOV error message to debug level

Under SRIOV resource management, extra counters are allocated to VFs
from a free pool. If that pool is empty, the ALLOC_RES command for
a counter resource fails -- and this generates a misleading error
message in the message log.

Under SRIOV, each VF is allocated (i.e., guaranteed) 2 counters --
one counter per port. For ETH ports, the RoCE driver requests an
additional counter (above the guaranteed counters). If that request
fails, the VF RoCE driver simply uses the default (i.e., guaranteed)
counter for that port.

Thus, failing to allocate an additional counter does not constitute
a  problem, and the error message on the PF when this occurs should
be reduced to debug level.

Finally, to identify the situation that the reason for the failure is
that no resources are available to grant to the VF, we modified the
error returned by mlx4_grant_resource to -EDQUOT (Quota exceeded),
which more accurately describes the error.

Fixes: c3abb51bdb0e ("IB/mlx4: Add RoCE/IB dedicated counters")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c              | 14 +++++++++++---
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c |  2 +-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 0e0fa7030565..c1af47e45d3f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1789,9 +1789,17 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
 	}
 
 	if (err) {
-		if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR))
-			mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n",
-				  vhcr->op, slave, vhcr->errno, err);
+		if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) {
+			if (vhcr->op == MLX4_CMD_ALLOC_RES &&
+			    (vhcr->in_modifier & 0xff) == RES_COUNTER &&
+			    err == -EDQUOT)
+				mlx4_dbg(dev,
+					 "Unable to allocate counter for slave %d (%d)\n",
+					 slave, err);
+			else
+				mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n",
+					  vhcr->op, slave, vhcr->errno, err);
+		}
 		vhcr_cmd->status = mlx4_errno_to_status(err);
 		goto out_status;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 4aa29ee93013..07516545474f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -311,7 +311,7 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave,
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct resource_allocator *res_alloc =
 		&priv->mfunc.master.res_tracker.res_alloc[res_type];
-	int err = -EINVAL;
+	int err = -EDQUOT;
 	int allocated, free, reserved, guaranteed, from_free;
 	int from_rsvd;
 
-- 
cgit v1.2.3


From 92c43eb41669d8edbbfa3db45e89d2edcc55e2de Mon Sep 17 00:00:00 2001
From: Suddarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Date: Tue, 9 May 2017 15:07:47 +0300
Subject: qede: Fix XDP memory leak on unload

When (re|un)loading, Tx-queues belonging to XDP would not get freed.

Fixes: cb6aeb079294 ("qede: Add support for XDP_TX")
Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index b9ba23d71c61..263fd28648ea 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1313,6 +1313,9 @@ static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 	if (fp->type & QEDE_FASTPATH_RX)
 		qede_free_mem_rxq(edev, fp->rxq);
 
+	if (fp->type & QEDE_FASTPATH_XDP)
+		qede_free_mem_txq(edev, fp->xdp_tx);
+
 	if (fp->type & QEDE_FASTPATH_TX)
 		qede_free_mem_txq(edev, fp->txq);
 }
-- 
cgit v1.2.3


From 5f027d7a48aee110e5f66ef5f9e2d90d214cd06b Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Tue, 9 May 2017 15:07:48 +0300
Subject: qed: Fix VF removal sequence

After previos changes in HW-stop scheme, VFs stopped sending CLOSE
messages to their PFs when they unload.

Fixes: 1226337ad98f ("qed: Correct HW stop flow")
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index b7ad36b91e12..0cbbd59e34a2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1093,10 +1093,12 @@ static int qed_slowpath_stop(struct qed_dev *cdev)
 		qed_free_stream_mem(cdev);
 		if (IS_QED_ETH_IF(cdev))
 			qed_sriov_disable(cdev, true);
+	}
+
+	qed_nic_stop(cdev);
 
-		qed_nic_stop(cdev);
+	if (IS_PF(cdev))
 		qed_slowpath_irq_free(cdev);
-	}
 
 	qed_disable_msix(cdev);
 
-- 
cgit v1.2.3


From c9f0523bb3d1e70fbfd3245842de855096194925 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Tue, 9 May 2017 15:07:49 +0300
Subject: qed: Tell QM the number of tasks

Driver doesn't pass the number of tasks to the QM init logic
which would cause back-pressure in scenarios requiring many tasks
[E.g., using max MRs] and thus reduced performance.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_cxt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index b3aaa985956e..694845793af2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -1460,6 +1460,7 @@ void qed_qm_init_pf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	params.is_first_pf = p_hwfn->first_on_engine;
 	params.num_pf_cids = iids.cids;
 	params.num_vf_cids = iids.vf_cids;
+	params.num_tids = iids.tids;
 	params.start_pq = qm_info->start_pq;
 	params.num_pf_pqs = qm_info->num_pqs - qm_info->num_vf_pqs;
 	params.num_vf_pqs = qm_info->num_vf_pqs;
-- 
cgit v1.2.3


From a82dadbce47395747824971db08a128130786fdc Mon Sep 17 00:00:00 2001
From: Ram Amrani <Ram.Amrani@cavium.com>
Date: Tue, 9 May 2017 15:07:50 +0300
Subject: qed: Correct doorbell configuration for !4Kb pages

When configuring the doorbell DPI address, driver aligns the start
address to 4KB [HW-pages] instead of host PAGE_SIZE.
As a result, RoCE applications might receive addresses which are
unaligned to pages [when PAGE_SIZE > 4KB], which is a security risk.

Fixes: 51ff17251c9c ("qed: Add support for RoCE hw init")
Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index bb70522ad362..463927f17032 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1370,7 +1370,7 @@ qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 						   NULL) +
 		       qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
 						   NULL);
-	norm_regsize = roundup(QED_PF_DEMS_SIZE * non_pwm_conn, 4096);
+	norm_regsize = roundup(QED_PF_DEMS_SIZE * non_pwm_conn, PAGE_SIZE);
 	min_addr_reg1 = norm_regsize / 4096;
 	pwm_regsize = db_bar_size - norm_regsize;
 
-- 
cgit v1.2.3


From be47c5555778fa3354950731023deb034a9e445e Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Tue, 9 May 2017 15:07:51 +0300
Subject: qede: Split PF/VF ndos.

PFs and VFs share the same structure of NDOs today,
and the VFs explicitly fails the ndo_xdp() callback stating
it doesn't support XDP.

This results in lots of:

  [qede_xdp:1032(enp131s2)]VFs don't support XDP
  ------------[ cut here ]------------
  WARNING: CPU: 4 PID: 1426 at net/core/rtnetlink.c:1637 rtnl_dump_ifinfo+0x354/0x3c0
  ...
  Call Trace:
    ? __alloc_skb+0x9b/0x1d0
    netlink_dump+0x122/0x290
    netlink_recvmsg+0x27d/0x430
    sock_recvmsg+0x3d/0x50
  ...

As every dump request for the VF interface info would fail due to
rtnl_xdp_fill() returning an error code.

To resolve this, introduce a subset of the NDOs meant for the VF
in a seperate structure and register that one instead for VFs,
and omit the ndo_xdp initialization.

Fixes: 40b8c45492ef ("qede: Prevent VFs from using XDP")
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_filter.c |  5 -----
 drivers/net/ethernet/qlogic/qede/qede_main.c   | 22 +++++++++++++++++++++-
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index eb5652073ca8..333876c19d7d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -1028,11 +1028,6 @@ int qede_xdp(struct net_device *dev, struct netdev_xdp *xdp)
 {
 	struct qede_dev *edev = netdev_priv(dev);
 
-	if (IS_VF(edev)) {
-		DP_NOTICE(edev, "VFs don't support XDP\n");
-		return -EOPNOTSUPP;
-	}
-
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		return qede_xdp_set(edev, xdp->prog);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 263fd28648ea..38b77bbfe4ee 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -563,6 +563,23 @@ static const struct net_device_ops qede_netdev_ops = {
 #endif
 };
 
+static const struct net_device_ops qede_netdev_vf_ops = {
+	.ndo_open = qede_open,
+	.ndo_stop = qede_close,
+	.ndo_start_xmit = qede_start_xmit,
+	.ndo_set_rx_mode = qede_set_rx_mode,
+	.ndo_set_mac_address = qede_set_mac_addr,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_change_mtu = qede_change_mtu,
+	.ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid,
+	.ndo_set_features = qede_set_features,
+	.ndo_get_stats64 = qede_get_stats64,
+	.ndo_udp_tunnel_add = qede_udp_tunnel_add,
+	.ndo_udp_tunnel_del = qede_udp_tunnel_del,
+	.ndo_features_check = qede_features_check,
+};
+
 /* -------------------------------------------------------------------------
  * START OF PROBE / REMOVE
  * -------------------------------------------------------------------------
@@ -622,7 +639,10 @@ static void qede_init_ndev(struct qede_dev *edev)
 
 	ndev->watchdog_timeo = TX_TIMEOUT;
 
-	ndev->netdev_ops = &qede_netdev_ops;
+	if (IS_VF(edev))
+		ndev->netdev_ops = &qede_netdev_vf_ops;
+	else
+		ndev->netdev_ops = &qede_netdev_ops;
 
 	qede_set_ethtool_ops(ndev);
 
-- 
cgit v1.2.3


From cf1e22891bee39f50e058bee0827086fd75a8717 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 8 May 2017 12:33:53 -0700
Subject: device-dax: kill NR_DEV_DAX

There is no point to ask how many device-dax instances the kernel should
support. Since we are already using a dynamic major number, just allow
the max number of minors by default and be done. This also fixes the
fact that the proposed max for the NR_DEV_DAX range was larger than what
could be supported by alloc_chrdev_region().

Fixes: ba09c01d2fa8 ("dax: convert to the cdev api")
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Tested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/Kconfig |  5 -----
 drivers/dax/super.c | 11 +++--------
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index 6b7e20eae16c..b79aa8f7a497 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -28,9 +28,4 @@ config DEV_DAX_PMEM
 
 	  Say Y if unsure
 
-config NR_DEV_DAX
-	int "Maximum number of Device-DAX instances"
-	default 32768
-	range 256 2147483647
-
 endif
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e8998d15f3cd..ebf43f531ada 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -21,10 +21,6 @@
 #include <linux/dax.h>
 #include <linux/fs.h>
 
-static int nr_dax = CONFIG_NR_DEV_DAX;
-module_param(nr_dax, int, S_IRUGO);
-MODULE_PARM_DESC(nr_dax, "max number of dax device instances");
-
 static dev_t dax_devt;
 DEFINE_STATIC_SRCU(dax_srcu);
 static struct vfsmount *dax_mnt;
@@ -331,7 +327,7 @@ struct dax_device *alloc_dax(void *private, const char *__host,
 	if (__host && !host)
 		return NULL;
 
-	minor = ida_simple_get(&dax_minor_ida, 0, nr_dax, GFP_KERNEL);
+	minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
 	if (minor < 0)
 		goto err_minor;
 
@@ -475,8 +471,7 @@ static int __init dax_fs_init(void)
 	if (rc)
 		return rc;
 
-	nr_dax = max(nr_dax, 256);
-	rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax");
+	rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax");
 	if (rc)
 		__dax_fs_exit();
 	return rc;
@@ -484,7 +479,7 @@ static int __init dax_fs_init(void)
 
 static void __exit dax_fs_exit(void)
 {
-	unregister_chrdev_region(dax_devt, nr_dax);
+	unregister_chrdev_region(dax_devt, MINORMASK+1);
 	ida_destroy(&dax_minor_ida);
 	__dax_fs_exit();
 }
-- 
cgit v1.2.3


From 81cddd65b5c82758ea5571a25e31ff6f1f89ff02 Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 3 May 2017 16:37:45 +0100
Subject: arm64: traps: fix userspace cache maintenance emulation on a tagged
 pointer

When we emulate userspace cache maintenance in the kernel, we can
currently send the task a SIGSEGV even though the maintenance was done
on a valid address. This happens if the address has a non-zero address
tag, and happens to not be mapped in.

When we get the address from a user register, we don't currently remove
the address tag before performing cache maintenance on it. If the
maintenance faults, we end up in either __do_page_fault, where find_vma
can't find the VMA if the address has a tag, or in do_translation_fault,
where the tagged address will appear to be above TASK_SIZE. In both
cases, the address is not mapped in, and the task is sent a SIGSEGV.

This patch removes the tag from the address before using it. With this
patch, the fault is handled correctly, the address gets mapped in, and
the cache maintenance succeeds.

As a second bug, if cache maintenance (correctly) fails on an invalid
tagged address, the address gets passed into arm64_notify_segfault,
where find_vma fails to find the VMA due to the tag, and the wrong
si_code may be sent as part of the siginfo_t of the segfault. With this
patch, the correct si_code is sent.

Fixes: 7dd01aef0557 ("arm64: trap userspace "dc cvau" cache operation on errata-affected core")
Cc: <stable@vger.kernel.org> # 4.8.x-
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/traps.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index d4d6ae02cd55..0805b44f986a 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -443,7 +443,7 @@ int cpu_enable_cache_maint_trap(void *__unused)
 }
 
 #define __user_cache_maint(insn, address, res)			\
-	if (untagged_addr(address) >= user_addr_max()) {	\
+	if (address >= user_addr_max()) {			\
 		res = -EFAULT;					\
 	} else {						\
 		uaccess_ttbr0_enable();				\
@@ -469,7 +469,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 	int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
 	int ret = 0;
 
-	address = pt_regs_read_reg(regs, rt);
+	address = untagged_addr(pt_regs_read_reg(regs, rt));
 
 	switch (crm) {
 	case ESR_ELx_SYS64_ISS_CRM_DC_CVAU:	/* DC CVAU, gets promoted */
-- 
cgit v1.2.3


From 7dcd9dd8cebe9fa626af7e2358d03a37041a70fb Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 3 May 2017 16:37:46 +0100
Subject: arm64: hw_breakpoint: fix watchpoint matching for tagged pointers

When we take a watchpoint exception, the address that triggered the
watchpoint is found in FAR_EL1. We compare it to the address of each
configured watchpoint to see which one was hit.

The configured watchpoint addresses are untagged, while the address in
FAR_EL1 will have an address tag if the data access was done using a
tagged address. The tag needs to be removed to compare the address to
the watchpoints.

Currently we don't remove it, and as a result can report the wrong
watchpoint as being hit (specifically, always either the highest TTBR0
watchpoint or lowest TTBR1 watchpoint). This patch removes the tag.

Fixes: d50240a5f6ce ("arm64: mm: permit use of tagged pointers at EL0")
Cc: <stable@vger.kernel.org> # 3.12.x-
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/uaccess.h  | 6 +++---
 arch/arm64/kernel/hw_breakpoint.c | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 5308d696311b..0221029e27ff 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -106,9 +106,9 @@ static inline void set_fs(mm_segment_t fs)
 })
 
 /*
- * When dealing with data aborts or instruction traps we may end up with
- * a tagged userland pointer. Clear the tag to get a sane pointer to pass
- * on to access_ok(), for instance.
+ * When dealing with data aborts, watchpoints, or instruction traps we may end
+ * up with a tagged userland pointer. Clear the tag to get a sane pointer to
+ * pass on to access_ok(), for instance.
  */
 #define untagged_addr(addr)		sign_extend64(addr, 55)
 
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 0296e7924240..749f81779420 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -36,6 +36,7 @@
 #include <asm/traps.h>
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
+#include <asm/uaccess.h>
 
 /* Breakpoint currently in use for each BRP. */
 static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
@@ -721,6 +722,8 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
 	u64 wp_low, wp_high;
 	u32 lens, lene;
 
+	addr = untagged_addr(addr);
+
 	lens = __ffs(ctrl->len);
 	lene = __fls(ctrl->len);
 
-- 
cgit v1.2.3


From 276e93279a630657fff4b086ba14c95955912dfa Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 3 May 2017 16:37:47 +0100
Subject: arm64: entry: improve data abort handling of tagged pointers

When handling a data abort from EL0, we currently zero the top byte of
the faulting address, as we assume the address is a TTBR0 address, which
may contain a non-zero address tag. However, the address may be a TTBR1
address, in which case we should not zero the top byte. This patch fixes
that. The effect is that the full TTBR1 address is passed to the task's
signal handler (or printed out in the kernel log).

When handling a data abort from EL1, we leave the faulting address
intact, as we assume it's either a TTBR1 address or a TTBR0 address with
tag 0x00. This is true as far as I'm aware, we don't seem to access a
tagged TTBR0 address anywhere in the kernel. Regardless, it's easy to
forget about address tags, and code added in the future may not always
remember to remove tags from addresses before accessing them. So add tag
handling to the EL1 data abort handler as well. This also makes it
consistent with the EL0 data abort handler.

Fixes: d50240a5f6ce ("arm64: mm: permit use of tagged pointers at EL0")
Cc: <stable@vger.kernel.org> # 3.12.x-
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/asm-uaccess.h | 9 +++++++++
 arch/arm64/kernel/entry.S            | 5 +++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index df411f3e083c..ecd9788cd298 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -62,4 +62,13 @@ alternative_if ARM64_ALT_PAN_NOT_UAO
 alternative_else_nop_endif
 	.endm
 
+/*
+ * Remove the address tag from a virtual address, if present.
+ */
+	.macro	clear_address_tag, dst, addr
+	tst	\addr, #(1 << 55)
+	bic	\dst, \addr, #(0xff << 56)
+	csel	\dst, \dst, \addr, eq
+	.endm
+
 #endif
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 43512d4d7df2..b738880350f9 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -428,12 +428,13 @@ el1_da:
 	/*
 	 * Data abort handling
 	 */
-	mrs	x0, far_el1
+	mrs	x3, far_el1
 	enable_dbg
 	// re-enable interrupts if they were enabled in the aborted context
 	tbnz	x23, #7, 1f			// PSR_I_BIT
 	enable_irq
 1:
+	clear_address_tag x0, x3
 	mov	x2, sp				// struct pt_regs
 	bl	do_mem_abort
 
@@ -594,7 +595,7 @@ el0_da:
 	// enable interrupts before calling the main handler
 	enable_dbg_and_irq
 	ct_user_exit
-	bic	x0, x26, #(0xff << 56)
+	clear_address_tag x0, x26
 	mov	x1, x25
 	mov	x2, sp
 	bl	do_mem_abort
-- 
cgit v1.2.3


From f0e421b1bf7af97f026e1bb8bfe4c5a7a8c08f42 Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 3 May 2017 16:37:48 +0100
Subject: arm64: documentation: document tagged pointer stack constraints

Some kernel features don't currently work if a task puts a non-zero
address tag in its stack pointer, frame pointer, or frame record entries
(FP, LR).

For example, with a tagged stack pointer, the kernel can't deliver
signals to the process, and the task is killed instead. As another
example, with a tagged frame pointer or frame records, perf fails to
generate call graphs or resolve symbols.

For now, just document these limitations, instead of finding and fixing
everything that doesn't work, as it's not known if anyone needs to use
tags in these places anyway.

In addition, as requested by Dave Martin, generalize the limitations
into a general kernel address tag policy, and refactor
tagged-pointers.txt to include it.

Fixes: d50240a5f6ce ("arm64: mm: permit use of tagged pointers at EL0")
Cc: <stable@vger.kernel.org> # 3.12.x-
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/arm64/tagged-pointers.txt | 62 +++++++++++++++++++++++++--------
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt
index d9995f1f51b3..a25a99e82bb1 100644
--- a/Documentation/arm64/tagged-pointers.txt
+++ b/Documentation/arm64/tagged-pointers.txt
@@ -11,24 +11,56 @@ in AArch64 Linux.
 The kernel configures the translation tables so that translations made
 via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of
 the virtual address ignored by the translation hardware. This frees up
-this byte for application use, with the following caveats:
+this byte for application use.
 
-	(1) The kernel requires that all user addresses passed to EL1
-	    are tagged with tag 0x00. This means that any syscall
-	    parameters containing user virtual addresses *must* have
-	    their top byte cleared before trapping to the kernel.
 
-	(2) Non-zero tags are not preserved when delivering signals.
-	    This means that signal handlers in applications making use
-	    of tags cannot rely on the tag information for user virtual
-	    addresses being maintained for fields inside siginfo_t.
-	    One exception to this rule is for signals raised in response
-	    to watchpoint debug exceptions, where the tag information
-	    will be preserved.
+Passing tagged addresses to the kernel
+--------------------------------------
 
-	(3) Special care should be taken when using tagged pointers,
-	    since it is likely that C compilers will not hazard two
-	    virtual addresses differing only in the upper byte.
+All interpretation of userspace memory addresses by the kernel assumes
+an address tag of 0x00.
+
+This includes, but is not limited to, addresses found in:
+
+ - pointer arguments to system calls, including pointers in structures
+   passed to system calls,
+
+ - the stack pointer (sp), e.g. when interpreting it to deliver a
+   signal,
+
+ - the frame pointer (x29) and frame records, e.g. when interpreting
+   them to generate a backtrace or call graph.
+
+Using non-zero address tags in any of these locations may result in an
+error code being returned, a (fatal) signal being raised, or other modes
+of failure.
+
+For these reasons, passing non-zero address tags to the kernel via
+system calls is forbidden, and using a non-zero address tag for sp is
+strongly discouraged.
+
+Programs maintaining a frame pointer and frame records that use non-zero
+address tags may suffer impaired or inaccurate debug and profiling
+visibility.
+
+
+Preserving tags
+---------------
+
+Non-zero tags are not preserved when delivering signals. This means that
+signal handlers in applications making use of tags cannot rely on the
+tag information for user virtual addresses being maintained for fields
+inside siginfo_t. One exception to this rule is for signals raised in
+response to watchpoint debug exceptions, where the tag information will
+be preserved.
 
 The architecture prevents the use of a tagged PC, so the upper byte will
 be set to a sign-extension of bit 55 on exception return.
+
+
+Other considerations
+--------------------
+
+Special care should be taken when using tagged pointers, since it is
+likely that C compilers will not hazard two virtual addresses differing
+only in the upper byte.
-- 
cgit v1.2.3


From fee960bed5e857eb126c4e56dd9ff85938356579 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 3 May 2017 16:09:33 +0100
Subject: arm64: xchg: hazard against entire exchange variable

The inline assembly in __XCHG_CASE() uses a +Q constraint to hazard
against other accesses to the memory location being exchanged. However,
the pointer passed to the constraint is a u8 pointer, and thus the
hazard only applies to the first byte of the location.

GCC can take advantage of this, assuming that other portions of the
location are unchanged, as demonstrated with the following test case:

union u {
	unsigned long l;
	unsigned int i[2];
};

unsigned long update_char_hazard(union u *u)
{
	unsigned int a, b;

	a = u->i[1];
	asm ("str %1, %0" : "+Q" (*(char *)&u->l) : "r" (0UL));
	b = u->i[1];

	return a ^ b;
}

unsigned long update_long_hazard(union u *u)
{
	unsigned int a, b;

	a = u->i[1];
	asm ("str %1, %0" : "+Q" (*(long *)&u->l) : "r" (0UL));
	b = u->i[1];

	return a ^ b;
}

The linaro 15.08 GCC 5.1.1 toolchain compiles the above as follows when
using -O2 or above:

0000000000000000 <update_char_hazard>:
   0:	d2800001 	mov	x1, #0x0                   	// #0
   4:	f9000001 	str	x1, [x0]
   8:	d2800000 	mov	x0, #0x0                   	// #0
   c:	d65f03c0 	ret

0000000000000010 <update_long_hazard>:
  10:	b9400401 	ldr	w1, [x0,#4]
  14:	d2800002 	mov	x2, #0x0                   	// #0
  18:	f9000002 	str	x2, [x0]
  1c:	b9400400 	ldr	w0, [x0,#4]
  20:	4a000020 	eor	w0, w1, w0
  24:	d65f03c0 	ret

This patch fixes the issue by passing an unsigned long pointer into the
+Q constraint, as we do for our cmpxchg code. This may hazard against
more than is necessary, but this is better than missing a necessary
hazard.

Fixes: 305d454aaa29 ("arm64: atomics: implement native {relaxed, acquire, release} atomics")
Cc: <stable@vger.kernel.org> # 4.4.x-
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cmpxchg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 91b26d26af8a..ae852add053d 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -46,7 +46,7 @@ static inline unsigned long __xchg_case_##name(unsigned long x,		\
 	"	swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n"	\
 		__nops(3)						\
 	"	" #nop_lse)						\
-	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)			\
+	: "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned long *)ptr)	\
 	: "r" (x)							\
 	: cl);								\
 									\
-- 
cgit v1.2.3


From 994870bead4ab19087a79492400a5478e2906196 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 3 May 2017 16:09:34 +0100
Subject: arm64: ensure extension of smp_store_release value

When an inline assembly operand's type is narrower than the register it
is allocated to, the least significant bits of the register (up to the
operand type's width) are valid, and any other bits are permitted to
contain any arbitrary value. This aligns with the AAPCS64 parameter
passing rules.

Our __smp_store_release() implementation does not account for this, and
implicitly assumes that operands have been zero-extended to the width of
the type being stored to. Thus, we may store unknown values to memory
when the value type is narrower than the pointer type (e.g. when storing
a char to a long).

This patch fixes the issue by casting the value operand to the same
width as the pointer operand in all cases, which ensures that the value
is zero-extended as we expect. We use the same union trickery as
__smp_load_acquire and {READ,WRITE}_ONCE() to avoid GCC complaining that
pointers are potentially cast to narrower width integers in unreachable
paths.

A whitespace issue at the top of __smp_store_release() is also
corrected.

No changes are necessary for __smp_load_acquire(). Load instructions
implicitly clear any upper bits of the register, and the compiler will
only consider the least significant bits of the register as valid
regardless.

Fixes: 47933ad41a86 ("arch: Introduce smp_load_acquire(), smp_store_release()")
Fixes: 878a84d5a8a1 ("arm64: add missing data types in smp_load_acquire/smp_store_release")
Cc: <stable@vger.kernel.org> # 3.14.x-
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/barrier.h | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 4e0497f581a0..0fe7e43b7fbc 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -42,25 +42,35 @@
 #define __smp_rmb()	dmb(ishld)
 #define __smp_wmb()	dmb(ishst)
 
-#define __smp_store_release(p, v)						\
+#define __smp_store_release(p, v)					\
 do {									\
+	union { typeof(*p) __val; char __c[1]; } __u =			\
+		{ .__val = (__force typeof(*p)) (v) }; 			\
 	compiletime_assert_atomic_type(*p);				\
 	switch (sizeof(*p)) {						\
 	case 1:								\
 		asm volatile ("stlrb %w1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u8 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	case 2:								\
 		asm volatile ("stlrh %w1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u16 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	case 4:								\
 		asm volatile ("stlr %w1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u32 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	case 8:								\
 		asm volatile ("stlr %1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u64 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	}								\
 } while (0)
-- 
cgit v1.2.3


From a06040d7a791a9177581dcf7293941bd92400856 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 3 May 2017 16:09:35 +0100
Subject: arm64: uaccess: ensure extension of access_ok() addr

Our access_ok() simply hands its arguments over to __range_ok(), which
implicitly assummes that the addr parameter is 64 bits wide. This isn't
necessarily true for compat code, which might pass down a 32-bit address
parameter.

In these cases, we don't have a guarantee that the address has been zero
extended to 64 bits, and the upper bits of the register may contain
unknown values, potentially resulting in a suprious failure.

Avoid this by explicitly casting the addr parameter to an unsigned long
(as is done on other architectures), ensuring that the parameter is
widened appropriately.

Fixes: 0aea86a2176c ("arm64: User access library functions")
Cc: <stable@vger.kernel.org> # 3.7.x-
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/uaccess.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 0221029e27ff..e6540471dcda 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -95,11 +95,12 @@ static inline void set_fs(mm_segment_t fs)
  */
 #define __range_ok(addr, size)						\
 ({									\
+	unsigned long __addr = (unsigned long __force)(addr);		\
 	unsigned long flag, roksum;					\
 	__chk_user_ptr(addr);						\
 	asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls"		\
 		: "=&r" (flag), "=&r" (roksum)				\
-		: "1" (addr), "Ir" (size),				\
+		: "1" (__addr), "Ir" (size),				\
 		  "r" (current_thread_info()->addr_limit)		\
 		: "cc");						\
 	flag;								\
-- 
cgit v1.2.3


From 55de49f9aa17b0b2b144dd2af587177b9aadf429 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 3 May 2017 16:09:36 +0100
Subject: arm64: armv8_deprecated: ensure extension of addr

Our compat swp emulation holds the compat user address in an unsigned
int, which it passes to __user_swpX_asm(). When a 32-bit value is passed
in a register, the upper 32 bits of the register are unknown, and we
must extend the value to 64 bits before we can use it as a base address.

This patch casts the address to unsigned long to ensure it has been
suitably extended, avoiding the potential issue, and silencing a related
warning from clang.

Fixes: bd35a4adc413 ("arm64: Port SWP/SWPB emulation support from arm")
Cc: <stable@vger.kernel.org> # 3.19.x-
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/armv8_deprecated.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 657977e77ec8..f0e6d717885b 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -306,7 +306,8 @@ do {								\
 	_ASM_EXTABLE(0b, 4b)					\
 	_ASM_EXTABLE(1b, 4b)					\
 	: "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2)	\
-	: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT),		\
+	: "r" ((unsigned long)addr), "i" (-EAGAIN),		\
+	  "i" (-EFAULT),					\
 	  "i" (__SWP_LL_SC_LOOPS)				\
 	: "memory");						\
 	uaccess_disable();					\
-- 
cgit v1.2.3


From 8997c93452d16aac11d3b0cc53940c94330273a4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 3 May 2017 16:09:37 +0100
Subject: arm64: atomic_lse: match asm register sizes

The LSE atomic code uses asm register variables to ensure that
parameters are allocated in specific registers. In the majority of cases
we specifically ask for an x register when using 64-bit values, but in a
couple of cases we use a w regsiter for a 64-bit value.

For asm register variables, the compiler only cares about the register
index, with wN and xN having the same meaning. The compiler determines
the register size to use based on the type of the variable. Thus, this
inconsistency is merely confusing, and not harmful to code generation.

For consistency, this patch updates those cases to use the x register
alias. There should be no functional change as a result of this patch.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/atomic_lse.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 7457ce082b5f..99fa69c9c3cf 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -322,7 +322,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
 #define ATOMIC64_FETCH_OP_AND(name, mb, cl...)				\
 static inline long atomic64_fetch_and##name(long i, atomic64_t *v)	\
 {									\
-	register long x0 asm ("w0") = i;				\
+	register long x0 asm ("x0") = i;				\
 	register atomic64_t *x1 asm ("x1") = v;				\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
@@ -394,7 +394,7 @@ ATOMIC64_OP_SUB_RETURN(        , al, "memory")
 #define ATOMIC64_FETCH_OP_SUB(name, mb, cl...)				\
 static inline long atomic64_fetch_sub##name(long i, atomic64_t *v)	\
 {									\
-	register long x0 asm ("w0") = i;				\
+	register long x0 asm ("x0") = i;				\
 	register atomic64_t *x1 asm ("x1") = v;				\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
-- 
cgit v1.2.3


From d135b8b5060ea91dd751ff172d179eb4eab1e966 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 3 May 2017 16:09:38 +0100
Subject: arm64: uaccess: suppress spurious clang warning

Clang tries to warn when there's a mismatch between an operand's size,
and the size of the register it is held in, as this may indicate a bug.
Specifically, clang warns when the operand's type is less than 64 bits
wide, and the register is used unqualified (i.e. %N rather than %xN or
%wN).

Unfortunately clang can generate these warnings for unreachable code.
For example, for code like:

do {                                            \
        typeof(*(ptr)) __v = (v);               \
        switch(sizeof(*(ptr))) {                \
        case 1:                                 \
                // assume __v is 1 byte wide    \
                asm ("{op}b %w0" : : "r" (v));  \
                break;                          \
        case 8:                                 \
                // assume __v is 8 bytes wide   \
                asm ("{op} %0" : : "r" (v));    \
                break;                          \
        }
while (0)

... if op() were passed a char value and pointer to char, clang may
produce a warning for the unreachable case where sizeof(*(ptr)) is 8.

For the same reasons, clang produces warnings when __put_user_err() is
used for types that are less than 64 bits wide.

We could avoid this with a cast to a fixed-width type in each of the
cases. However, GCC will then warn that pointer types are being cast to
mismatched integer sizes (in unreachable paths).

Another option would be to use the same union trickery as we do for
__smp_store_release() and __smp_load_acquire(), but this is fairly
invasive.

Instead, this patch suppresses the clang warning by using an x modifier
in the assembly for the 8 byte case of __put_user_err(). No additional
work is necessary as the value has been cast to typeof(*(ptr)), so the
compiler will have performed any necessary extension for the reachable
case.

For consistency, __get_user_err() is also updated to use the x modifier
for its 8 byte case.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/uaccess.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index e6540471dcda..90b5755c5003 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -257,7 +257,7 @@ do {									\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 8:								\
-		__get_user_asm("ldr", "ldtr", "%",  __gu_val, (ptr),	\
+		__get_user_asm("ldr", "ldtr", "%x",  __gu_val, (ptr),	\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	default:							\
@@ -324,7 +324,7 @@ do {									\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 8:								\
-		__put_user_asm("str", "sttr", "%", __pu_val, (ptr),	\
+		__put_user_asm("str", "sttr", "%x", __pu_val, (ptr),	\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	default:							\
-- 
cgit v1.2.3


From 8376efd31d3d7c44bd05be337adde023cc531fa1 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben.hutchings@codethink.co.uk>
Date: Tue, 9 May 2017 18:00:43 +0100
Subject: x86, pmem: Fix cache flushing for iovec write < 8 bytes

Commit 11e63f6d920d added cache flushing for unaligned writes from an
iovec, covering the first and last cache line of a >= 8 byte write and
the first cache line of a < 8 byte write.  But an unaligned write of
2-7 bytes can still cover two cache lines, so make sure we flush both
in that case.

Cc: <stable@vger.kernel.org>
Fixes: 11e63f6d920d ("x86, pmem: fix broken __copy_user_nocache ...")
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/include/asm/pmem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index d5a22bac9988..0ff8fe71b255 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -98,7 +98,7 @@ static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
 
 		if (bytes < 8) {
 			if (!IS_ALIGNED(dest, 4) || (bytes != 4))
-				arch_wb_cache_pmem(addr, 1);
+				arch_wb_cache_pmem(addr, bytes);
 		} else {
 			if (!IS_ALIGNED(dest, 8)) {
 				dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
-- 
cgit v1.2.3


From aed74ea0a09d2e60e4b3fe45caf30f6365b6c9f3 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Sat, 6 May 2017 23:40:20 +0800
Subject: sparc: use memdup_user_nul in sun4m LED driver

Use memdup_user_nul() helper instead of open-coding to simplify the code.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/led.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/arch/sparc/kernel/led.c b/arch/sparc/kernel/led.c
index 44a3ed93c214..e278bf52963b 100644
--- a/arch/sparc/kernel/led.c
+++ b/arch/sparc/kernel/led.c
@@ -70,16 +70,9 @@ static ssize_t led_proc_write(struct file *file, const char __user *buffer,
 	if (count > LED_MAX_LENGTH)
 		count = LED_MAX_LENGTH;
 
-	buf = kmalloc(sizeof(char) * (count + 1), GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	if (copy_from_user(buf, buffer, count)) {
-		kfree(buf);
-		return -EFAULT;
-	}
-
-	buf[count] = '\0';
+	buf = memdup_user_nul(buffer, count);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
 
 	/* work around \n when echo'ing into proc */
 	if (buf[count - 1] == '\n')
-- 
cgit v1.2.3


From 3c7f62212018b904ae17f5636ead18a4dca3a88f Mon Sep 17 00:00:00 2001
From: Dave Aldridge <david.j.aldridge@oracle.com>
Date: Tue, 9 May 2017 02:57:35 -0600
Subject: sparc64: fix fault handling in NGbzero.S and GENbzero.S

When any of the functions contained in NGbzero.S and GENbzero.S
vector through *bzero_from_clear_user, we may end up taking a
fault when executing one of the store alternate address space
instructions. If this happens, the exception handler does not
restore the %asi register.

This commit fixes the issue by introducing a new exception
handler that ensures the %asi register is restored when
a fault is handled.

Orabug: 25577560

Signed-off-by: Dave Aldridge <david.j.aldridge@oracle.com>
Reviewed-by: Rob Gardner <rob.gardner@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/head_64.S | 6 ++++++
 arch/sparc/lib/GENbzero.S   | 2 +-
 arch/sparc/lib/NGbzero.S    | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 44101196d02b..41a407328667 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -939,3 +939,9 @@ ENTRY(__retl_o1)
 	retl
 	 mov	%o1, %o0
 ENDPROC(__retl_o1)
+
+ENTRY(__retl_o1_asi)
+	wr      %o5, 0x0, %asi
+	retl
+	 mov    %o1, %o0
+ENDPROC(__retl_o1_asi)
diff --git a/arch/sparc/lib/GENbzero.S b/arch/sparc/lib/GENbzero.S
index 8e7a843ddd88..2fbf6297d57c 100644
--- a/arch/sparc/lib/GENbzero.S
+++ b/arch/sparc/lib/GENbzero.S
@@ -8,7 +8,7 @@
 98:	x,y;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_o1;	\
+	.word 98b, __retl_o1_asi;\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/NGbzero.S b/arch/sparc/lib/NGbzero.S
index beab29bf419b..33053bdf3766 100644
--- a/arch/sparc/lib/NGbzero.S
+++ b/arch/sparc/lib/NGbzero.S
@@ -8,7 +8,7 @@
 98:	x,y;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_o1;	\
+	.word 98b, __retl_o1_asi;\
 	.text;			\
 	.align 4;
 
-- 
cgit v1.2.3


From 657831ffc38e30092a2d5f03d385d710eb88b09a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 9 May 2017 06:29:19 -0700
Subject: dccp/tcp: do not inherit mc_list from parent

syzkaller found a way to trigger double frees from ip_mc_drop_socket()

It turns out that leave a copy of parent mc_list at accept() time,
which is very bad.

Very similar to commit 8b485ce69876 ("tcp: do not inherit
fastopen_req from parent")

Initial report from Pray3r, completed by Andrey one.
Thanks a lot to them !

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Pray3r <pray3r.z@gmail.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5e313c1ac94f..1054d330bf9d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -794,6 +794,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 		/* listeners have SOCK_RCU_FREE, not the children */
 		sock_reset_flag(newsk, SOCK_RCU_FREE);
 
+		inet_sk(newsk)->mc_list = NULL;
+
 		newsk->sk_mark = inet_rsk(req)->ir_mark;
 		atomic64_set(&newsk->sk_cookie,
 			     atomic64_read(&inet_rsk(req)->ir_cookie));
-- 
cgit v1.2.3


From aeb899af65d2d29039ce8f000bced39dd0dacad1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 11 Apr 2017 07:01:19 -0300
Subject: docs-rst: add input docs at main index and use kernel-figure

The input subsystem documentation got converted into ReST.

Add it to the main documentation index and use kernel-figure
for the two svg images there.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/index.rst    | 1 +
 Documentation/input/ff.rst | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/index.rst b/Documentation/index.rst
index 61306a22888d..bc67dbf76eb0 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -67,6 +67,7 @@ needed).
    driver-api/index
    core-api/index
    media/index
+   input/index
    gpu/index
    security/index
    sound/index
diff --git a/Documentation/input/ff.rst b/Documentation/input/ff.rst
index 6a265a6934e6..26d461998e08 100644
--- a/Documentation/input/ff.rst
+++ b/Documentation/input/ff.rst
@@ -130,11 +130,11 @@ See <uapi/linux/input.h> for a description of the ff_effect struct.  You
 should also find help in a few sketches, contained in files shape.svg
 and interactive.svg:
 
-.. figure:: shape.svg
+.. kernel-figure:: shape.svg
 
     Shape
 
-.. figure:: interactive.svg
+.. kernel-figure:: interactive.svg
 
     Interactive
 
-- 
cgit v1.2.3


From f7c864e7ee5c3907d232838f1b4540612252a605 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Thu, 4 May 2017 00:49:36 +0100
Subject: Documentation: earlycon: fix Marvell Armada 3700 UART name

The Marvell Armada 3700 UART uses "ar3700_uart" for its earlycon name.
Adjust documentation to match the code.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/kernel-parameters.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e4c9e0e46b95..230b03caee55 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -972,7 +972,7 @@
 			A valid base address must be provided, and the serial
 			port must already be setup and configured.
 
-		armada3700_uart,<addr>
+		ar3700_uart,<addr>
 			Start an early, polled-mode console on the
 			Armada 3700 serial port at the specified
 			address. The serial port must already be setup
-- 
cgit v1.2.3


From 0fe397f0c65d478eea3798ac361736fd91da7022 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <h.grohne@intenta.de>
Date: Wed, 3 May 2017 11:51:46 +0200
Subject: docs: update references to the device io book

While converting the deviceiobook from DocBook to RST, dangling
references were left behind. This commit updates all remaining
references to the new location. SeongJae Park improved the ko_KR
translation.

Fixes: 8a8a602fdb83 ("docs: Convert the deviceio template to RST")
Signed-off-by: Helmut Grohne <h.grohne@intenta.de>
Signed-off-by: SeongJae Park <sj38.park@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/memory-barriers.txt                    | 4 ++--
 Documentation/translations/ko_KR/memory-barriers.txt | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index d2b0a8d81258..d323adcb7b88 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -2373,7 +2373,7 @@ is performed:
 					spin_unlock(Q);
 
 
-See Documentation/DocBook/deviceiobook.tmpl for more information.
+See Documentation/driver-api/device-io.rst for more information.
 
 
 =================================
@@ -2614,7 +2614,7 @@ might be needed:
      relaxed memory access properties, then _mandatory_ memory barriers are
      required to enforce ordering.
 
-See Documentation/DocBook/deviceiobook.tmpl for more information.
+See Documentation/driver-api/device-io.rst for more information.
 
 
 INTERRUPTS
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index ce0b48d69eaa..d05d4c54e8f7 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -2343,7 +2343,7 @@ ACQUIRE VS I/O 액세스
 					spin_unlock(Q);
 
 
-더 많은 정보를 위해선 Documenataion/DocBook/deviceiobook.tmpl 을 참고하세요.
+더 많은 정보를 위해선 Documentation/driver-api/device-io.rst 를 참고하세요.
 
 
 =========================
@@ -2578,7 +2578,7 @@ CPU 에서는 사용되는 어토믹 인스트럭션 자체에 메모리 배리
  (2) 만약 액세스 함수들이 완화된 메모리 액세스 속성을 갖는 I/O 메모리 윈도우를
      사용한다면, 순서를 강제하기 위해선 _mandatory_ 메모리 배리어가 필요합니다.
 
-더 많은 정보를 위해선 Documentation/DocBook/deviceiobook.tmpl 을 참고하십시오.
+더 많은 정보를 위해선 Documentation/driver-api/device-io.rst 를 참고하십시오.
 
 
 인터럽트
-- 
cgit v1.2.3


From f4b23de3dda1536590787c9e5c3d16b8738ab108 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 9 May 2017 15:47:15 -0400
Subject: NFSv4.1: Work around a Linux server bug...

It turns out the Linux server has a bug in its implementation of
supattr_exclcreat; it returns the set of all attributes, whether
or not they are supported by minor version 1.
In order to avoid a regression, we therefore apply the supported_attrs
as a mask on top of whatever the server sent us.

Reported-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f48dfb7f3691..c08c46a3b8cd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3268,6 +3268,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 		.rpc_resp = &res,
 	};
 	int status;
+	int i;
 
 	bitmask[0] = FATTR4_WORD0_SUPPORTED_ATTRS |
 		     FATTR4_WORD0_FH_EXPIRE_TYPE |
@@ -3333,8 +3334,13 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 		server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
 		server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
 		server->cache_consistency_bitmask[2] = 0;
+
+		/* Avoid a regression due to buggy server */
+		for (i = 0; i < ARRAY_SIZE(res.exclcreat_bitmask); i++)
+			res.exclcreat_bitmask[i] &= res.attr_bitmask[i];
 		memcpy(server->exclcreat_bitmask, res.exclcreat_bitmask,
 			sizeof(server->exclcreat_bitmask));
+
 		server->acl_bitmask = res.acl_bitmask;
 		server->fh_expire_type = res.fh_expire_type;
 	}
-- 
cgit v1.2.3


From 76b2a303384e1d6299c3a0249f0f0ce2f8f96017 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 9 May 2017 16:02:57 -0400
Subject: pNFS/flexfiles: Always attempt to call layoutstats when flexfiles is
 enabled

Layoutstats is always desirable when using the flexfiles driver, so
we should enable it if that driver is being loaded. It is safe to do
so, because even when the mount specifies NFSv4.1, we will turn it
off if the server tells us it is unsupported.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 4e8bf02a62f9..f5714ee01000 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -2361,10 +2361,21 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
 	return 0;
 }
 
+static int
+ff_layout_set_layoutdriver(struct nfs_server *server,
+		const struct nfs_fh *dummy)
+{
+#if IS_ENABLED(CONFIG_NFS_V4_2)
+	server->caps |= NFS_CAP_LAYOUTSTATS;
+#endif
+	return 0;
+}
+
 static struct pnfs_layoutdriver_type flexfilelayout_type = {
 	.id			= LAYOUT_FLEX_FILES,
 	.name			= "LAYOUT_FLEX_FILES",
 	.owner			= THIS_MODULE,
+	.set_layoutdriver	= ff_layout_set_layoutdriver,
 	.alloc_layout_hdr	= ff_layout_alloc_layout_hdr,
 	.free_layout_hdr	= ff_layout_free_layout_hdr,
 	.alloc_lseg		= ff_layout_alloc_lseg,
-- 
cgit v1.2.3


From b550a32e60a4941994b437a8d662432a486235a5 Mon Sep 17 00:00:00 2001
From: Ari Kauppi <ari@synopsys.com>
Date: Fri, 5 May 2017 16:07:55 -0400
Subject: nfsd: fix undefined behavior in nfsd4_layout_verify

  UBSAN: Undefined behaviour in fs/nfsd/nfs4proc.c:1262:34
  shift exponent 128 is too large for 32-bit type 'int'

Depending on compiler+architecture, this may cause the check for
layout_type to succeed for overly large values (which seems to be the
case with amd64). The large value will be later used in de-referencing
nfsd4_layout_ops for function pointers.

Reported-by: Jani Tuovila <tuovila@synopsys.com>
Signed-off-by: Ari Kauppi <ari@synopsys.com>
[colin.king@canonical.com: use LAYOUT_TYPE_MAX instead of 32]
Cc: stable@vger.kernel.org
Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d86031b6ad79..c453a1998e00 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1259,7 +1259,8 @@ nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
 		return NULL;
 	}
 
-	if (!(exp->ex_layout_types & (1 << layout_type))) {
+	if (layout_type >= LAYOUT_TYPE_MAX ||
+	    !(exp->ex_layout_types & (1 << layout_type))) {
 		dprintk("%s: layout type %d not supported\n",
 			__func__, layout_type);
 		return NULL;
-- 
cgit v1.2.3


From 88b0193d9418c00340e45e0a913a0813bc6c8c96 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 9 May 2017 18:00:04 +0100
Subject: perf/callchain: Force USER_DS when invoking perf_callchain_user()

Perf can generate and record a user callchain in response to a synchronous
request, such as a tracepoint firing. If this happens under set_fs(KERNEL_DS),
then we can end up walking the user stack (and dereferencing/saving whatever we
find there) without the protections usually afforded by checks such as
access_ok.

Rather than play whack-a-mole with each architecture's stack unwinding
implementation, fix the root of the problem by ensuring that we force USER_DS
when invoking perf_callchain_user from the perf core.

Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/callchain.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index c04917cad1bf..1b2be63c8528 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -229,12 +229,18 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 		}
 
 		if (regs) {
+			mm_segment_t fs;
+
 			if (crosstask)
 				goto exit_put;
 
 			if (add_mark)
 				perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
+
+			fs = get_fs();
+			set_fs(USER_DS);
 			perf_callchain_user(&ctx, regs);
+			set_fs(fs);
 		}
 	}
 
-- 
cgit v1.2.3


From 960013762df0a214b57f2fce655422fb52bdfd2c Mon Sep 17 00:00:00 2001
From: Jeeja KP <jeeja.kp@intel.com>
Date: Wed, 10 May 2017 11:51:58 +0530
Subject: ALSA: hda: Fix cpu lockup when stopping the cmd dmas

Using jiffies in hdac_wait_for_cmd_dmas() to determine when to time out
when interrupts are off (snd_hdac_bus_stop_cmd_io()/spin_lock_irq())
causes hard lockup so unlock while waiting using jiffies.

---<-snip->---
<0>[ 1211.603046] NMI watchdog: Watchdog detected hard LOCKUP on cpu 3
<4>[ 1211.603047] Modules linked in: snd_hda_intel i915 vgem
<4>[ 1211.603053] irq event stamp: 13366
<4>[ 1211.603053] hardirqs last  enabled at (13365):
...
<4>[ 1211.603059] Call Trace:
<4>[ 1211.603059]  ? delay_tsc+0x3d/0xc0
<4>[ 1211.603059]  __delay+0xa/0x10
<4>[ 1211.603060]  __const_udelay+0x31/0x40
<4>[ 1211.603060]  snd_hdac_bus_stop_cmd_io+0x96/0xe0 [snd_hda_core]
<4>[ 1211.603060]  ? azx_dev_disconnect+0x20/0x20 [snd_hda_intel]
<4>[ 1211.603061]  snd_hdac_bus_stop_chip+0xb1/0x100 [snd_hda_core]
<4>[ 1211.603061]  azx_stop_chip+0x9/0x10 [snd_hda_codec]
<4>[ 1211.603061]  azx_suspend+0x72/0x220 [snd_hda_intel]
<4>[ 1211.603061]  pci_pm_suspend+0x71/0x140
<4>[ 1211.603062]  dpm_run_callback+0x6f/0x330
<4>[ 1211.603062]  ? pci_pm_freeze+0xe0/0xe0
<4>[ 1211.603062]  __device_suspend+0xf9/0x370
<4>[ 1211.603062]  ? dpm_watchdog_set+0x60/0x60
<4>[ 1211.603063]  async_suspend+0x1a/0x90
<4>[ 1211.603063]  async_run_entry_fn+0x34/0x160
<4>[ 1211.603063]  process_one_work+0x1f4/0x6d0
<4>[ 1211.603063]  ? process_one_work+0x16e/0x6d0
<4>[ 1211.603064]  worker_thread+0x49/0x4a0
<4>[ 1211.603064]  kthread+0x107/0x140
<4>[ 1211.603064]  ? process_one_work+0x6d0/0x6d0
<4>[ 1211.603065]  ? kthread_create_on_node+0x40/0x40
<4>[ 1211.603065]  ret_from_fork+0x2e/0x40

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100419
Fixes: 38b19ed7f81ec ("ALSA: hda: fix to wait for RIRB & CORB DMA to set")
Reported-by: Marta Lofstedt <marta.lofstedt@intel.com>
Suggested-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jeeja KP <jeeja.kp@intel.com>
Acked-by: Vinod Koul <vinod.koul@intel.com>
CC: stable <stable@vger.kernel.org> # 4.7
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/hda/hdac_controller.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c
index ee08c389b4d6..978dc1801b3a 100644
--- a/sound/hda/hdac_controller.c
+++ b/sound/hda/hdac_controller.c
@@ -106,7 +106,11 @@ void snd_hdac_bus_stop_cmd_io(struct hdac_bus *bus)
 	/* disable ringbuffer DMAs */
 	snd_hdac_chip_writeb(bus, RIRBCTL, 0);
 	snd_hdac_chip_writeb(bus, CORBCTL, 0);
+	spin_unlock_irq(&bus->reg_lock);
+
 	hdac_wait_for_cmd_dmas(bus);
+
+	spin_lock_irq(&bus->reg_lock);
 	/* disable unsolicited responses */
 	snd_hdac_chip_updatel(bus, GCTL, AZX_GCTL_UNSOL, 0);
 	spin_unlock_irq(&bus->reg_lock);
-- 
cgit v1.2.3


From fba704b494fdc6816a039a66887274b4e5c00eeb Mon Sep 17 00:00:00 2001
From: Rakesh Pandit <rakesh@tuxera.com>
Date: Tue, 9 May 2017 08:48:25 -0600
Subject: nvme: lightnvm: fix memory leak
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Free up kmalloc allocated memory if failure happens while handling L2P
table transfer in nvme_nvm_get_l2p_tbl.

Fixes: 8e79b5cb ("lightnvm: move block provisioning to targets")
Signed-off-by: Rakesh Pandit <rakesh@tuxera.com>
Reviewed-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/lightnvm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 206bfdbc1c98..f5df78ed1e10 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -367,7 +367,8 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
 
 		if (unlikely(elba > nvmdev->total_secs)) {
 			pr_err("nvm: L2P data from device is out of bounds!\n");
-			return -EINVAL;
+			ret = -EINVAL;
+			goto out;
 		}
 
 		/* Transform physical address to target address space */
-- 
cgit v1.2.3


From a66c38a171ed25488debf80247a9e72e1026e82c Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Tue, 9 May 2017 11:37:27 +0200
Subject: block, bfq: use pointer entity->sched_data only if set

In the function __bfq_deactivate_entity, the pointer
entity->sched_data could happen to be used before being properly
initialized. This led to a NULL pointer dereference. This commit fixes
this bug by just using this pointer only where it is safe to do so.

Reported-by: Tom Harrison <l12436.tw@gmail.com>
Tested-by: Tom Harrison <l12436.tw@gmail.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bfq-wf2q.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index b4fc3e4260b7..8726ede19eef 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1114,12 +1114,21 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
 bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
 {
 	struct bfq_sched_data *sd = entity->sched_data;
-	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
-	int is_in_service = entity == sd->in_service_entity;
+	struct bfq_service_tree *st;
+	bool is_in_service;
 
 	if (!entity->on_st) /* entity never activated, or already inactive */
 		return false;
 
+	/*
+	 * If we get here, then entity is active, which implies that
+	 * bfq_group_set_parent has already been invoked for the group
+	 * represented by entity. Therefore, the field
+	 * entity->sched_data has been set, and we can safely use it.
+	 */
+	st = bfq_entity_service_tree(entity);
+	is_in_service = entity == sd->in_service_entity;
+
 	if (is_in_service)
 		bfq_calc_finish(entity, entity->service);
 
-- 
cgit v1.2.3


From 43c1b3d6e536b7b21ed329ae54280d8ba308ba92 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Tue, 9 May 2017 12:54:23 +0200
Subject: block, bfq: stress that low_latency must be off to get max throughput

The introduction of the BFQ and Kyber I/O schedulers has triggered a
new wave of I/O benchmarks. Unfortunately, comments and discussions on
these benchmarks confirm that there is still little awareness that it
is very hard to achieve, at the same time, a low latency and a high
throughput. In particular, virtually all benchmarks measure
throughput, or throughput-related figures of merit, but, for BFQ, they
use the scheduler in its default configuration. This configuration is
geared, instead, toward a low latency. This is evidently a sign that
BFQ documentation is still too unclear on this important aspect. This
commit addresses this issue by stressing how BFQ configuration must be
(easily) changed if the only goal is maximum throughput.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/block/bfq-iosched.txt | 17 ++++++++++++++++-
 block/bfq-iosched.c                 |  5 +++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 1b87df6cd476..05e2822a80b3 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -11,6 +11,13 @@ controllers), BFQ's main features are:
   groups (switching back to time distribution when needed to keep
   throughput high).
 
+In its default configuration, BFQ privileges latency over
+throughput. So, when needed for achieving a lower latency, BFQ builds
+schedules that may lead to a lower throughput. If your main or only
+goal, for a given device, is to achieve the maximum-possible
+throughput at all times, then do switch off all low-latency heuristics
+for that device, by setting low_latency to 0. Full details in Section 3.
+
 On average CPUs, the current version of BFQ can handle devices
 performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a
 reference, 30-50 KIOPS correspond to very high bandwidths with
@@ -375,11 +382,19 @@ default, low latency mode is enabled. If enabled, interactive and soft
 real-time applications are privileged and experience a lower latency,
 as explained in more detail in the description of how BFQ works.
 
-DO NOT enable this mode if you need full control on bandwidth
+DISABLE this mode if you need full control on bandwidth
 distribution. In fact, if it is enabled, then BFQ automatically
 increases the bandwidth share of privileged applications, as the main
 means to guarantee a lower latency to them.
 
+In addition, as already highlighted at the beginning of this document,
+DISABLE this mode if your only goal is to achieve a high throughput.
+In fact, privileging the I/O of some application over the rest may
+entail a lower throughput. To achieve the highest-possible throughput
+on a non-rotational device, setting slice_idle to 0 may be needed too
+(at the cost of giving up any strong guarantee on fairness and low
+latency).
+
 timeout_sync
 ------------
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index bd8499ef157c..08ce45096350 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -56,6 +56,11 @@
  * rotational or flash-based devices, and to get the job done quickly
  * for applications consisting in many I/O-bound processes.
  *
+ * NOTE: if the main or only goal, with a given device, is to achieve
+ * the maximum-possible throughput at all times, then do switch off
+ * all low-latency heuristics for that device, by setting low_latency
+ * to 0.
+ *
  * BFQ is described in [1], where also a reference to the initial, more
  * theoretical paper on BFQ can be found. The interested reader can find
  * in the latter paper full details on the main algorithm, as well as
-- 
cgit v1.2.3


From 340ff3216799a947fe0b07bed8f0409ffc716be9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 10 May 2017 07:40:04 -0600
Subject: elevator: remove redundant warnings on IO scheduler switch

We warn twice for switching to a scheduler, if that switch fails.
As we also report the failure in the return value to the
sysfs write, remove the dmesg induced failures.

Keep the failure print for warning to switch to the kconfig
selected IO scheduler, as we can't report errors for that in
any other way.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/elevator.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/block/elevator.c b/block/elevator.c
index ab726a5c0bf6..dac99fbfc273 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1062,10 +1062,8 @@ static int __elevator_change(struct request_queue *q, const char *name)
 
 	strlcpy(elevator_name, name, sizeof(elevator_name));
 	e = elevator_get(strstrip(elevator_name), true);
-	if (!e) {
-		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
+	if (!e)
 		return -EINVAL;
-	}
 
 	if (q->elevator &&
 	    !strcmp(elevator_name, q->elevator->type->elevator_name)) {
@@ -1105,7 +1103,6 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 	if (!ret)
 		return count;
 
-	printk(KERN_ERR "elevator: switch to %s failed\n", name);
 	return ret;
 }
 
-- 
cgit v1.2.3


From d3738123986954ba3abbd96b595f5176b50c3f5d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 9 May 2017 11:39:56 -0600
Subject: blk-stat: don't use this_cpu_ptr() in a preemptable section

If PREEMPT_RCU is enabled, rcu_read_lock() isn't strong enough
for us to use this_cpu_ptr() in that section. Use the safer
get/put_cpu_ptr() variants instead.

Reported-by: Mike Galbraith <efault@gmx.de>
Fixes: 34dbad5d26e2 ("blk-stat: convert to callback-based statistics reporting")
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-stat.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/block/blk-stat.c b/block/blk-stat.c
index 6c2f40940439..c52356d90fe3 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -96,13 +96,16 @@ void blk_stat_add(struct request *rq)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
-		if (blk_stat_is_active(cb)) {
-			bucket = cb->bucket_fn(rq);
-			if (bucket < 0)
-				continue;
-			stat = &this_cpu_ptr(cb->cpu_stat)[bucket];
-			__blk_stat_add(stat, value);
-		}
+		if (!blk_stat_is_active(cb))
+			continue;
+
+		bucket = cb->bucket_fn(rq);
+		if (bucket < 0)
+			continue;
+
+		stat = &get_cpu_ptr(cb->cpu_stat)[bucket];
+		__blk_stat_add(stat, value);
+		put_cpu_ptr(cb->cpu_stat);
 	}
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3


From f36ea50ca0043e7b1204feaf1d2ba6bd68c08d36 Mon Sep 17 00:00:00 2001
From: Wen Xiong <wenxiong@linux.vnet.ibm.com>
Date: Wed, 10 May 2017 08:54:11 -0500
Subject: blk-mq: NVMe 512B/4K+T10 DIF/DIX format returns I/O error on dd with
 split op

When formatting NVMe to 512B/4K + T10 DIf/DIX, dd with split op returns
"Input/output error". Looks block layer split the bio after calling
bio_integrity_prep(bio). This patch fixes the issue.

Below is how we debug this issue:
(1)format nvme to 4K block # size with type 2 DIF
(2)dd with block size bigger than 1024k.
oflag=direct
dd: error writing '/dev/nvme0n1': Input/output error

We added some debug code in nvme device driver. It showed us the first
op and the second op have the same bi and pi address. This is not
correct.

1st op: nvme0n1 Op:Wr slba 0x505 length 0x100, PI ctrl=0x1400,
	dsmgmt=0x0, AT=0x0 & RT=0x505
	Guard 0x00b1, AT 0x0000, RT physical 0x00000505 RT virtual 0x00002828

2nd op: nvme0n1 Op:Wr slba 0x605 length 0x1, PI ctrl=0x1400, dsmgmt=0x0,
	AT=0x0 & RT=0x605  ==> This op fails and subsequent 5 retires..
	Guard 0x00b1, AT 0x0000, RT physical 0x00000605 RT virtual 0x00002828

With the fix, It showed us both of the first op and the second op have
correct bi and pi address.

1st op: nvme2n1 Op:Wr slba 0x505 length 0x100, PI ctrl=0x1400,
	dsmgmt=0x0, AT=0x0 & RT=0x505
	Guard 0x5ccb, AT 0x0000, RT physical 0x00000505 RT virtual
	0x00002828
2nd op: nvme2n1 Op:Wr slba 0x605 length 0x1, PI ctrl=0x1400, dsmgmt=0x0,
	AT=0x0 & RT=0x605
	Guard 0xab4c, AT 0x0000, RT physical 0x00000605 RT virtual
	0x00003028

Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9ae1d9ccf4df..a69ad122ed66 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1554,13 +1554,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_bounce(q, &bio);
 
+	blk_queue_split(q, &bio, q->bio_split);
+
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
 		bio_io_error(bio);
 		return BLK_QC_T_NONE;
 	}
 
-	blk_queue_split(q, &bio, q->bio_split);
-
 	if (!is_flush_fua && !blk_queue_nomerges(q) &&
 	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
 		return BLK_QC_T_NONE;
-- 
cgit v1.2.3


From 37835671c43b82bdfed628a6a37284a117bda17d Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 27 Mar 2017 14:20:06 +0200
Subject: h8300: put bitsperlong.h in uapi

This header file is exported, thus move it to uapi.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/h8300/include/asm/bitsperlong.h      | 14 --------------
 arch/h8300/include/uapi/asm/bitsperlong.h | 14 ++++++++++++++
 2 files changed, 14 insertions(+), 14 deletions(-)
 delete mode 100644 arch/h8300/include/asm/bitsperlong.h
 create mode 100644 arch/h8300/include/uapi/asm/bitsperlong.h

diff --git a/arch/h8300/include/asm/bitsperlong.h b/arch/h8300/include/asm/bitsperlong.h
deleted file mode 100644
index e140e46729ac..000000000000
--- a/arch/h8300/include/asm/bitsperlong.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef __ASM_H8300_BITS_PER_LONG
-#define __ASM_H8300_BITS_PER_LONG
-
-#include <asm-generic/bitsperlong.h>
-
-#if !defined(__ASSEMBLY__)
-/* h8300-unknown-linux required long */
-#define __kernel_size_t __kernel_size_t
-typedef unsigned long	__kernel_size_t;
-typedef long		__kernel_ssize_t;
-typedef long		__kernel_ptrdiff_t;
-#endif
-
-#endif /* __ASM_H8300_BITS_PER_LONG */
diff --git a/arch/h8300/include/uapi/asm/bitsperlong.h b/arch/h8300/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000000..34212608371e
--- /dev/null
+++ b/arch/h8300/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,14 @@
+#ifndef _UAPI__ASM_H8300_BITS_PER_LONG
+#define _UAPI__ASM_H8300_BITS_PER_LONG
+
+#include <asm-generic/bitsperlong.h>
+
+#if !defined(__ASSEMBLY__)
+/* h8300-unknown-linux required long */
+#define __kernel_size_t __kernel_size_t
+typedef unsigned long	__kernel_size_t;
+typedef long		__kernel_ssize_t;
+typedef long		__kernel_ptrdiff_t;
+#endif
+
+#endif /* _UAPI__ASM_H8300_BITS_PER_LONG */
-- 
cgit v1.2.3


From 4f4ddad395b04c051fcf76687dd558228e605b6f Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 27 Mar 2017 14:20:07 +0200
Subject: nios2: put setup.h in uapi

This header file is exported, but from a userland pov, it's just a wrapper
to asm-generic/setup.h.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reviewed-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/nios2/include/uapi/asm/Kbuild | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index e0bb972a50d7..69c965304146 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -2,4 +2,5 @@ include include/uapi/asm-generic/Kbuild.asm
 
 header-y += elf.h
 
+generic-y += setup.h
 generic-y += ucontext.h
-- 
cgit v1.2.3


From 25dc1d6cc3082aab293e5dad47623b550f7ddd2a Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 27 Mar 2017 14:20:08 +0200
Subject: x86: stop exporting msr-index.h to userland

Even if this file was not in an uapi directory, it was exported because
it was listed in the Kbuild file.

Fixes: b72e7464e4cf ("x86/uapi: Do not export <asm/msr-index.h> as part of the user API headers")
Suggested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/x86/include/uapi/asm/Kbuild | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index 3dec769cadf7..1c532b3f18ea 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -27,7 +27,6 @@ header-y += ldt.h
 header-y += mce.h
 header-y += mman.h
 header-y += msgbuf.h
-header-y += msr-index.h
 header-y += msr.h
 header-y += mtrr.h
 header-y += param.h
-- 
cgit v1.2.3


From 7c025b2afc96b4a3fddff1dc9fbe3685953e38b4 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 27 Mar 2017 14:20:09 +0200
Subject: Makefile.headersinst: cleanup input files

After the last three patches, all exported headers are under uapi/, thus
input-files2 are not needed anymore.
The side effect is that input-files1-name is exactly header-y.

Note also that input-files3-name is genhdr-y.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/Makefile.headersinst | 38 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 1106d6ca3a38..7bd9df6efe2f 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -40,31 +40,20 @@ wrapper-files := $(filter $(header-y), $(generic-y))
 srcdir        := $(srctree)/$(obj)
 gendir        := $(objtree)/$(gen)
 
-oldsrcdir     := $(srctree)/$(subst /uapi,,$(obj))
-
 # all headers files for this dir
 header-y      := $(filter-out $(generic-y), $(header-y))
 all-files     := $(header-y) $(genhdr-y) $(wrapper-files)
 output-files  := $(addprefix $(installdir)/, $(all-files))
 
-input-files1  := $(foreach hdr, $(header-y), \
-		   $(if $(wildcard $(srcdir)/$(hdr)), \
-			$(wildcard $(srcdir)/$(hdr))) \
-		   )
-input-files1-name := $(notdir $(input-files1))
-input-files2  := $(foreach hdr, $(header-y), \
-		   $(if  $(wildcard $(srcdir)/$(hdr)),, \
-			$(if $(wildcard $(oldsrcdir)/$(hdr)), \
-				$(wildcard $(oldsrcdir)/$(hdr)), \
-				$(error Missing UAPI file $(srcdir)/$(hdr))) \
-		   ))
-input-files2-name := $(notdir $(input-files2))
-input-files3  := $(foreach hdr, $(genhdr-y), \
-		   $(if	$(wildcard $(gendir)/$(hdr)), \
-			$(wildcard $(gendir)/$(hdr)), \
-			$(error Missing generated UAPI file $(gendir)/$(hdr)) \
-		   ))
-input-files3-name := $(notdir $(input-files3))
+# Check that all expected files exist
+$(foreach hdr, $(header-y), \
+  $(if $(wildcard $(srcdir)/$(hdr)),, \
+       $(error Missing UAPI file $(srcdir)/$(hdr)) \
+   ))
+$(foreach hdr, $(genhdr-y), \
+  $(if	$(wildcard $(gendir)/$(hdr)),, \
+       $(error Missing generated UAPI file $(gendir)/$(hdr)) \
+  ))
 
 # Work out what needs to be removed
 oldheaders    := $(patsubst $(installdir)/%,%,$(wildcard $(installdir)/*.h))
@@ -78,9 +67,8 @@ printdir = $(patsubst $(INSTALL_HDR_PATH)/%/,%,$(dir $@))
 quiet_cmd_install = INSTALL $(printdir) ($(words $(all-files))\
                             file$(if $(word 2, $(all-files)),s))
       cmd_install = \
-        $(CONFIG_SHELL) $< $(installdir) $(srcdir) $(input-files1-name); \
-        $(CONFIG_SHELL) $< $(installdir) $(oldsrcdir) $(input-files2-name); \
-        $(CONFIG_SHELL) $< $(installdir) $(gendir) $(input-files3-name); \
+        $(CONFIG_SHELL) $< $(installdir) $(srcdir) $(header-y); \
+        $(CONFIG_SHELL) $< $(installdir) $(gendir) $(genhdr-y); \
         for F in $(wrapper-files); do                                   \
                 echo "\#include <asm-generic/$$F>" > $(installdir)/$$F;    \
         done;                                                           \
@@ -106,7 +94,9 @@ __headersinst: $(subdirs) $(install-file)
 	@:
 
 targets += $(install-file)
-$(install-file): scripts/headers_install.sh $(input-files1) $(input-files2) $(input-files3) FORCE
+$(install-file): scripts/headers_install.sh \
+		 $(addprefix $(srcdir)/,$(header-y)) \
+		 $(addprefix $(gendir)/,$(genhdr-y)) FORCE
 	$(if $(unwanted),$(call cmd,remove),)
 	$(if $(wildcard $(dir $@)),,$(shell mkdir -p $(dir $@)))
 	$(call if_changed,install)
-- 
cgit v1.2.3


From bd73a328cbb5731000bb3fd876189d9395c0cd61 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 27 Mar 2017 14:20:10 +0200
Subject: Makefile.headersinst: remove destination-y option

This option was added in commit c7bb349e7c25 ("kbuild: introduce destination-y
for exported headers") but never used in-tree.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Documentation/kbuild/makefiles.txt | 23 ++++-------------------
 scripts/Makefile.headersinst       |  2 +-
 2 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index 9b9c4797fc55..37b525d329ae 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -46,9 +46,8 @@ This document describes the Linux kernel Makefiles.
 	=== 7 Kbuild syntax for exported headers
 		--- 7.1 header-y
 		--- 7.2 genhdr-y
-		--- 7.3 destination-y
-		--- 7.4 generic-y
-		--- 7.5 generated-y
+		--- 7.3 generic-y
+		--- 7.4 generated-y
 
 	=== 8 Kbuild Variables
 	=== 9 Makefile language
@@ -1295,21 +1294,7 @@ See subsequent chapter for the syntax of the Kbuild file.
 			#include/linux/Kbuild
 			genhdr-y += version.h
 
-	--- 7.3 destination-y
-
-	When an architecture has a set of exported headers that needs to be
-	exported to a different directory destination-y is used.
-	destination-y specifies the destination directory for all exported
-	headers in the file where it is present.
-
-		Example:
-			#arch/xtensa/platforms/s6105/include/platform/Kbuild
-			destination-y := include/linux
-
-	In the example above all exported headers in the Kbuild file
-	will be located in the directory "include/linux" when exported.
-
-	--- 7.4 generic-y
+	--- 7.3 generic-y
 
 	If an architecture uses a verbatim copy of a header from
 	include/asm-generic then this is listed in the file
@@ -1336,7 +1321,7 @@ See subsequent chapter for the syntax of the Kbuild file.
 		Example: termios.h
 			#include <asm-generic/termios.h>
 
-	--- 7.5 generated-y
+	--- 7.4 generated-y
 
 	If an architecture generates other header files alongside generic-y
 	wrappers, and not included in genhdr-y, then generated-y specifies
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 7bd9df6efe2f..ca5d439c9abf 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -14,7 +14,7 @@ kbuild-file := $(srctree)/$(obj)/Kbuild
 include $(kbuild-file)
 
 # called may set destination dir (when installing to asm/)
-_dst := $(if $(destination-y),$(destination-y),$(if $(dst),$(dst),$(obj)))
+_dst := $(if $(dst),$(dst),$(obj))
 
 old-kbuild-file := $(srctree)/$(subst uapi/,,$(obj))/Kbuild
 ifneq ($(wildcard $(old-kbuild-file)),)
-- 
cgit v1.2.3


From 9078b4eea119c13d633d45af0397c821a517b522 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 27 Mar 2017 14:20:11 +0200
Subject: uapi: includes linux/types.h before exporting files

Some files will be exported after a following patch. 0-day tests report the
following warning/error:
./usr/include/linux/bcache.h:8: include of <linux/types.h> is preferred over <asm/types.h>
./usr/include/linux/bcache.h:11: found __[us]{8,16,32,64} type without #include <linux/types.h>
./usr/include/linux/qrtr.h:8: found __[us]{8,16,32,64} type without #include <linux/types.h>
./usr/include/linux/cryptouser.h:39: found __[us]{8,16,32,64} type without #include <linux/types.h>
./usr/include/linux/pr.h:14: found __[us]{8,16,32,64} type without #include <linux/types.h>
./usr/include/linux/btrfs_tree.h:337: found __[us]{8,16,32,64} type without #include <linux/types.h>
./usr/include/rdma/bnxt_re-abi.h:45: found __[us]{8,16,32,64} type without #include <linux/types.h>

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 include/uapi/linux/bcache.h     | 2 +-
 include/uapi/linux/btrfs_tree.h | 2 ++
 include/uapi/linux/cryptouser.h | 2 ++
 include/uapi/linux/pr.h         | 2 ++
 include/uapi/linux/qrtr.h       | 1 +
 include/uapi/rdma/bnxt_re-abi.h | 2 ++
 6 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 22b6ad31c706..e3bb0635e94a 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -5,7 +5,7 @@
  * Bcache on disk data structures
  */
 
-#include <asm/types.h>
+#include <linux/types.h>
 
 #define BITMASK(name, type, field, offset, size)		\
 static inline __u64 name(const type *k)				\
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index d5ad15a106a7..6a261cb52d95 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -1,6 +1,8 @@
 #ifndef _BTRFS_CTREE_H_
 #define _BTRFS_CTREE_H_
 
+#include <linux/types.h>
+
 /*
  * This header contains the structure definitions and constants used
  * by file system objects that can be retrieved using
diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h
index b4def5c630e7..fdcbb3c29083 100644
--- a/include/uapi/linux/cryptouser.h
+++ b/include/uapi/linux/cryptouser.h
@@ -18,6 +18,8 @@
  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <linux/types.h>
+
 /* Netlink configuration messages.  */
 enum {
 	CRYPTO_MSG_BASE = 0x10,
diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h
index 57d7c0f916b6..645ef3cf3dd0 100644
--- a/include/uapi/linux/pr.h
+++ b/include/uapi/linux/pr.h
@@ -1,6 +1,8 @@
 #ifndef _UAPI_PR_H
 #define _UAPI_PR_H
 
+#include <linux/types.h>
+
 enum pr_type {
 	PR_WRITE_EXCLUSIVE		= 1,
 	PR_EXCLUSIVE_ACCESS		= 2,
diff --git a/include/uapi/linux/qrtr.h b/include/uapi/linux/qrtr.h
index 66c0748d26e2..9d76c566f66e 100644
--- a/include/uapi/linux/qrtr.h
+++ b/include/uapi/linux/qrtr.h
@@ -2,6 +2,7 @@
 #define _LINUX_QRTR_H
 
 #include <linux/socket.h>
+#include <linux/types.h>
 
 struct sockaddr_qrtr {
 	__kernel_sa_family_t sq_family;
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index e2c8a3f0ccec..74018bd18d72 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -39,6 +39,8 @@
 #ifndef __BNXT_RE_UVERBS_ABI_H__
 #define __BNXT_RE_UVERBS_ABI_H__
 
+#include <linux/types.h>
+
 #define BNXT_RE_ABI_VERSION	1
 
 struct bnxt_re_uctx_resp {
-- 
cgit v1.2.3


From 3a4e7f56ca6cd66e69a159073584434b2e639c71 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 27 Mar 2017 14:20:12 +0200
Subject: btrfs_tree.h: fix include from userland
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch prepares the uapi export by fixing the following errors:

.../linux/btrfs_tree.h:283:2: error: #error "UUID items require BTRFS_UUID_SIZE == 16!"
 #error "UUID items require BTRFS_UUID_SIZE == 16!"

.../linux/btrfs_tree.h:390:12: error: ‘BTRFS_UUID_SIZE’ undeclared here (not in a function)
  __u8 uuid[BTRFS_UUID_SIZE];
            ^
.../linux/btrfs_tree.h:796:16: error: ‘BTRFS_DEV_STAT_VALUES_MAX’ undeclared here (not in a function)
  __le64 values[BTRFS_DEV_STAT_VALUES_MAX];

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 include/uapi/linux/btrfs_tree.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index 6a261cb52d95..10689e1fdf11 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -1,6 +1,7 @@
 #ifndef _BTRFS_CTREE_H_
 #define _BTRFS_CTREE_H_
 
+#include <linux/btrfs.h>
 #include <linux/types.h>
 
 /*
-- 
cgit v1.2.3


From ea6819e1f2d6c30624ea067f4b3a50a3cca79d8a Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 27 Mar 2017 14:20:14 +0200
Subject: smc_diag.h: fix include from userland

This patch prepares the uapi export by fixing the following error:

.../linux/smc_diag.h:6:27: fatal error: rdma/ib_verbs.h: No such file or directory
 #include <rdma/ib_verbs.h>

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 include/rdma/ib_verbs.h           | 3 +--
 include/uapi/linux/smc_diag.h     | 2 +-
 include/uapi/rdma/ib_user_verbs.h | 2 ++
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index f0cb4906478a..ba8314ec5768 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -62,6 +62,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/uaccess.h>
 #include <linux/cgroup_rdma.h>
+#include <uapi/rdma/ib_user_verbs.h>
 
 extern struct workqueue_struct *ib_wq;
 extern struct workqueue_struct *ib_comp_wq;
@@ -1889,8 +1890,6 @@ enum ib_mad_result {
 	IB_MAD_RESULT_CONSUMED = 1 << 2  /* Packet consumed: stop processing */
 };
 
-#define IB_DEVICE_NAME_MAX 64
-
 struct ib_port_cache {
 	struct ib_pkey_cache  *pkey;
 	struct ib_gid_table   *gid;
diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index 0063919fea34..87712bfaa9dd 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -3,7 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/inet_diag.h>
-#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
 
 /* Request structure */
 struct smc_diag_req {
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 477d629f539d..270c350bedc6 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -1135,4 +1135,6 @@ struct ib_uverbs_ex_destroy_rwq_ind_table  {
 	__u32 ind_tbl_handle;
 };
 
+#define IB_DEVICE_NAME_MAX 64
+
 #endif /* IB_USER_VERBS_H */
-- 
cgit v1.2.3


From fcc8487d477a3452a1d0ccbdd4c5e0e1e3cb8bed Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 27 Mar 2017 14:20:15 +0200
Subject: uapi: export all headers under uapi directories

Regularly, when a new header is created in include/uapi/, the developer
forgets to add it in the corresponding Kbuild file. This error is usually
detected after the release is out.

In fact, all headers under uapi directories should be exported, thus it's
useless to have an exhaustive list.

After this patch, the following files, which were not exported, are now
exported (with make headers_install_all):
asm-arc/kvm_para.h
asm-arc/ucontext.h
asm-blackfin/shmparam.h
asm-blackfin/ucontext.h
asm-c6x/shmparam.h
asm-c6x/ucontext.h
asm-cris/kvm_para.h
asm-h8300/shmparam.h
asm-h8300/ucontext.h
asm-hexagon/shmparam.h
asm-m32r/kvm_para.h
asm-m68k/kvm_para.h
asm-m68k/shmparam.h
asm-metag/kvm_para.h
asm-metag/shmparam.h
asm-metag/ucontext.h
asm-mips/hwcap.h
asm-mips/reg.h
asm-mips/ucontext.h
asm-nios2/kvm_para.h
asm-nios2/ucontext.h
asm-openrisc/shmparam.h
asm-parisc/kvm_para.h
asm-powerpc/perf_regs.h
asm-sh/kvm_para.h
asm-sh/ucontext.h
asm-tile/shmparam.h
asm-unicore32/shmparam.h
asm-unicore32/ucontext.h
asm-x86/hwcap2.h
asm-xtensa/kvm_para.h
drm/armada_drm.h
drm/etnaviv_drm.h
drm/vgem_drm.h
linux/aspeed-lpc-ctrl.h
linux/auto_dev-ioctl.h
linux/bcache.h
linux/btrfs_tree.h
linux/can/vxcan.h
linux/cifs/cifs_mount.h
linux/coresight-stm.h
linux/cryptouser.h
linux/fsmap.h
linux/genwqe/genwqe_card.h
linux/hash_info.h
linux/kcm.h
linux/kcov.h
linux/kfd_ioctl.h
linux/lightnvm.h
linux/module.h
linux/nbd-netlink.h
linux/nilfs2_api.h
linux/nilfs2_ondisk.h
linux/nsfs.h
linux/pr.h
linux/qrtr.h
linux/rpmsg.h
linux/sched/types.h
linux/sed-opal.h
linux/smc.h
linux/smc_diag.h
linux/stm.h
linux/switchtec_ioctl.h
linux/vfio_ccw.h
linux/wil6210_uapi.h
rdma/bnxt_re-abi.h

Note that I have removed from this list the files which are generated in every
exported directories (like .install or .install.cmd).

Thanks to Julien Floret <julien.floret@6wind.com> for the tip to get all
subdirs with a pure makefile command.

For the record, note that exported files for asm directories are a mix of
files listed by:
 - include/uapi/asm-generic/Kbuild.asm;
 - arch/<arch>/include/uapi/asm/Kbuild;
 - arch/<arch>/include/asm/Kbuild.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Acked-by: Mark Salter <msalter@redhat.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc)
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Documentation/kbuild/makefiles.txt          |  66 ++--
 arch/alpha/include/uapi/asm/Kbuild          |  41 ---
 arch/arc/include/uapi/asm/Kbuild            |   3 -
 arch/arm/include/uapi/asm/Kbuild            |  17 -
 arch/arm64/include/uapi/asm/Kbuild          |  18 -
 arch/blackfin/include/uapi/asm/Kbuild       |  17 -
 arch/c6x/include/uapi/asm/Kbuild            |   8 -
 arch/cris/include/uapi/arch-v10/arch/Kbuild |   5 -
 arch/cris/include/uapi/arch-v32/arch/Kbuild |   3 -
 arch/cris/include/uapi/asm/Kbuild           |  43 +--
 arch/frv/include/uapi/asm/Kbuild            |  33 --
 arch/h8300/include/uapi/asm/Kbuild          |  28 --
 arch/hexagon/include/asm/Kbuild             |   3 -
 arch/hexagon/include/uapi/asm/Kbuild        |  13 -
 arch/ia64/include/uapi/asm/Kbuild           |  45 ---
 arch/m32r/include/uapi/asm/Kbuild           |  31 --
 arch/m68k/include/uapi/asm/Kbuild           |  24 --
 arch/metag/include/uapi/asm/Kbuild          |   8 -
 arch/microblaze/include/uapi/asm/Kbuild     |  32 --
 arch/mips/include/uapi/asm/Kbuild           |  37 ---
 arch/mn10300/include/uapi/asm/Kbuild        |  32 --
 arch/nios2/include/uapi/asm/Kbuild          |   3 +-
 arch/openrisc/include/asm/Kbuild            |   3 -
 arch/openrisc/include/uapi/asm/Kbuild       |   8 -
 arch/parisc/include/uapi/asm/Kbuild         |  28 --
 arch/powerpc/include/uapi/asm/Kbuild        |  45 ---
 arch/s390/include/uapi/asm/Kbuild           |  46 ---
 arch/score/include/asm/Kbuild               |   3 -
 arch/score/include/uapi/asm/Kbuild          |  32 --
 arch/sh/include/uapi/asm/Kbuild             |  23 --
 arch/sparc/include/uapi/asm/Kbuild          |  48 ---
 arch/tile/include/asm/Kbuild                |   3 -
 arch/tile/include/uapi/arch/Kbuild          |  17 -
 arch/tile/include/uapi/asm/Kbuild           |  19 +-
 arch/unicore32/include/uapi/asm/Kbuild      |   6 -
 arch/x86/include/uapi/asm/Kbuild            |  58 ----
 arch/xtensa/include/uapi/asm/Kbuild         |  23 --
 include/Kbuild                              |   2 -
 include/asm-generic/Kbuild.asm              |   1 -
 include/scsi/fc/Kbuild                      |   0
 include/uapi/Kbuild                         |  15 -
 include/uapi/asm-generic/Kbuild             |  36 --
 include/uapi/asm-generic/Kbuild.asm         |  76 ++---
 include/uapi/drm/Kbuild                     |  23 --
 include/uapi/linux/Kbuild                   | 494 +---------------------------
 include/uapi/linux/android/Kbuild           |   2 -
 include/uapi/linux/byteorder/Kbuild         |   3 -
 include/uapi/linux/caif/Kbuild              |   3 -
 include/uapi/linux/can/Kbuild               |   6 -
 include/uapi/linux/dvb/Kbuild               |   9 -
 include/uapi/linux/hdlc/Kbuild              |   2 -
 include/uapi/linux/hsi/Kbuild               |   2 -
 include/uapi/linux/iio/Kbuild               |   3 -
 include/uapi/linux/isdn/Kbuild              |   2 -
 include/uapi/linux/mmc/Kbuild               |   2 -
 include/uapi/linux/netfilter/Kbuild         |  89 -----
 include/uapi/linux/netfilter/ipset/Kbuild   |   5 -
 include/uapi/linux/netfilter_arp/Kbuild     |   3 -
 include/uapi/linux/netfilter_bridge/Kbuild  |  18 -
 include/uapi/linux/netfilter_ipv4/Kbuild    |  10 -
 include/uapi/linux/netfilter_ipv6/Kbuild    |  13 -
 include/uapi/linux/nfsd/Kbuild              |   6 -
 include/uapi/linux/raid/Kbuild              |   3 -
 include/uapi/linux/spi/Kbuild               |   2 -
 include/uapi/linux/sunrpc/Kbuild            |   2 -
 include/uapi/linux/tc_act/Kbuild            |  16 -
 include/uapi/linux/tc_ematch/Kbuild         |   5 -
 include/uapi/linux/usb/Kbuild               |  12 -
 include/uapi/linux/wimax/Kbuild             |   2 -
 include/uapi/misc/Kbuild                    |   2 -
 include/uapi/mtd/Kbuild                     |   6 -
 include/uapi/rdma/Kbuild                    |  20 --
 include/uapi/rdma/hfi/Kbuild                |   3 -
 include/uapi/scsi/Kbuild                    |   6 -
 include/uapi/scsi/fc/Kbuild                 |   5 -
 include/uapi/sound/Kbuild                   |  16 -
 include/uapi/video/Kbuild                   |   4 -
 include/uapi/xen/Kbuild                     |   5 -
 include/video/Kbuild                        |   0
 scripts/Makefile.headersinst                |  55 ++--
 80 files changed, 111 insertions(+), 1750 deletions(-)
 delete mode 100644 arch/cris/include/uapi/arch-v10/arch/Kbuild
 delete mode 100644 arch/cris/include/uapi/arch-v32/arch/Kbuild
 delete mode 100644 arch/tile/include/uapi/arch/Kbuild
 delete mode 100644 include/Kbuild
 delete mode 100644 include/asm-generic/Kbuild.asm
 delete mode 100644 include/scsi/fc/Kbuild
 delete mode 100644 include/uapi/Kbuild
 delete mode 100644 include/uapi/asm-generic/Kbuild
 delete mode 100644 include/uapi/drm/Kbuild
 delete mode 100644 include/uapi/linux/android/Kbuild
 delete mode 100644 include/uapi/linux/byteorder/Kbuild
 delete mode 100644 include/uapi/linux/caif/Kbuild
 delete mode 100644 include/uapi/linux/can/Kbuild
 delete mode 100644 include/uapi/linux/dvb/Kbuild
 delete mode 100644 include/uapi/linux/hdlc/Kbuild
 delete mode 100644 include/uapi/linux/hsi/Kbuild
 delete mode 100644 include/uapi/linux/iio/Kbuild
 delete mode 100644 include/uapi/linux/isdn/Kbuild
 delete mode 100644 include/uapi/linux/mmc/Kbuild
 delete mode 100644 include/uapi/linux/netfilter/Kbuild
 delete mode 100644 include/uapi/linux/netfilter/ipset/Kbuild
 delete mode 100644 include/uapi/linux/netfilter_arp/Kbuild
 delete mode 100644 include/uapi/linux/netfilter_bridge/Kbuild
 delete mode 100644 include/uapi/linux/netfilter_ipv4/Kbuild
 delete mode 100644 include/uapi/linux/netfilter_ipv6/Kbuild
 delete mode 100644 include/uapi/linux/nfsd/Kbuild
 delete mode 100644 include/uapi/linux/raid/Kbuild
 delete mode 100644 include/uapi/linux/spi/Kbuild
 delete mode 100644 include/uapi/linux/sunrpc/Kbuild
 delete mode 100644 include/uapi/linux/tc_act/Kbuild
 delete mode 100644 include/uapi/linux/tc_ematch/Kbuild
 delete mode 100644 include/uapi/linux/usb/Kbuild
 delete mode 100644 include/uapi/linux/wimax/Kbuild
 delete mode 100644 include/uapi/misc/Kbuild
 delete mode 100644 include/uapi/mtd/Kbuild
 delete mode 100644 include/uapi/rdma/Kbuild
 delete mode 100644 include/uapi/rdma/hfi/Kbuild
 delete mode 100644 include/uapi/scsi/Kbuild
 delete mode 100644 include/uapi/scsi/fc/Kbuild
 delete mode 100644 include/uapi/sound/Kbuild
 delete mode 100644 include/uapi/video/Kbuild
 delete mode 100644 include/uapi/xen/Kbuild
 delete mode 100644 include/video/Kbuild

diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index 37b525d329ae..b9f7ca4e62ae 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -44,10 +44,12 @@ This document describes the Linux kernel Makefiles.
 	   --- 6.11 Post-link pass
 
 	=== 7 Kbuild syntax for exported headers
-		--- 7.1 header-y
+		--- 7.1 no-export-headers
 		--- 7.2 genhdr-y
 		--- 7.3 generic-y
 		--- 7.4 generated-y
+		--- 7.5 mandatory-y
+		--- 7.6 subdir-y
 
 	=== 8 Kbuild Variables
 	=== 9 Makefile language
@@ -1235,7 +1237,7 @@ When kbuild executes, the following steps are followed (roughly):
 	that may be shared between individual architectures.
 	The recommended approach how to use a generic header file is
 	to list the file in the Kbuild file.
-	See "7.4 generic-y" for further info on syntax etc.
+	See "7.3 generic-y" for further info on syntax etc.
 
 --- 6.11 Post-link pass
 
@@ -1262,37 +1264,30 @@ The pre-processing does:
 - drop include of compiler.h
 - drop all sections that are kernel internal (guarded by ifdef __KERNEL__)
 
-Each relevant directory contains a file name "Kbuild" which specifies the
-headers to be exported.
-See subsequent chapter for the syntax of the Kbuild file.
-
-	--- 7.1 header-y
-
-	header-y specifies header files to be exported.
-
-		Example:
-			#include/linux/Kbuild
-			header-y += usb/
-			header-y += aio_abi.h
+All headers under include/uapi/, include/generated/uapi/,
+arch/<arch>/include/uapi/asm/ and arch/<arch>/include/generated/uapi/asm/
+are exported.
 
-	The convention is to list one file per line and
-	preferably in alphabetic order.
+A Kbuild file may be defined under arch/<arch>/include/uapi/asm/ and
+arch/<arch>/include/asm/ to list asm files coming from asm-generic.
+See subsequent chapter for the syntax of the Kbuild file.
 
-	header-y also specifies which subdirectories to visit.
-	A subdirectory is identified by a trailing '/' which
-	can be seen in the example above for the usb subdirectory.
+	--- 7.1 no-export-headers
 
-	Subdirectories are visited before their parent directories.
+	no-export-headers is essentially used by include/uapi/linux/Kbuild to
+	avoid exporting specific headers (e.g. kvm.h) on architectures that do
+	not support it. It should be avoided as much as possible.
 
 	--- 7.2 genhdr-y
 
-	genhdr-y specifies generated files to be exported.
-	Generated files are special as they need to be looked
-	up in another directory when doing 'make O=...' builds.
+	genhdr-y specifies asm files to be generated.
 
 		Example:
-			#include/linux/Kbuild
-			genhdr-y += version.h
+			#arch/x86/include/uapi/asm/Kbuild
+			genhdr-y += unistd_32.h
+			genhdr-y += unistd_64.h
+			genhdr-y += unistd_x32.h
+
 
 	--- 7.3 generic-y
 
@@ -1334,6 +1329,27 @@ See subsequent chapter for the syntax of the Kbuild file.
 			#arch/x86/include/asm/Kbuild
 			generated-y += syscalls_32.h
 
+	--- 7.5 mandatory-y
+
+	mandatory-y is essentially used by include/uapi/asm-generic/Kbuild.asm
+	to define the minimun set of headers that must be exported in
+	include/asm.
+
+	The convention is to list one subdir per line and
+	preferably in alphabetic order.
+
+	--- 7.6 subdir-y
+
+	subdir-y may be used to specify a subdirectory to be exported.
+
+		Example:
+			#arch/cris/include/uapi/asm/Kbuild
+			subdir-y += ../arch-v10/arch/
+			subdir-y += ../arch-v32/arch/
+
+	The convention is to list one subdir per line and
+	preferably in alphabetic order.
+
 === 8 Kbuild Variables
 
 The top Makefile exports the following variables:
diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild
index d96f2ef5b639..b15bf6bc0e94 100644
--- a/arch/alpha/include/uapi/asm/Kbuild
+++ b/arch/alpha/include/uapi/asm/Kbuild
@@ -1,43 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += a.out.h
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += compiler.h
-header-y += console.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += fpu.h
-header-y += gentrap.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += pal.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += reg.h
-header-y += regdef.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += sysinfo.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index f50d02df78d5..b15bf6bc0e94 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild
@@ -1,5 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-header-y += elf.h
-header-y += page.h
-header-y += cachectl.h
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 46a76cd6acb6..607f702c2d62 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -1,23 +1,6 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += auxvec.h
-header-y += byteorder.h
-header-y += fcntl.h
-header-y += hwcap.h
-header-y += ioctls.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += perf_regs.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += signal.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += unistd.h
 genhdr-y += unistd-common.h
 genhdr-y += unistd-oabi.h
 genhdr-y += unistd-eabi.h
diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
index 825b0fe51c2b..13a97aa2285f 100644
--- a/arch/arm64/include/uapi/asm/Kbuild
+++ b/arch/arm64/include/uapi/asm/Kbuild
@@ -2,21 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += kvm_para.h
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += fcntl.h
-header-y += hwcap.h
-header-y += kvm_para.h
-header-y += perf_regs.h
-header-y += param.h
-header-y += ptrace.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += stat.h
-header-y += statfs.h
-header-y += ucontext.h
-header-y += unistd.h
diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild
index 0bd28f77abc3..b15bf6bc0e94 100644
--- a/arch/blackfin/include/uapi/asm/Kbuild
+++ b/arch/blackfin/include/uapi/asm/Kbuild
@@ -1,19 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += bfin_sport.h
-header-y += byteorder.h
-header-y += cachectl.h
-header-y += fcntl.h
-header-y += fixed_code.h
-header-y += ioctls.h
-header-y += kvm_para.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += stat.h
-header-y += swab.h
-header-y += unistd.h
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index e9bc2b2b8147..13a97aa2285f 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -2,11 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += kvm_para.h
-
-header-y += byteorder.h
-header-y += kvm_para.h
-header-y += ptrace.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += swab.h
-header-y += unistd.h
diff --git a/arch/cris/include/uapi/arch-v10/arch/Kbuild b/arch/cris/include/uapi/arch-v10/arch/Kbuild
deleted file mode 100644
index 9048c87a782b..000000000000
--- a/arch/cris/include/uapi/arch-v10/arch/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-# UAPI Header export list
-header-y += sv_addr.agh
-header-y += sv_addr_ag.h
-header-y += svinto.h
-header-y += user.h
diff --git a/arch/cris/include/uapi/arch-v32/arch/Kbuild b/arch/cris/include/uapi/arch-v32/arch/Kbuild
deleted file mode 100644
index 59efffd16b61..000000000000
--- a/arch/cris/include/uapi/arch-v32/arch/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += cryptocop.h
-header-y += user.h
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index d5564a0ae66a..d0c5471856e0 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -1,44 +1,5 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += ../arch-v10/arch/
-header-y += ../arch-v32/arch/
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += elf.h
-header-y += elf_v10.h
-header-y += elf_v32.h
-header-y += errno.h
-header-y += ethernet.h
-header-y += etraxgpio.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += ptrace_v10.h
-header-y += ptrace_v32.h
-header-y += resource.h
-header-y += rs485.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += sync_serial.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
+subdir-y += ../arch-v10/arch/
+subdir-y += ../arch-v32/arch/
diff --git a/arch/frv/include/uapi/asm/Kbuild b/arch/frv/include/uapi/asm/Kbuild
index 42a2b33461c0..b15bf6bc0e94 100644
--- a/arch/frv/include/uapi/asm/Kbuild
+++ b/arch/frv/include/uapi/asm/Kbuild
@@ -1,35 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += registers.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index fb6101a5d4f1..b15bf6bc0e94 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -1,30 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += siginfo.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index a2036bfda8af..6b45ef79eb8f 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -1,6 +1,3 @@
-
-header-y += ucontext.h
-
 generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += bug.h
diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
index c31706c38631..b15bf6bc0e94 100644
--- a/arch/hexagon/include/uapi/asm/Kbuild
+++ b/arch/hexagon/include/uapi/asm/Kbuild
@@ -1,15 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += kvm_para.h
-header-y += param.h
-header-y += ptrace.h
-header-y += registers.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += signal.h
-header-y += swab.h
-header-y += unistd.h
-header-y += user.h
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
index 891002bbb995..13a97aa2285f 100644
--- a/arch/ia64/include/uapi/asm/Kbuild
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -2,48 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += kvm_para.h
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += break.h
-header-y += byteorder.h
-header-y += cmpxchg.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += fpu.h
-header-y += gcc_intrin.h
-header-y += ia64regs.h
-header-y += intel_intrin.h
-header-y += intrinsics.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += perfmon.h
-header-y += perfmon_default_smpl.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += ptrace_offsets.h
-header-y += resource.h
-header-y += rse.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += ucontext.h
-header-y += unistd.h
-header-y += ustack.h
diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild
index 43937a61d6cf..b15bf6bc0e94 100644
--- a/arch/m32r/include/uapi/asm/Kbuild
+++ b/arch/m32r/include/uapi/asm/Kbuild
@@ -1,33 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 6a2d257bdfb2..64368077235a 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -9,27 +9,3 @@ generic-y += socket.h
 generic-y += sockios.h
 generic-y += termbits.h
 generic-y += termios.h
-
-header-y += a.out.h
-header-y += bootinfo.h
-header-y += bootinfo-amiga.h
-header-y += bootinfo-apollo.h
-header-y += bootinfo-atari.h
-header-y += bootinfo-hp300.h
-header-y += bootinfo-mac.h
-header-y += bootinfo-q40.h
-header-y += bootinfo-vme.h
-header-y += byteorder.h
-header-y += cachectl.h
-header-y += fcntl.h
-header-y += ioctls.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += signal.h
-header-y += stat.h
-header-y += swab.h
-header-y += unistd.h
diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild
index ab78be2b6eb0..b29731ebd7a9 100644
--- a/arch/metag/include/uapi/asm/Kbuild
+++ b/arch/metag/include/uapi/asm/Kbuild
@@ -1,14 +1,6 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += byteorder.h
-header-y += ech.h
-header-y += ptrace.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += swab.h
-header-y += unistd.h
-
 generic-y += mman.h
 generic-y += resource.h
 generic-y += setup.h
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index 1aac99f87df1..2178c78c7c1a 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -2,35 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += types.h
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += elf.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += unistd.h
diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild
index f2cf41461146..a0266feba9e6 100644
--- a/arch/mips/include/uapi/asm/Kbuild
+++ b/arch/mips/include/uapi/asm/Kbuild
@@ -2,40 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += ipcbuf.h
-
-header-y += auxvec.h
-header-y += bitfield.h
-header-y += bitsperlong.h
-header-y += break.h
-header-y += byteorder.h
-header-y += cachectl.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += inst.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += sgidefs.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += sysmips.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild
index 040178cdb3eb..b15bf6bc0e94 100644
--- a/arch/mn10300/include/uapi/asm/Kbuild
+++ b/arch/mn10300/include/uapi/asm/Kbuild
@@ -1,34 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index 69c965304146..374bd123329f 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -1,6 +1,5 @@
+# UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += elf.h
-
 generic-y += setup.h
 generic-y += ucontext.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index df8e2f7bc7dd..fdbcf0bf44a4 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -1,6 +1,3 @@
-
-header-y += ucontext.h
-
 generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += bitsperlong.h
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index 80761eb82b5f..b15bf6bc0e94 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -1,10 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += byteorder.h
-header-y += elf.h
-header-y += kvm_para.h
-header-y += param.h
-header-y += ptrace.h
-header-y += sigcontext.h
-header-y += unistd.h
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
index 348356c99514..3971c60a7e7f 100644
--- a/arch/parisc/include/uapi/asm/Kbuild
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -2,31 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += resource.h
-
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += pdc.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index dab3717e3ea0..b15bf6bc0e94 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,47 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += bootx.h
-header-y += byteorder.h
-header-y += cputable.h
-header-y += eeh.h
-header-y += elf.h
-header-y += epapr_hcalls.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += nvram.h
-header-y += opal-prd.h
-header-y += param.h
-header-y += perf_event.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ps3fb.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += spu_info.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += tm.h
-header-y += types.h
-header-y += ucontext.h
-header-y += unistd.h
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index addb09cee0f5..ca62066895e0 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -10,49 +10,3 @@ generic-y += poll.h
 generic-y += resource.h
 generic-y += sockios.h
 generic-y += termbits.h
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += chpid.h
-header-y += chsc.h
-header-y += clp.h
-header-y += cmb.h
-header-y += dasd.h
-header-y += debug.h
-header-y += errno.h
-header-y += guarded_storage.h
-header-y += hypfs.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm.h
-header-y += kvm_para.h
-header-y += kvm_perf.h
-header-y += kvm_virtio.h
-header-y += monwriter.h
-header-y += msgbuf.h
-header-y += pkey.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += qeth.h
-header-y += schid.h
-header-y += sclp_ctl.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sie.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += tape390.h
-header-y += termios.h
-header-y += types.h
-header-y += ucontext.h
-header-y += unistd.h
-header-y += virtio-ccw.h
-header-y += vtoc.h
-header-y += zcrypt.h
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index e3a8d0f96652..54b3b2039af1 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -1,6 +1,3 @@
-
-header-y +=
-
 generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += current.h
diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild
index 040178cdb3eb..b15bf6bc0e94 100644
--- a/arch/score/include/uapi/asm/Kbuild
+++ b/arch/score/include/uapi/asm/Kbuild
@@ -1,34 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index 60613ae78513..b15bf6bc0e94 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -1,25 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += byteorder.h
-header-y += cachectl.h
-header-y += cpu-features.h
-header-y += hw_breakpoint.h
-header-y += ioctls.h
-header-y += posix_types.h
-header-y += posix_types_32.h
-header-y += posix_types_64.h
-header-y += ptrace.h
-header-y += ptrace_32.h
-header-y += ptrace_64.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += signal.h
-header-y += sockios.h
-header-y += stat.h
-header-y += swab.h
-header-y += types.h
-header-y += unistd.h
-header-y += unistd_32.h
-header-y += unistd_64.h
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
index b5843ee09fb5..b15bf6bc0e94 100644
--- a/arch/sparc/include/uapi/asm/Kbuild
+++ b/arch/sparc/include/uapi/asm/Kbuild
@@ -1,50 +1,2 @@
 # UAPI Header export list
-# User exported sparc header files
-
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += apc.h
-header-y += asi.h
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += display7seg.h
-header-y += envctrl.h
-header-y += errno.h
-header-y += fbio.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += jsflash.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += openpromio.h
-header-y += param.h
-header-y += perfctr.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += psr.h
-header-y += psrcompat.h
-header-y += pstate.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += traps.h
-header-y += uctx.h
-header-y += unistd.h
-header-y += utrap.h
-header-y += watchdog.h
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 24c44e93804d..16f0b08c8ce9 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -1,6 +1,3 @@
-
-header-y += ../arch/
-
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += clkdev.h
diff --git a/arch/tile/include/uapi/arch/Kbuild b/arch/tile/include/uapi/arch/Kbuild
deleted file mode 100644
index 97dfbecec6b6..000000000000
--- a/arch/tile/include/uapi/arch/Kbuild
+++ /dev/null
@@ -1,17 +0,0 @@
-# UAPI Header export list
-header-y += abi.h
-header-y += chip.h
-header-y += chip_tilegx.h
-header-y += chip_tilepro.h
-header-y += icache.h
-header-y += interrupts.h
-header-y += interrupts_32.h
-header-y += interrupts_64.h
-header-y += opcode.h
-header-y += opcode_tilegx.h
-header-y += opcode_tilepro.h
-header-y += sim.h
-header-y += sim_def.h
-header-y += spr_def.h
-header-y += spr_def_32.h
-header-y += spr_def_64.h
diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild
index c20db8e428bf..e0a50111e07f 100644
--- a/arch/tile/include/uapi/asm/Kbuild
+++ b/arch/tile/include/uapi/asm/Kbuild
@@ -1,21 +1,6 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += cachectl.h
-header-y += hardwall.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += ptrace.h
-header-y += setup.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += stat.h
-header-y += swab.h
-header-y += ucontext.h
-header-y += unistd.h
-
 generic-y += ucontext.h
+
+subdir-y += ../arch
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 0514d7ad6855..13a97aa2285f 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -1,10 +1,4 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += byteorder.h
-header-y += kvm_para.h
-header-y += ptrace.h
-header-y += sigcontext.h
-header-y += unistd.h
-
 generic-y += kvm_para.h
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index 1c532b3f18ea..83b6e9a0dce4 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -4,61 +4,3 @@ include include/uapi/asm-generic/Kbuild.asm
 genhdr-y += unistd_32.h
 genhdr-y += unistd_64.h
 genhdr-y += unistd_x32.h
-header-y += a.out.h
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += boot.h
-header-y += bootparam.h
-header-y += byteorder.h
-header-y += debugreg.h
-header-y += e820.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += hw_breakpoint.h
-header-y += hyperv.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += ist.h
-header-y += kvm.h
-header-y += kvm_para.h
-header-y += kvm_perf.h
-header-y += ldt.h
-header-y += mce.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += msr.h
-header-y += mtrr.h
-header-y += param.h
-header-y += perf_regs.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += posix_types_32.h
-header-y += posix_types_64.h
-header-y += posix_types_x32.h
-header-y += prctl.h
-header-y += processor-flags.h
-header-y += ptrace-abi.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += sigcontext32.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += svm.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += ucontext.h
-header-y += unistd.h
-header-y += vm86.h
-header-y += vmx.h
-header-y += vsyscall.h
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
index 56aad54e7fb7..b15bf6bc0e94 100644
--- a/arch/xtensa/include/uapi/asm/Kbuild
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -1,25 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += byteorder.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += swab.h
-header-y += termbits.h
-header-y += types.h
-header-y += unistd.h
diff --git a/include/Kbuild b/include/Kbuild
deleted file mode 100644
index bab1145bc7a7..000000000000
--- a/include/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# Top-level Makefile calls into asm-$(ARCH)
-# List only non-arch directories below
diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
deleted file mode 100644
index d2ee86b4c091..000000000000
--- a/include/asm-generic/Kbuild.asm
+++ /dev/null
@@ -1 +0,0 @@
-include include/uapi/asm-generic/Kbuild.asm
diff --git a/include/scsi/fc/Kbuild b/include/scsi/fc/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/uapi/Kbuild b/include/uapi/Kbuild
deleted file mode 100644
index 245aa6e05e6a..000000000000
--- a/include/uapi/Kbuild
+++ /dev/null
@@ -1,15 +0,0 @@
-# UAPI Header export list
-# Top-level Makefile calls into asm-$(ARCH)
-# List only non-arch directories below
-
-
-header-y += asm-generic/
-header-y += linux/
-header-y += sound/
-header-y += mtd/
-header-y += rdma/
-header-y += video/
-header-y += drm/
-header-y += xen/
-header-y += scsi/
-header-y += misc/
diff --git a/include/uapi/asm-generic/Kbuild b/include/uapi/asm-generic/Kbuild
deleted file mode 100644
index b73de7bb7a62..000000000000
--- a/include/uapi/asm-generic/Kbuild
+++ /dev/null
@@ -1,36 +0,0 @@
-# UAPI Header export list
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += errno-base.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += int-l64.h
-header-y += int-ll64.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman-common.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += shmparam.h
-header-y += siginfo.h
-header-y += signal-defs.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += ucontext.h
-header-y += unistd.h
diff --git a/include/uapi/asm-generic/Kbuild.asm b/include/uapi/asm-generic/Kbuild.asm
index fcd50b759217..21381449d98a 100644
--- a/include/uapi/asm-generic/Kbuild.asm
+++ b/include/uapi/asm-generic/Kbuild.asm
@@ -1,49 +1,33 @@
-#
-# Headers that are optional in usr/include/asm/
-#
-opt-header += kvm.h
-opt-header += kvm_para.h
-opt-header += a.out.h
-
 #
 # Headers that are mandatory in usr/include/asm/
 #
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += byteorder.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += param.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += unistd.h
-
-header-y += $(foreach hdr,$(opt-header), \
-	      $(if \
-		$(wildcard \
-			$(srctree)/arch/$(SRCARCH)/include/uapi/asm/$(hdr) \
-			$(srctree)/arch/$(SRCARCH)/include/asm/$(hdr) \
-		), \
-		$(hdr) \
-		))
+mandatory-y += auxvec.h
+mandatory-y += bitsperlong.h
+mandatory-y += byteorder.h
+mandatory-y += errno.h
+mandatory-y += fcntl.h
+mandatory-y += ioctl.h
+mandatory-y += ioctls.h
+mandatory-y += ipcbuf.h
+mandatory-y += mman.h
+mandatory-y += msgbuf.h
+mandatory-y += param.h
+mandatory-y += poll.h
+mandatory-y += posix_types.h
+mandatory-y += ptrace.h
+mandatory-y += resource.h
+mandatory-y += sembuf.h
+mandatory-y += setup.h
+mandatory-y += shmbuf.h
+mandatory-y += sigcontext.h
+mandatory-y += siginfo.h
+mandatory-y += signal.h
+mandatory-y += socket.h
+mandatory-y += sockios.h
+mandatory-y += stat.h
+mandatory-y += statfs.h
+mandatory-y += swab.h
+mandatory-y += termbits.h
+mandatory-y += termios.h
+mandatory-y += types.h
+mandatory-y += unistd.h
diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild
deleted file mode 100644
index c97addd08f8c..000000000000
--- a/include/uapi/drm/Kbuild
+++ /dev/null
@@ -1,23 +0,0 @@
-# UAPI Header export list
-header-y += drm.h
-header-y += drm_fourcc.h
-header-y += drm_mode.h
-header-y += drm_sarea.h
-header-y += amdgpu_drm.h
-header-y += exynos_drm.h
-header-y += i810_drm.h
-header-y += i915_drm.h
-header-y += mga_drm.h
-header-y += nouveau_drm.h
-header-y += omap_drm.h
-header-y += qxl_drm.h
-header-y += r128_drm.h
-header-y += radeon_drm.h
-header-y += savage_drm.h
-header-y += sis_drm.h
-header-y += tegra_drm.h
-header-y += via_drm.h
-header-y += vmwgfx_drm.h
-header-y += msm_drm.h
-header-y += vc4_drm.h
-header-y += virtgpu_drm.h
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 662c592b74dd..ca2787d9bf0f 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -1,495 +1,13 @@
 # UAPI Header export list
-header-y += android/
-header-y += byteorder/
-header-y += can/
-header-y += caif/
-header-y += dvb/
-header-y += hdlc/
-header-y += hsi/
-header-y += iio/
-header-y += isdn/
-header-y += mmc/
-header-y += nfsd/
-header-y += raid/
-header-y += spi/
-header-y += sunrpc/
-header-y += tc_act/
-header-y += tc_ematch/
-header-y += netfilter/
-header-y += netfilter_arp/
-header-y += netfilter_bridge/
-header-y += netfilter_ipv4/
-header-y += netfilter_ipv6/
-header-y += usb/
-header-y += wimax/
 
-genhdr-y += version.h
-
-ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/a.out.h \
-		  $(srctree)/arch/$(SRCARCH)/include/asm/a.out.h),)
-header-y += a.out.h
+ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/a.out.h),)
+no-export-headers += a.out.h
 endif
 
-header-y += acct.h
-header-y += adb.h
-header-y += adfs_fs.h
-header-y += affs_hardblocks.h
-header-y += agpgart.h
-header-y += aio_abi.h
-header-y += am437x-vpfe.h
-header-y += apm_bios.h
-header-y += arcfb.h
-header-y += atalk.h
-header-y += atmapi.h
-header-y += atmarp.h
-header-y += atmbr2684.h
-header-y += atmclip.h
-header-y += atmdev.h
-header-y += atm_eni.h
-header-y += atm.h
-header-y += atm_he.h
-header-y += atm_idt77105.h
-header-y += atmioc.h
-header-y += atmlec.h
-header-y += atmmpc.h
-header-y += atm_nicstar.h
-header-y += atmppp.h
-header-y += atmsap.h
-header-y += atmsvc.h
-header-y += atm_tcp.h
-header-y += atm_zatm.h
-header-y += audit.h
-header-y += auto_fs4.h
-header-y += auto_fs.h
-header-y += auxvec.h
-header-y += ax25.h
-header-y += b1lli.h
-header-y += batman_adv.h
-header-y += baycom.h
-header-y += bcm933xx_hcs.h
-header-y += bfs_fs.h
-header-y += binfmts.h
-header-y += blkpg.h
-header-y += blktrace_api.h
-header-y += blkzoned.h
-header-y += bpf_common.h
-header-y += bpf_perf_event.h
-header-y += bpf.h
-header-y += bpqether.h
-header-y += bsg.h
-header-y += bt-bmc.h
-header-y += btrfs.h
-header-y += can.h
-header-y += capability.h
-header-y += capi.h
-header-y += cciss_defs.h
-header-y += cciss_ioctl.h
-header-y += cdrom.h
-header-y += cec.h
-header-y += cec-funcs.h
-header-y += cgroupstats.h
-header-y += chio.h
-header-y += cm4000_cs.h
-header-y += cn_proc.h
-header-y += coda.h
-header-y += coda_psdev.h
-header-y += coff.h
-header-y += connector.h
-header-y += const.h
-header-y += cramfs_fs.h
-header-y += cuda.h
-header-y += cyclades.h
-header-y += cycx_cfm.h
-header-y += dcbnl.h
-header-y += dccp.h
-header-y += devlink.h
-header-y += dlmconstants.h
-header-y += dlm_device.h
-header-y += dlm.h
-header-y += dlm_netlink.h
-header-y += dlm_plock.h
-header-y += dm-ioctl.h
-header-y += dm-log-userspace.h
-header-y += dma-buf.h
-header-y += dn.h
-header-y += dqblk_xfs.h
-header-y += edd.h
-header-y += efs_fs_sb.h
-header-y += elfcore.h
-header-y += elf-em.h
-header-y += elf-fdpic.h
-header-y += elf.h
-header-y += errno.h
-header-y += errqueue.h
-header-y += ethtool.h
-header-y += eventpoll.h
-header-y += fadvise.h
-header-y += falloc.h
-header-y += fanotify.h
-header-y += fb.h
-header-y += fcntl.h
-header-y += fd.h
-header-y += fdreg.h
-header-y += fib_rules.h
-header-y += fiemap.h
-header-y += filter.h
-header-y += firewire-cdev.h
-header-y += firewire-constants.h
-header-y += flat.h
-header-y += fou.h
-header-y += fs.h
-header-y += fsl_hypervisor.h
-header-y += fuse.h
-header-y += futex.h
-header-y += gameport.h
-header-y += genetlink.h
-header-y += gen_stats.h
-header-y += gfs2_ondisk.h
-header-y += gigaset_dev.h
-header-y += gpio.h
-header-y += gsmmux.h
-header-y += gtp.h
-header-y += hdlcdrv.h
-header-y += hdlc.h
-header-y += hdreg.h
-header-y += hiddev.h
-header-y += hid.h
-header-y += hidraw.h
-header-y += hpet.h
-header-y += hsr_netlink.h
-header-y += hyperv.h
-header-y += hysdn_if.h
-header-y += i2c-dev.h
-header-y += i2c.h
-header-y += i2o-dev.h
-header-y += i8k.h
-header-y += icmp.h
-header-y += icmpv6.h
-header-y += if_addr.h
-header-y += if_addrlabel.h
-header-y += if_alg.h
-header-y += if_arcnet.h
-header-y += if_arp.h
-header-y += if_bonding.h
-header-y += if_bridge.h
-header-y += if_cablemodem.h
-header-y += if_eql.h
-header-y += if_ether.h
-header-y += if_fc.h
-header-y += if_fddi.h
-header-y += if_frad.h
-header-y += if.h
-header-y += if_hippi.h
-header-y += if_infiniband.h
-header-y += if_link.h
-header-y += if_ltalk.h
-header-y += if_macsec.h
-header-y += if_packet.h
-header-y += if_phonet.h
-header-y += if_plip.h
-header-y += if_ppp.h
-header-y += if_pppol2tp.h
-header-y += if_pppox.h
-header-y += if_slip.h
-header-y += if_team.h
-header-y += if_tun.h
-header-y += if_tunnel.h
-header-y += if_vlan.h
-header-y += if_x25.h
-header-y += ife.h
-header-y += igmp.h
-header-y += ila.h
-header-y += in6.h
-header-y += inet_diag.h
-header-y += in.h
-header-y += inotify.h
-header-y += input.h
-header-y += input-event-codes.h
-header-y += in_route.h
-header-y += ioctl.h
-header-y += ip6_tunnel.h
-header-y += ipc.h
-header-y += ip.h
-header-y += ipmi.h
-header-y += ipmi_msgdefs.h
-header-y += ipsec.h
-header-y += ipv6.h
-header-y += ipv6_route.h
-header-y += ip_vs.h
-header-y += ipx.h
-header-y += irda.h
-header-y += irqnr.h
-header-y += isdn_divertif.h
-header-y += isdn.h
-header-y += isdnif.h
-header-y += isdn_ppp.h
-header-y += iso_fs.h
-header-y += ivtvfb.h
-header-y += ivtv.h
-header-y += ixjuser.h
-header-y += jffs2.h
-header-y += joystick.h
-header-y += kcmp.h
-header-y += kdev_t.h
-header-y += kd.h
-header-y += kernelcapi.h
-header-y += kernel.h
-header-y += kernel-page-flags.h
-header-y += kexec.h
-header-y += keyboard.h
-header-y += keyctl.h
-
-ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm.h \
-		  $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h),)
-header-y += kvm.h
+ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm.h),)
+no-export-headers += kvm.h
 endif
 
-
-ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h \
-		  $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h),)
-header-y += kvm_para.h
+ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h),)
+no-export-headers += kvm_para.h
 endif
-
-header-y += hw_breakpoint.h
-header-y += l2tp.h
-header-y += libc-compat.h
-header-y += lirc.h
-header-y += limits.h
-header-y += llc.h
-header-y += loop.h
-header-y += lp.h
-header-y += lwtunnel.h
-header-y += magic.h
-header-y += major.h
-header-y += map_to_7segment.h
-header-y += matroxfb.h
-header-y += mdio.h
-header-y += media.h
-header-y += media-bus-format.h
-header-y += mei.h
-header-y += membarrier.h
-header-y += memfd.h
-header-y += mempolicy.h
-header-y += meye.h
-header-y += mic_common.h
-header-y += mic_ioctl.h
-header-y += mii.h
-header-y += minix_fs.h
-header-y += mman.h
-header-y += mmtimer.h
-header-y += mpls.h
-header-y += mpls_iptunnel.h
-header-y += mqueue.h
-header-y += mroute6.h
-header-y += mroute.h
-header-y += msdos_fs.h
-header-y += msg.h
-header-y += mtio.h
-header-y += nbd.h
-header-y += ncp_fs.h
-header-y += ncp.h
-header-y += ncp_mount.h
-header-y += ncp_no.h
-header-y += ndctl.h
-header-y += neighbour.h
-header-y += netconf.h
-header-y += netdevice.h
-header-y += net_dropmon.h
-header-y += netfilter_arp.h
-header-y += netfilter_bridge.h
-header-y += netfilter_decnet.h
-header-y += netfilter.h
-header-y += netfilter_ipv4.h
-header-y += netfilter_ipv6.h
-header-y += net.h
-header-y += netlink_diag.h
-header-y += netlink.h
-header-y += netrom.h
-header-y += net_namespace.h
-header-y += net_tstamp.h
-header-y += nfc.h
-header-y += psample.h
-header-y += nfs2.h
-header-y += nfs3.h
-header-y += nfs4.h
-header-y += nfs4_mount.h
-header-y += nfsacl.h
-header-y += nfs_fs.h
-header-y += nfs.h
-header-y += nfs_idmap.h
-header-y += nfs_mount.h
-header-y += nl80211.h
-header-y += n_r3964.h
-header-y += nubus.h
-header-y += nvme_ioctl.h
-header-y += nvram.h
-header-y += omap3isp.h
-header-y += omapfb.h
-header-y += oom.h
-header-y += openvswitch.h
-header-y += packet_diag.h
-header-y += param.h
-header-y += parport.h
-header-y += patchkey.h
-header-y += pci.h
-header-y += pci_regs.h
-header-y += pcitest.h
-header-y += perf_event.h
-header-y += personality.h
-header-y += pfkeyv2.h
-header-y += pg.h
-header-y += phantom.h
-header-y += phonet.h
-header-y += pktcdvd.h
-header-y += pkt_cls.h
-header-y += pkt_sched.h
-header-y += pmu.h
-header-y += poll.h
-header-y += posix_acl.h
-header-y += posix_acl_xattr.h
-header-y += posix_types.h
-header-y += ppdev.h
-header-y += ppp-comp.h
-header-y += ppp_defs.h
-header-y += ppp-ioctl.h
-header-y += pps.h
-header-y += prctl.h
-header-y += psci.h
-header-y += ptp_clock.h
-header-y += ptrace.h
-header-y += qnx4_fs.h
-header-y += qnxtypes.h
-header-y += quota.h
-header-y += radeonfb.h
-header-y += random.h
-header-y += raw.h
-header-y += rds.h
-header-y += reboot.h
-header-y += reiserfs_fs.h
-header-y += reiserfs_xattr.h
-header-y += resource.h
-header-y += rfkill.h
-header-y += rio_cm_cdev.h
-header-y += rio_mport_cdev.h
-header-y += romfs_fs.h
-header-y += rose.h
-header-y += route.h
-header-y += rtc.h
-header-y += rtnetlink.h
-header-y += scc.h
-header-y += sched.h
-header-y += scif_ioctl.h
-header-y += screen_info.h
-header-y += sctp.h
-header-y += sdla.h
-header-y += seccomp.h
-header-y += securebits.h
-header-y += seg6_genl.h
-header-y += seg6.h
-header-y += seg6_hmac.h
-header-y += seg6_iptunnel.h
-header-y += selinux_netlink.h
-header-y += sem.h
-header-y += serial_core.h
-header-y += serial.h
-header-y += serial_reg.h
-header-y += serio.h
-header-y += shm.h
-header-y += signalfd.h
-header-y += signal.h
-header-y += smiapp.h
-header-y += snmp.h
-header-y += sock_diag.h
-header-y += socket.h
-header-y += sockios.h
-header-y += sonet.h
-header-y += sonypi.h
-header-y += soundcard.h
-header-y += sound.h
-header-y += stat.h
-header-y += stddef.h
-header-y += string.h
-header-y += suspend_ioctls.h
-header-y += swab.h
-header-y += synclink.h
-header-y += sync_file.h
-header-y += sysctl.h
-header-y += sysinfo.h
-header-y += target_core_user.h
-header-y += taskstats.h
-header-y += tcp.h
-header-y += tcp_metrics.h
-header-y += telephony.h
-header-y += termios.h
-header-y += thermal.h
-header-y += time.h
-header-y += timerfd.h
-header-y += times.h
-header-y += timex.h
-header-y += tiocl.h
-header-y += tipc_config.h
-header-y += tipc_netlink.h
-header-y += tipc.h
-header-y += toshiba.h
-header-y += tty_flags.h
-header-y += tty.h
-header-y += types.h
-header-y += udf_fs_i.h
-header-y += udp.h
-header-y += uhid.h
-header-y += uinput.h
-header-y += uio.h
-header-y += uleds.h
-header-y += ultrasound.h
-header-y += un.h
-header-y += unistd.h
-header-y += unix_diag.h
-header-y += usbdevice_fs.h
-header-y += usbip.h
-header-y += userio.h
-header-y += utime.h
-header-y += utsname.h
-header-y += uuid.h
-header-y += uvcvideo.h
-header-y += v4l2-common.h
-header-y += v4l2-controls.h
-header-y += v4l2-dv-timings.h
-header-y += v4l2-mediabus.h
-header-y += v4l2-subdev.h
-header-y += veth.h
-header-y += vfio.h
-header-y += vhost.h
-header-y += videodev2.h
-header-y += virtio_9p.h
-header-y += virtio_balloon.h
-header-y += virtio_blk.h
-header-y += virtio_config.h
-header-y += virtio_console.h
-header-y += virtio_gpu.h
-header-y += virtio_ids.h
-header-y += virtio_input.h
-header-y += virtio_mmio.h
-header-y += virtio_net.h
-header-y += virtio_pci.h
-header-y += virtio_ring.h
-header-y += virtio_rng.h
-header-y += virtio_scsi.h
-header-y += virtio_types.h
-header-y += virtio_vsock.h
-header-y += virtio_crypto.h
-header-y += vm_sockets.h
-header-y += vsockmon.h
-header-y += vt.h
-header-y += vtpm_proxy.h
-header-y += wait.h
-header-y += wanrouter.h
-header-y += watchdog.h
-header-y += wimax.h
-header-y += wireless.h
-header-y += x25.h
-header-y += xattr.h
-header-y += xfrm.h
-header-y += xilinx-v4l2-controls.h
-header-y += zorro.h
-header-y += zorro_ids.h
-header-y += userfaultfd.h
diff --git a/include/uapi/linux/android/Kbuild b/include/uapi/linux/android/Kbuild
deleted file mode 100644
index ca011eec252a..000000000000
--- a/include/uapi/linux/android/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += binder.h
diff --git a/include/uapi/linux/byteorder/Kbuild b/include/uapi/linux/byteorder/Kbuild
deleted file mode 100644
index 619225b9ff2e..000000000000
--- a/include/uapi/linux/byteorder/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += big_endian.h
-header-y += little_endian.h
diff --git a/include/uapi/linux/caif/Kbuild b/include/uapi/linux/caif/Kbuild
deleted file mode 100644
index 43396612d3a3..000000000000
--- a/include/uapi/linux/caif/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += caif_socket.h
-header-y += if_caif.h
diff --git a/include/uapi/linux/can/Kbuild b/include/uapi/linux/can/Kbuild
deleted file mode 100644
index 21c91bf25a29..000000000000
--- a/include/uapi/linux/can/Kbuild
+++ /dev/null
@@ -1,6 +0,0 @@
-# UAPI Header export list
-header-y += bcm.h
-header-y += error.h
-header-y += gw.h
-header-y += netlink.h
-header-y += raw.h
diff --git a/include/uapi/linux/dvb/Kbuild b/include/uapi/linux/dvb/Kbuild
deleted file mode 100644
index d40942cfc627..000000000000
--- a/include/uapi/linux/dvb/Kbuild
+++ /dev/null
@@ -1,9 +0,0 @@
-# UAPI Header export list
-header-y += audio.h
-header-y += ca.h
-header-y += dmx.h
-header-y += frontend.h
-header-y += net.h
-header-y += osd.h
-header-y += version.h
-header-y += video.h
diff --git a/include/uapi/linux/hdlc/Kbuild b/include/uapi/linux/hdlc/Kbuild
deleted file mode 100644
index 8c1d2cb75e33..000000000000
--- a/include/uapi/linux/hdlc/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += ioctl.h
diff --git a/include/uapi/linux/hsi/Kbuild b/include/uapi/linux/hsi/Kbuild
deleted file mode 100644
index a16a00544258..000000000000
--- a/include/uapi/linux/hsi/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += hsi_char.h cs-protocol.h
diff --git a/include/uapi/linux/iio/Kbuild b/include/uapi/linux/iio/Kbuild
deleted file mode 100644
index 86f76d84c44f..000000000000
--- a/include/uapi/linux/iio/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += events.h
-header-y += types.h
diff --git a/include/uapi/linux/isdn/Kbuild b/include/uapi/linux/isdn/Kbuild
deleted file mode 100644
index 89e52850bf29..000000000000
--- a/include/uapi/linux/isdn/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += capicmd.h
diff --git a/include/uapi/linux/mmc/Kbuild b/include/uapi/linux/mmc/Kbuild
deleted file mode 100644
index 8c1d2cb75e33..000000000000
--- a/include/uapi/linux/mmc/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += ioctl.h
diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
deleted file mode 100644
index 03f194aeadc5..000000000000
--- a/include/uapi/linux/netfilter/Kbuild
+++ /dev/null
@@ -1,89 +0,0 @@
-# UAPI Header export list
-header-y += ipset/
-header-y += nf_conntrack_common.h
-header-y += nf_conntrack_ftp.h
-header-y += nf_conntrack_sctp.h
-header-y += nf_conntrack_tcp.h
-header-y += nf_conntrack_tuple_common.h
-header-y += nf_log.h
-header-y += nf_tables.h
-header-y += nf_tables_compat.h
-header-y += nf_nat.h
-header-y += nfnetlink.h
-header-y += nfnetlink_acct.h
-header-y += nfnetlink_compat.h
-header-y += nfnetlink_conntrack.h
-header-y += nfnetlink_cthelper.h
-header-y += nfnetlink_cttimeout.h
-header-y += nfnetlink_log.h
-header-y += nfnetlink_queue.h
-header-y += x_tables.h
-header-y += xt_AUDIT.h
-header-y += xt_CHECKSUM.h
-header-y += xt_CLASSIFY.h
-header-y += xt_CONNMARK.h
-header-y += xt_CONNSECMARK.h
-header-y += xt_CT.h
-header-y += xt_DSCP.h
-header-y += xt_HMARK.h
-header-y += xt_IDLETIMER.h
-header-y += xt_LED.h
-header-y += xt_LOG.h
-header-y += xt_MARK.h
-header-y += xt_NFLOG.h
-header-y += xt_NFQUEUE.h
-header-y += xt_RATEEST.h
-header-y += xt_SECMARK.h
-header-y += xt_SYNPROXY.h
-header-y += xt_TCPMSS.h
-header-y += xt_TCPOPTSTRIP.h
-header-y += xt_TEE.h
-header-y += xt_TPROXY.h
-header-y += xt_addrtype.h
-header-y += xt_bpf.h
-header-y += xt_cgroup.h
-header-y += xt_cluster.h
-header-y += xt_comment.h
-header-y += xt_connbytes.h
-header-y += xt_connlabel.h
-header-y += xt_connlimit.h
-header-y += xt_connmark.h
-header-y += xt_conntrack.h
-header-y += xt_cpu.h
-header-y += xt_dccp.h
-header-y += xt_devgroup.h
-header-y += xt_dscp.h
-header-y += xt_ecn.h
-header-y += xt_esp.h
-header-y += xt_hashlimit.h
-header-y += xt_helper.h
-header-y += xt_ipcomp.h
-header-y += xt_iprange.h
-header-y += xt_ipvs.h
-header-y += xt_l2tp.h
-header-y += xt_length.h
-header-y += xt_limit.h
-header-y += xt_mac.h
-header-y += xt_mark.h
-header-y += xt_multiport.h
-header-y += xt_nfacct.h
-header-y += xt_osf.h
-header-y += xt_owner.h
-header-y += xt_physdev.h
-header-y += xt_pkttype.h
-header-y += xt_policy.h
-header-y += xt_quota.h
-header-y += xt_rateest.h
-header-y += xt_realm.h
-header-y += xt_recent.h
-header-y += xt_rpfilter.h
-header-y += xt_sctp.h
-header-y += xt_set.h
-header-y += xt_socket.h
-header-y += xt_state.h
-header-y += xt_statistic.h
-header-y += xt_string.h
-header-y += xt_tcpmss.h
-header-y += xt_tcpudp.h
-header-y += xt_time.h
-header-y += xt_u32.h
diff --git a/include/uapi/linux/netfilter/ipset/Kbuild b/include/uapi/linux/netfilter/ipset/Kbuild
deleted file mode 100644
index d2680423d9ab..000000000000
--- a/include/uapi/linux/netfilter/ipset/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-# UAPI Header export list
-header-y += ip_set.h
-header-y += ip_set_bitmap.h
-header-y += ip_set_hash.h
-header-y += ip_set_list.h
diff --git a/include/uapi/linux/netfilter_arp/Kbuild b/include/uapi/linux/netfilter_arp/Kbuild
deleted file mode 100644
index 62d5637cc0ac..000000000000
--- a/include/uapi/linux/netfilter_arp/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += arp_tables.h
-header-y += arpt_mangle.h
diff --git a/include/uapi/linux/netfilter_bridge/Kbuild b/include/uapi/linux/netfilter_bridge/Kbuild
deleted file mode 100644
index 0fbad8ef96de..000000000000
--- a/include/uapi/linux/netfilter_bridge/Kbuild
+++ /dev/null
@@ -1,18 +0,0 @@
-# UAPI Header export list
-header-y += ebt_802_3.h
-header-y += ebt_among.h
-header-y += ebt_arp.h
-header-y += ebt_arpreply.h
-header-y += ebt_ip.h
-header-y += ebt_ip6.h
-header-y += ebt_limit.h
-header-y += ebt_log.h
-header-y += ebt_mark_m.h
-header-y += ebt_mark_t.h
-header-y += ebt_nat.h
-header-y += ebt_nflog.h
-header-y += ebt_pkttype.h
-header-y += ebt_redirect.h
-header-y += ebt_stp.h
-header-y += ebt_vlan.h
-header-y += ebtables.h
diff --git a/include/uapi/linux/netfilter_ipv4/Kbuild b/include/uapi/linux/netfilter_ipv4/Kbuild
deleted file mode 100644
index ecb291df390e..000000000000
--- a/include/uapi/linux/netfilter_ipv4/Kbuild
+++ /dev/null
@@ -1,10 +0,0 @@
-# UAPI Header export list
-header-y += ip_tables.h
-header-y += ipt_CLUSTERIP.h
-header-y += ipt_ECN.h
-header-y += ipt_LOG.h
-header-y += ipt_REJECT.h
-header-y += ipt_TTL.h
-header-y += ipt_ah.h
-header-y += ipt_ecn.h
-header-y += ipt_ttl.h
diff --git a/include/uapi/linux/netfilter_ipv6/Kbuild b/include/uapi/linux/netfilter_ipv6/Kbuild
deleted file mode 100644
index 75a668ca2353..000000000000
--- a/include/uapi/linux/netfilter_ipv6/Kbuild
+++ /dev/null
@@ -1,13 +0,0 @@
-# UAPI Header export list
-header-y += ip6_tables.h
-header-y += ip6t_HL.h
-header-y += ip6t_LOG.h
-header-y += ip6t_NPT.h
-header-y += ip6t_REJECT.h
-header-y += ip6t_ah.h
-header-y += ip6t_frag.h
-header-y += ip6t_hl.h
-header-y += ip6t_ipv6header.h
-header-y += ip6t_mh.h
-header-y += ip6t_opts.h
-header-y += ip6t_rt.h
diff --git a/include/uapi/linux/nfsd/Kbuild b/include/uapi/linux/nfsd/Kbuild
deleted file mode 100644
index c11bc404053c..000000000000
--- a/include/uapi/linux/nfsd/Kbuild
+++ /dev/null
@@ -1,6 +0,0 @@
-# UAPI Header export list
-header-y += cld.h
-header-y += debug.h
-header-y += export.h
-header-y += nfsfh.h
-header-y += stats.h
diff --git a/include/uapi/linux/raid/Kbuild b/include/uapi/linux/raid/Kbuild
deleted file mode 100644
index e2c3d25405d7..000000000000
--- a/include/uapi/linux/raid/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += md_p.h
-header-y += md_u.h
diff --git a/include/uapi/linux/spi/Kbuild b/include/uapi/linux/spi/Kbuild
deleted file mode 100644
index 0cc747eff165..000000000000
--- a/include/uapi/linux/spi/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += spidev.h
diff --git a/include/uapi/linux/sunrpc/Kbuild b/include/uapi/linux/sunrpc/Kbuild
deleted file mode 100644
index 8e02e47c20fb..000000000000
--- a/include/uapi/linux/sunrpc/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += debug.h
diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild
deleted file mode 100644
index ba62ddf0e58a..000000000000
--- a/include/uapi/linux/tc_act/Kbuild
+++ /dev/null
@@ -1,16 +0,0 @@
-# UAPI Header export list
-header-y += tc_csum.h
-header-y += tc_defact.h
-header-y += tc_gact.h
-header-y += tc_ipt.h
-header-y += tc_mirred.h
-header-y += tc_sample.h
-header-y += tc_nat.h
-header-y += tc_pedit.h
-header-y += tc_skbedit.h
-header-y += tc_vlan.h
-header-y += tc_bpf.h
-header-y += tc_connmark.h
-header-y += tc_ife.h
-header-y += tc_tunnel_key.h
-header-y += tc_skbmod.h
diff --git a/include/uapi/linux/tc_ematch/Kbuild b/include/uapi/linux/tc_ematch/Kbuild
deleted file mode 100644
index 53fca3925535..000000000000
--- a/include/uapi/linux/tc_ematch/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-# UAPI Header export list
-header-y += tc_em_cmp.h
-header-y += tc_em_meta.h
-header-y += tc_em_nbyte.h
-header-y += tc_em_text.h
diff --git a/include/uapi/linux/usb/Kbuild b/include/uapi/linux/usb/Kbuild
deleted file mode 100644
index 4cc4d6e7e523..000000000000
--- a/include/uapi/linux/usb/Kbuild
+++ /dev/null
@@ -1,12 +0,0 @@
-# UAPI Header export list
-header-y += audio.h
-header-y += cdc.h
-header-y += cdc-wdm.h
-header-y += ch11.h
-header-y += ch9.h
-header-y += functionfs.h
-header-y += g_printer.h
-header-y += gadgetfs.h
-header-y += midi.h
-header-y += tmc.h
-header-y += video.h
diff --git a/include/uapi/linux/wimax/Kbuild b/include/uapi/linux/wimax/Kbuild
deleted file mode 100644
index 1c97be49971f..000000000000
--- a/include/uapi/linux/wimax/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# UAPI Header export list
-header-y += i2400m.h
diff --git a/include/uapi/misc/Kbuild b/include/uapi/misc/Kbuild
deleted file mode 100644
index e96cae7d58c9..000000000000
--- a/include/uapi/misc/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# misc Header export list
-header-y += cxl.h
diff --git a/include/uapi/mtd/Kbuild b/include/uapi/mtd/Kbuild
deleted file mode 100644
index 5a691e10cd0e..000000000000
--- a/include/uapi/mtd/Kbuild
+++ /dev/null
@@ -1,6 +0,0 @@
-# UAPI Header export list
-header-y += inftl-user.h
-header-y += mtd-abi.h
-header-y += mtd-user.h
-header-y += nftl-user.h
-header-y += ubi-user.h
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
deleted file mode 100644
index 1e0af1ff75c3..000000000000
--- a/include/uapi/rdma/Kbuild
+++ /dev/null
@@ -1,20 +0,0 @@
-# UAPI Header export list
-header-y += ib_user_cm.h
-header-y += rdma_user_ioctl.h
-header-y += ib_user_mad.h
-header-y += ib_user_sa.h
-header-y += ib_user_verbs.h
-header-y += rdma_netlink.h
-header-y += rdma_user_cm.h
-header-y += hfi/
-header-y += rdma_user_rxe.h
-header-y += cxgb3-abi.h
-header-y += cxgb4-abi.h
-header-y += mlx4-abi.h
-header-y += mlx5-abi.h
-header-y += mthca-abi.h
-header-y += nes-abi.h
-header-y += ocrdma-abi.h
-header-y += hns-abi.h
-header-y += vmw_pvrdma-abi.h
-header-y += qedr-abi.h
diff --git a/include/uapi/rdma/hfi/Kbuild b/include/uapi/rdma/hfi/Kbuild
deleted file mode 100644
index b65b0b3a5f63..000000000000
--- a/include/uapi/rdma/hfi/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-# UAPI Header export list
-header-y += hfi1_user.h
-header-y += hfi1_ioctl.h
diff --git a/include/uapi/scsi/Kbuild b/include/uapi/scsi/Kbuild
deleted file mode 100644
index d791e0ad509d..000000000000
--- a/include/uapi/scsi/Kbuild
+++ /dev/null
@@ -1,6 +0,0 @@
-# UAPI Header export list
-header-y += fc/
-header-y += scsi_bsg_fc.h
-header-y += scsi_netlink.h
-header-y += scsi_netlink_fc.h
-header-y += cxlflash_ioctl.h
diff --git a/include/uapi/scsi/fc/Kbuild b/include/uapi/scsi/fc/Kbuild
deleted file mode 100644
index 5ead9fac265c..000000000000
--- a/include/uapi/scsi/fc/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-# UAPI Header export list
-header-y += fc_els.h
-header-y += fc_fs.h
-header-y += fc_gs.h
-header-y += fc_ns.h
diff --git a/include/uapi/sound/Kbuild b/include/uapi/sound/Kbuild
deleted file mode 100644
index 9578d8bdbf31..000000000000
--- a/include/uapi/sound/Kbuild
+++ /dev/null
@@ -1,16 +0,0 @@
-# UAPI Header export list
-header-y += asequencer.h
-header-y += asoc.h
-header-y += asound.h
-header-y += asound_fm.h
-header-y += compress_offload.h
-header-y += compress_params.h
-header-y += emu10k1.h
-header-y += firewire.h
-header-y += hdsp.h
-header-y += hdspm.h
-header-y += sb16_csp.h
-header-y += sfnt_info.h
-header-y += tlv.h
-header-y += usb_stream.h
-header-y += snd_sst_tokens.h
diff --git a/include/uapi/video/Kbuild b/include/uapi/video/Kbuild
deleted file mode 100644
index ac7203bb32cc..000000000000
--- a/include/uapi/video/Kbuild
+++ /dev/null
@@ -1,4 +0,0 @@
-# UAPI Header export list
-header-y += edid.h
-header-y += sisfb.h
-header-y += uvesafb.h
diff --git a/include/uapi/xen/Kbuild b/include/uapi/xen/Kbuild
deleted file mode 100644
index 5c459628e8c7..000000000000
--- a/include/uapi/xen/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-# UAPI Header export list
-header-y += evtchn.h
-header-y += gntalloc.h
-header-y += gntdev.h
-header-y += privcmd.h
diff --git a/include/video/Kbuild b/include/video/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index ca5d439c9abf..20be1fbc19cc 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -1,17 +1,19 @@
 # ==========================================================================
 # Installing headers
 #
-# header-y  - list files to be installed. They are preprocessed
-#             to remove __KERNEL__ section of the file
-# genhdr-y  - Same as header-y but in a generated/ directory
+# All headers under include/uapi, include/generated/uapi,
+# arch/<arch>/include/uapi/asm and arch/<arch>/include/generated/uapi/asm are
+# exported.
+# They are preprocessed to remove __KERNEL__ section of the file.
 #
 # ==========================================================================
 
 # generated header directory
 gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj)))
 
+# Kbuild file is optional
 kbuild-file := $(srctree)/$(obj)/Kbuild
-include $(kbuild-file)
+-include $(kbuild-file)
 
 # called may set destination dir (when installing to asm/)
 _dst := $(if $(dst),$(dst),$(obj))
@@ -25,9 +27,15 @@ include scripts/Kbuild.include
 
 installdir    := $(INSTALL_HDR_PATH)/$(subst uapi/,,$(_dst))
 
-header-y      := $(sort $(header-y))
-subdirs       := $(patsubst %/,%,$(filter %/, $(header-y)))
-header-y      := $(filter-out %/, $(header-y))
+srcdir        := $(srctree)/$(obj)
+gendir        := $(objtree)/$(gen)
+subdirs       := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.))
+subdirs       += $(subdir-y)
+header-files  := $(notdir $(wildcard $(srcdir)/*.h))
+header-files  += $(notdir $(wildcard $(srcdir)/*.agh))
+header-files  := $(filter-out $(no-export-headers), $(header-files))
+genhdr-files  := $(notdir $(wildcard $(gendir)/*.h))
+genhdr-files  := $(filter-out $(header-files), $(genhdr-files))
 
 # files used to track state of install/check
 install-file  := $(installdir)/.install
@@ -35,25 +43,20 @@ check-file    := $(installdir)/.check
 
 # generic-y list all files an architecture uses from asm-generic
 # Use this to build a list of headers which require a wrapper
-wrapper-files := $(filter $(header-y), $(generic-y))
-
-srcdir        := $(srctree)/$(obj)
-gendir        := $(objtree)/$(gen)
+generic-files := $(notdir $(wildcard $(srctree)/include/uapi/asm-generic/*.h))
+wrapper-files := $(filter $(generic-files), $(generic-y))
+wrapper-files := $(filter-out $(header-files), $(wrapper-files))
 
 # all headers files for this dir
-header-y      := $(filter-out $(generic-y), $(header-y))
-all-files     := $(header-y) $(genhdr-y) $(wrapper-files)
+all-files     := $(header-files) $(genhdr-files) $(wrapper-files)
 output-files  := $(addprefix $(installdir)/, $(all-files))
 
-# Check that all expected files exist
-$(foreach hdr, $(header-y), \
-  $(if $(wildcard $(srcdir)/$(hdr)),, \
-       $(error Missing UAPI file $(srcdir)/$(hdr)) \
-   ))
-$(foreach hdr, $(genhdr-y), \
-  $(if	$(wildcard $(gendir)/$(hdr)),, \
-       $(error Missing generated UAPI file $(gendir)/$(hdr)) \
-  ))
+ifneq ($(mandatory-y),)
+missing       := $(filter-out $(all-files),$(mandatory-y))
+ifneq ($(missing),)
+$(error Some mandatory headers ($(missing)) are missing in $(obj))
+endif
+endif
 
 # Work out what needs to be removed
 oldheaders    := $(patsubst $(installdir)/%,%,$(wildcard $(installdir)/*.h))
@@ -67,8 +70,8 @@ printdir = $(patsubst $(INSTALL_HDR_PATH)/%/,%,$(dir $@))
 quiet_cmd_install = INSTALL $(printdir) ($(words $(all-files))\
                             file$(if $(word 2, $(all-files)),s))
       cmd_install = \
-        $(CONFIG_SHELL) $< $(installdir) $(srcdir) $(header-y); \
-        $(CONFIG_SHELL) $< $(installdir) $(gendir) $(genhdr-y); \
+        $(CONFIG_SHELL) $< $(installdir) $(srcdir) $(header-files); \
+        $(CONFIG_SHELL) $< $(installdir) $(gendir) $(genhdr-files); \
         for F in $(wrapper-files); do                                   \
                 echo "\#include <asm-generic/$$F>" > $(installdir)/$$F;    \
         done;                                                           \
@@ -95,8 +98,8 @@ __headersinst: $(subdirs) $(install-file)
 
 targets += $(install-file)
 $(install-file): scripts/headers_install.sh \
-		 $(addprefix $(srcdir)/,$(header-y)) \
-		 $(addprefix $(gendir)/,$(genhdr-y)) FORCE
+		 $(addprefix $(srcdir)/,$(header-files)) \
+		 $(addprefix $(gendir)/,$(genhdr-files)) FORCE
 	$(if $(unwanted),$(call cmd,remove),)
 	$(if $(wildcard $(dir $@)),,$(shell mkdir -p $(dir $@)))
 	$(call if_changed,install)
-- 
cgit v1.2.3


From 61562f981e9284ff7dfb2769d89c750986a5f677 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 27 Mar 2017 14:20:16 +0200
Subject: uapi: export all arch specifics directories

This patch removes the need of subdir-y. Now all files/directories under
arch/<arch>/include/uapi/ are exported.

The only change for userland is the layout of the command 'make
headers_install_all': directories asm-<arch> are replaced by arch-<arch>/.
Those new directories contains all files/directories of the specified arch.

Note that only cris and tile have more directories than only asm:
 - arch-v[10|32] for cris;
 - arch for tile.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Documentation/kbuild/makefiles.txt | 15 +--------------
 Makefile                           |  6 +++---
 arch/cris/include/uapi/asm/Kbuild  |  3 ---
 arch/tile/include/uapi/asm/Kbuild  |  2 --
 scripts/Makefile.headersinst       |  3 +--
 5 files changed, 5 insertions(+), 24 deletions(-)

diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index b9f7ca4e62ae..e18daca65ccd 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -49,7 +49,6 @@ This document describes the Linux kernel Makefiles.
 		--- 7.3 generic-y
 		--- 7.4 generated-y
 		--- 7.5 mandatory-y
-		--- 7.6 subdir-y
 
 	=== 8 Kbuild Variables
 	=== 9 Makefile language
@@ -1265,7 +1264,7 @@ The pre-processing does:
 - drop all sections that are kernel internal (guarded by ifdef __KERNEL__)
 
 All headers under include/uapi/, include/generated/uapi/,
-arch/<arch>/include/uapi/asm/ and arch/<arch>/include/generated/uapi/asm/
+arch/<arch>/include/uapi/ and arch/<arch>/include/generated/uapi/
 are exported.
 
 A Kbuild file may be defined under arch/<arch>/include/uapi/asm/ and
@@ -1338,18 +1337,6 @@ See subsequent chapter for the syntax of the Kbuild file.
 	The convention is to list one subdir per line and
 	preferably in alphabetic order.
 
-	--- 7.6 subdir-y
-
-	subdir-y may be used to specify a subdirectory to be exported.
-
-		Example:
-			#arch/cris/include/uapi/asm/Kbuild
-			subdir-y += ../arch-v10/arch/
-			subdir-y += ../arch-v32/arch/
-
-	The convention is to list one subdir per line and
-	preferably in alphabetic order.
-
 === 8 Kbuild Variables
 
 The top Makefile exports the following variables:
diff --git a/Makefile b/Makefile
index 220121fdca4d..4c5a02d30118 100644
--- a/Makefile
+++ b/Makefile
@@ -1128,7 +1128,7 @@ firmware_install:
 export INSTALL_HDR_PATH = $(objtree)/usr
 
 # If we do an all arch process set dst to asm-$(hdr-arch)
-hdr-dst = $(if $(KBUILD_HEADERS), dst=include/asm-$(hdr-arch), dst=include/asm)
+hdr-dst = $(if $(KBUILD_HEADERS), dst=include/arch-$(hdr-arch), dst=include)
 
 PHONY += archheaders
 archheaders:
@@ -1149,7 +1149,7 @@ headers_install: __headers
 	$(if $(wildcard $(srctree)/arch/$(hdr-arch)/include/uapi/asm/Kbuild),, \
 	  $(error Headers not exportable for the $(SRCARCH) architecture))
 	$(Q)$(MAKE) $(hdr-inst)=include/uapi
-	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/asm $(hdr-dst)
+	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi $(hdr-dst)
 
 PHONY += headers_check_all
 headers_check_all: headers_install_all
@@ -1158,7 +1158,7 @@ headers_check_all: headers_install_all
 PHONY += headers_check
 headers_check: headers_install
 	$(Q)$(MAKE) $(hdr-inst)=include/uapi HDRCHECK=1
-	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/asm $(hdr-dst) HDRCHECK=1
+	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/ $(hdr-dst) HDRCHECK=1
 
 # ---------------------------------------------------------------------------
 # Kernel selftest
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index d0c5471856e0..b15bf6bc0e94 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -1,5 +1,2 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
-
-subdir-y += ../arch-v10/arch/
-subdir-y += ../arch-v32/arch/
diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild
index e0a50111e07f..0c74c3c5ebfa 100644
--- a/arch/tile/include/uapi/asm/Kbuild
+++ b/arch/tile/include/uapi/asm/Kbuild
@@ -2,5 +2,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += ucontext.h
-
-subdir-y += ../arch
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 20be1fbc19cc..6ba97a1f9c5a 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -2,7 +2,7 @@
 # Installing headers
 #
 # All headers under include/uapi, include/generated/uapi,
-# arch/<arch>/include/uapi/asm and arch/<arch>/include/generated/uapi/asm are
+# arch/<arch>/include/uapi and arch/<arch>/include/generated/uapi are
 # exported.
 # They are preprocessed to remove __KERNEL__ section of the file.
 #
@@ -30,7 +30,6 @@ installdir    := $(INSTALL_HDR_PATH)/$(subst uapi/,,$(_dst))
 srcdir        := $(srctree)/$(obj)
 gendir        := $(objtree)/$(gen)
 subdirs       := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.))
-subdirs       += $(subdir-y)
 header-files  := $(notdir $(wildcard $(srcdir)/*.h))
 header-files  += $(notdir $(wildcard $(srcdir)/*.agh))
 header-files  := $(filter-out $(no-export-headers), $(header-files))
-- 
cgit v1.2.3


From 3e18c637fa3e2f6836a4034c80ca0a86be968efc Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 27 Mar 2017 14:20:17 +0200
Subject: arch/include: remove empty Kbuild files

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/cris/include/arch-v10/arch/Kbuild | 1 -
 arch/cris/include/arch-v32/arch/Kbuild | 1 -
 arch/tile/include/arch/Kbuild          | 1 -
 3 files changed, 3 deletions(-)
 delete mode 100644 arch/cris/include/arch-v10/arch/Kbuild
 delete mode 100644 arch/cris/include/arch-v32/arch/Kbuild
 delete mode 100644 arch/tile/include/arch/Kbuild

diff --git a/arch/cris/include/arch-v10/arch/Kbuild b/arch/cris/include/arch-v10/arch/Kbuild
deleted file mode 100644
index 1f0fc7a66f5f..000000000000
--- a/arch/cris/include/arch-v10/arch/Kbuild
+++ /dev/null
@@ -1 +0,0 @@
-# CRISv10 arch
diff --git a/arch/cris/include/arch-v32/arch/Kbuild b/arch/cris/include/arch-v32/arch/Kbuild
deleted file mode 100644
index 2fd65c7e15c9..000000000000
--- a/arch/cris/include/arch-v32/arch/Kbuild
+++ /dev/null
@@ -1 +0,0 @@
-# CRISv32 arch
diff --git a/arch/tile/include/arch/Kbuild b/arch/tile/include/arch/Kbuild
deleted file mode 100644
index 3751c9fabcf2..000000000000
--- a/arch/tile/include/arch/Kbuild
+++ /dev/null
@@ -1 +0,0 @@
-# Tile arch headers
-- 
cgit v1.2.3


From 634b6a8a064576797120b74facf95ba79283b5d5 Mon Sep 17 00:00:00 2001
From: Julien Isorce <julien.isorce@gmail.com>
Date: Thu, 27 Apr 2017 15:10:08 +0100
Subject: drm/radeon: only warn once in radeon_ttm_bo_destroy if va list not
 empty
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Encountered a dozen of exact same backtraces when mesa's
pb_cache_release_all_buffers is called after that a gpu reset failed.

v2: Remove superfluous error message added in v1.

bug: https://bugs.freedesktop.org/show_bug.cgi?id=96271

Signed-off-by: Julien Isorce <jisorce@oblong.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_object.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index bec2ec056de4..8b722297a05c 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -81,7 +81,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 	list_del_init(&bo->list);
 	mutex_unlock(&bo->rdev->gem.mutex);
 	radeon_bo_clear_surface_reg(bo);
-	WARN_ON(!list_empty(&bo->va));
+	WARN_ON_ONCE(!list_empty(&bo->va));
 	drm_gem_object_release(&bo->gem_base);
 	kfree(bo);
 }
-- 
cgit v1.2.3


From de70c6357be23261c18fcccfd2ad148512fedd28 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 5 May 2017 10:21:36 -0400
Subject: drm/amdgpu/atomfirmware: add function to update engine hang status
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update the scratch reg for when the engine is hung.

Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 13 +++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h |  2 ++
 2 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index d735cd1807b3..4bdda56fccee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -88,6 +88,19 @@ void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev)
 		WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]);
 }
 
+void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev,
+						  bool hung)
+{
+	u32 tmp = RREG32(adev->bios_scratch_reg_offset + 3);
+
+	if (hung)
+		tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG;
+	else
+		tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG;
+
+	WREG32(adev->bios_scratch_reg_offset + 3, tmp);
+}
+
 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
 {
 	struct atom_context *ctx = adev->mode_info.atom_context;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index d0c4dcd7fa96..a2c3ebe22c71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -28,6 +28,8 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
 void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
 void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev);
 void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev);
+void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev,
+						  bool hung);
 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
 
 #endif
-- 
cgit v1.2.3


From bfc181af3b3978e4b9619e3b46b1c4f7e5adb43a Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 5 May 2017 10:26:12 -0400
Subject: drm/amdgpu/soc15: use atomfirmware for setting bios scratch for reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Need to use the atomfirmware interface rather than atombios since
soc15 is atomfirmware based.

Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 02baa1d7bc23..6b55d451ae7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -25,7 +25,7 @@
 #include <linux/module.h>
 #include "drmP.h"
 #include "amdgpu.h"
-#include "amdgpu_atombios.h"
+#include "amdgpu_atomfirmware.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
@@ -405,11 +405,11 @@ static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev)
 
 static int soc15_asic_reset(struct amdgpu_device *adev)
 {
-	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+	amdgpu_atomfirmware_scratch_regs_engine_hung(adev, true);
 
 	soc15_gpu_pci_config_reset(adev);
 
-	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+	amdgpu_atomfirmware_scratch_regs_engine_hung(adev, false);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 09062ae1bb1c4a84e382560ff5059803d263a6c8 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 9 May 2017 13:08:39 -0400
Subject: drm/amdgpu: add some additional vega10 pci ids

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index eb1345627501..f2d705e6a75a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -455,7 +455,9 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x6862, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x6863, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x6864, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0, 0, 0}
-- 
cgit v1.2.3


From cb3696fdeca24d3a347a5225d934ce50c6edccba Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Tue, 9 May 2017 15:34:07 +0800
Subject: drm/amd: fix init order of sched job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Need to increment after the fence check.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 5fd12db0fc58..28b92c8d9985 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -462,9 +462,9 @@ int amd_sched_job_init(struct amd_sched_job *job,
 	job->sched = sched;
 	job->s_entity = entity;
 	job->s_fence = amd_sched_fence_create(entity, owner);
-	job->id = atomic64_inc_return(&sched->job_id_count);
 	if (!job->s_fence)
 		return -ENOMEM;
+	job->id = atomic64_inc_return(&sched->job_id_count);
 
 	INIT_WORK(&job->finish_work, amd_sched_job_finish);
 	INIT_LIST_HEAD(&job->node);
-- 
cgit v1.2.3


From 30514decb27d45b98599612cb5d3e6a20ba733a5 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Tue, 9 May 2017 13:39:40 +0800
Subject: drm/amdgpu: fix dependency issue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The problem is that executing the jobs in the right order doesn't give you the right result
because consecutive jobs executed on the same engine are pipelined.
In other words job B does it buffer read before job A has written it's result.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c        |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  2 +-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 17 +++++++++++++++++
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  2 ++
 6 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 86923c57908b..833c3c16501a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1129,6 +1129,7 @@ struct amdgpu_job {
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
 	bool                    vm_needs_flush;
+	bool			need_pipeline_sync;
 	unsigned		vm_id;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 2d11ac8d1aa9..6e4ae0d983c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -160,6 +160,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
 		return r;
 	}
+	if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync)
+		amdgpu_ring_emit_pipeline_sync(ring);
 
 	if (vm) {
 		r = amdgpu_vm_flush(ring, job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index c3cfeb335d99..7570f2439a11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 	(*job)->vm = vm;
 	(*job)->ibs = (void *)&(*job)[1];
 	(*job)->num_ibs = num_ibs;
+	(*job)->need_pipeline_sync = false;
 
 	amdgpu_sync_create(&(*job)->sync);
 
@@ -152,6 +153,9 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 		fence = amdgpu_sync_get_fence(&job->sync);
 	}
 
+	if (amd_sched_dependency_optimized(fence, sched_job->s_entity))
+		job->need_pipeline_sync = true;
+
 	return fence;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c42a9979d056..07ff3b1514f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -614,7 +614,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 	if (ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
-	if (ring->funcs->emit_pipeline_sync)
+	if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync)
 		amdgpu_ring_emit_pipeline_sync(ring);
 
 	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 28b92c8d9985..fea96a765cf1 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -236,6 +236,23 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb
 	dma_fence_put(f);
 }
 
+bool amd_sched_dependency_optimized(struct dma_fence* fence,
+				    struct amd_sched_entity *entity)
+{
+	struct amd_gpu_scheduler *sched = entity->sched;
+	struct amd_sched_fence *s_fence;
+
+	if (!fence || dma_fence_is_signaled(fence))
+		return false;
+	if (fence->context == entity->fence_context)
+		return true;
+	s_fence = to_amd_sched_fence(fence);
+	if (s_fence && s_fence->sched == sched)
+		return true;
+
+	return false;
+}
+
 static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 {
 	struct amd_gpu_scheduler *sched = entity->sched;
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 0255c7f8a6d8..924d4a5899e1 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -158,4 +158,6 @@ int amd_sched_job_init(struct amd_sched_job *job,
 		       void *owner);
 void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
 void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
+bool amd_sched_dependency_optimized(struct dma_fence* fence,
+				    struct amd_sched_entity *entity);
 #endif
-- 
cgit v1.2.3


From ded96c7389c2fdef174d3dbdc6e8c2822312c364 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 21 Apr 2017 17:26:38 +0800
Subject: drm/amd/powerplay: add more smu message on Vega10.

Add some new SMU messages.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
index 90beef35bba2..254974d3d371 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
@@ -122,7 +122,10 @@ typedef uint16_t PPSMC_Result;
 #define PPSMC_MSG_SetFanMinPwm                   0x52
 #define PPSMC_MSG_ConfigureGfxDidt               0x55
 #define PPSMC_MSG_NumOfDisplays                  0x56
-#define PPSMC_Message_Count                      0x57
+#define PPSMC_MSG_ReadSerialNumTop32             0x58
+#define PPSMC_MSG_ReadSerialNumBottom32          0x59
+#define PPSMC_Message_Count                      0x5A
+
 
 typedef int PPSMC_Msg;
 
-- 
cgit v1.2.3


From 4f93f09e5c3e59e72aa1ae877b24ae0cacdfdfa8 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 5 May 2017 17:37:20 +0800
Subject: drm/amdgpu: add amd fan ctrl mode enums.

Add common fan enums that can be used for both
powerplay and dpm.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/include/amd_shared.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 2ccf44e580de..1d1ac1ef94f7 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -138,6 +138,12 @@ struct amd_pp_profile {
 	uint8_t down_hyst;
 };
 
+enum amd_fan_ctrl_mode {
+	AMD_FAN_CTRL_NONE = 0,
+	AMD_FAN_CTRL_MANUAL = 1,
+	AMD_FAN_CTRL_AUTO = 2,
+};
+
 /* CG flags */
 #define AMD_CG_SUPPORT_GFX_MGCG			(1 << 0)
 #define AMD_CG_SUPPORT_GFX_MGLS			(1 << 1)
-- 
cgit v1.2.3


From aad22ca4368082cdd1cff1c55d8607c94ea1a74b Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 5 May 2017 16:56:45 +0800
Subject: drm/amdgpu: refine amdgpu pwm1_enable sysfs interface.

Make the interface consistent.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 990fde2cf4fd..7df503aedb69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -867,8 +867,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
 
 	pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 
-	/* never 0 (full-speed), fuse or smc-controlled always */
-	return sprintf(buf, "%i\n", pwm_mode == FDO_PWM_MODE_STATIC ? 1 : 2);
+	return sprintf(buf, "%i\n", pwm_mode);
 }
 
 static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
@@ -887,14 +886,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
 	if (err)
 		return err;
 
-	switch (value) {
-	case 1: /* manual, percent-based */
-		amdgpu_dpm_set_fan_control_mode(adev, FDO_PWM_MODE_STATIC);
-		break;
-	default: /* disable */
-		amdgpu_dpm_set_fan_control_mode(adev, 0);
-		break;
-	}
+	amdgpu_dpm_set_fan_control_mode(adev, value);
 
 	return count;
 }
-- 
cgit v1.2.3


From 7522ffc41b73a3dd1f9d19164f033bd649c0404b Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 5 May 2017 17:44:32 +0800
Subject: drm/amd/powerplay: refine pwm1_enable callback functions for Vega10.

Use the new enums for setting and getting the fan control mode.
Fixes problems due to previous inconsistencies between enums.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 44 ++++++++++++----------
 .../gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h   |  1 +
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 85a6c12ad1d4..ad30f5d3a10d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -3921,32 +3921,36 @@ static int vega10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 
 static int vega10_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode)
 {
-	if (mode) {
-		/* stop auto-manage */
-		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
-				PHM_PlatformCaps_MicrocodeFanControl))
-			vega10_fan_ctrl_stop_smc_fan_control(hwmgr);
-		vega10_fan_ctrl_set_static_mode(hwmgr, mode);
-	} else
-		/* restart auto-manage */
-		vega10_fan_ctrl_reset_fan_speed_to_default(hwmgr);
+	int result = 0;
 
-	return 0;
+	switch (mode) {
+	case AMD_FAN_CTRL_NONE:
+		result = vega10_fan_ctrl_set_fan_speed_percent(hwmgr, 100);
+		break;
+	case AMD_FAN_CTRL_MANUAL:
+		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_MicrocodeFanControl))
+			result = vega10_fan_ctrl_stop_smc_fan_control(hwmgr);
+		break;
+	case AMD_FAN_CTRL_AUTO:
+		result = vega10_fan_ctrl_set_static_mode(hwmgr, mode);
+		if (!result)
+			result = vega10_fan_ctrl_start_smc_fan_control(hwmgr);
+		break;
+	default:
+		break;
+	}
+	return result;
 }
 
 static int vega10_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 {
-	uint32_t reg;
+	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
 
-	if (hwmgr->fan_ctrl_is_in_default_mode) {
-		return hwmgr->fan_ctrl_default_mode;
-	} else {
-		reg = soc15_get_register_offset(THM_HWID, 0,
-			mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2);
-		return (cgs_read_register(hwmgr->device, reg) &
-				CG_FDO_CTRL2__FDO_PWM_MODE_MASK) >>
-				CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT;
-	}
+	if (data->smu_features[GNLD_FAN_CONTROL].enabled == false)
+		return AMD_FAN_CTRL_MANUAL;
+	else
+		return AMD_FAN_CTRL_AUTO;
 }
 
 static int vega10_get_dal_power_level(struct pp_hwmgr *hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h
index 70c1d22b3d7d..776f3a2effc0 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h
@@ -79,6 +79,7 @@ extern int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr,
 extern int vega10_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr);
 extern uint32_t smu7_get_xclk(struct pp_hwmgr *hwmgr);
 extern int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr);
+int vega10_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr);
 
 #endif
 
-- 
cgit v1.2.3


From 2fde9ab218b7f8446c2428b7f9dad602afce8be6 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 5 May 2017 16:50:36 +0800
Subject: drm/amd/powerplay: refine pwm1_enable callback functions for vi.

Use the new enums for setting and getting the fan control mode.
Fixes problems due to previous inconsistencies between enums.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c   | 35 ++++++++++++----------
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c |  9 +++---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.h |  2 +-
 drivers/gpu/drm/amd/powerplay/inc/hwmgr.h          |  1 +
 4 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index b3d42858f68b..a74a3db3056c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -4334,26 +4334,31 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
 
 static int smu7_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode)
 {
-	if (mode) {
-		/* stop auto-manage */
-		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
-				PHM_PlatformCaps_MicrocodeFanControl))
-			smu7_fan_ctrl_stop_smc_fan_control(hwmgr);
-		smu7_fan_ctrl_set_static_mode(hwmgr, mode);
-	} else
-		/* restart auto-manage */
-		smu7_fan_ctrl_reset_fan_speed_to_default(hwmgr);
+	int result = 0;
 
-	return 0;
+	switch (mode) {
+	case AMD_FAN_CTRL_NONE:
+		result = smu7_fan_ctrl_set_fan_speed_percent(hwmgr, 100);
+		break;
+	case AMD_FAN_CTRL_MANUAL:
+		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_MicrocodeFanControl))
+			result = smu7_fan_ctrl_stop_smc_fan_control(hwmgr);
+		break;
+	case AMD_FAN_CTRL_AUTO:
+		result = smu7_fan_ctrl_set_static_mode(hwmgr, mode);
+		if (!result)
+			result = smu7_fan_ctrl_start_smc_fan_control(hwmgr);
+		break;
+	default:
+		break;
+	}
+	return result;
 }
 
 static int smu7_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 {
-	if (hwmgr->fan_ctrl_is_in_default_mode)
-		return hwmgr->fan_ctrl_default_mode;
-	else
-		return PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
-				CG_FDO_CTRL2, FDO_PWM_MODE);
+	return hwmgr->fan_ctrl_enabled ? AMD_FAN_CTRL_AUTO : AMD_FAN_CTRL_MANUAL;
 }
 
 static int smu7_get_sclk_od(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c
index 436ca5ce8248..baddb569a8b8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c
@@ -112,10 +112,9 @@ int smu7_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed)
 */
 int smu7_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode)
 {
-
 	if (hwmgr->fan_ctrl_is_in_default_mode) {
 		hwmgr->fan_ctrl_default_mode =
-				PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device,	CGS_IND_REG__SMC,
+				PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
 						CG_FDO_CTRL2, FDO_PWM_MODE);
 		hwmgr->tmin =
 				PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
@@ -149,7 +148,7 @@ int smu7_fan_ctrl_set_default_mode(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
-static int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr)
+int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr)
 {
 	int result;
 
@@ -179,6 +178,7 @@ static int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr)
 				PPSMC_MSG_SetFanTemperatureTarget,
 				hwmgr->thermal_controller.
 				advanceFanControlParameters.ucTargetTemperature);
+	hwmgr->fan_ctrl_enabled = true;
 
 	return result;
 }
@@ -186,6 +186,7 @@ static int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr)
 
 int smu7_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr)
 {
+	hwmgr->fan_ctrl_enabled = false;
 	return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_StopFanControl);
 }
 
@@ -280,7 +281,7 @@ int smu7_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed)
 	PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
 				CG_TACH_STATUS, TACH_PERIOD, tach_period);
 
-	return smu7_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC);
+	return smu7_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC_RPM);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.h
index 2ed774db42c7..ba71b608fa75 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.h
@@ -54,6 +54,6 @@ extern int smu7_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *spe
 extern int smu7_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr);
 extern int smu7_thermal_enable_alert(struct pp_hwmgr *hwmgr);
 extern int smu7_thermal_disable_alert(struct pp_hwmgr *hwmgr);
-
+extern int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr);
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index e4574c215c38..805b9df452a3 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -764,6 +764,7 @@ struct pp_hwmgr {
 	struct pp_thermal_controller_info thermal_controller;
 	bool fan_ctrl_is_in_default_mode;
 	uint32_t fan_ctrl_default_mode;
+	bool fan_ctrl_enabled;
 	uint32_t tmin;
 	struct phm_microcode_version_info microcode_version_info;
 	uint32_t ps_size;
-- 
cgit v1.2.3


From afa31879f0a62f769cdeeffc8cfec613da2bc482 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 5 May 2017 17:53:18 +0800
Subject: drm/amd/powerplay: refine pwm1_enable callback functions for CI.

Use the new enums for setting and getting the fan control mode.
Fixes problems due to previous inconsistencies between enums.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index a6dda3470003..6dc1410b380f 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -1267,30 +1267,33 @@ static int ci_dpm_set_fan_speed_percent(struct amdgpu_device *adev,
 
 static void ci_dpm_set_fan_control_mode(struct amdgpu_device *adev, u32 mode)
 {
-	if (mode) {
-		/* stop auto-manage */
+	switch (mode) {
+	case AMD_FAN_CTRL_NONE:
 		if (adev->pm.dpm.fan.ucode_fan_control)
 			ci_fan_ctrl_stop_smc_fan_control(adev);
-		ci_fan_ctrl_set_static_mode(adev, mode);
-	} else {
-		/* restart auto-manage */
+		ci_dpm_set_fan_speed_percent(adev, 100);
+		break;
+	case AMD_FAN_CTRL_MANUAL:
+		if (adev->pm.dpm.fan.ucode_fan_control)
+			ci_fan_ctrl_stop_smc_fan_control(adev);
+		break;
+	case AMD_FAN_CTRL_AUTO:
 		if (adev->pm.dpm.fan.ucode_fan_control)
 			ci_thermal_start_smc_fan_control(adev);
-		else
-			ci_fan_ctrl_set_default_mode(adev);
+		break;
+	default:
+		break;
 	}
 }
 
 static u32 ci_dpm_get_fan_control_mode(struct amdgpu_device *adev)
 {
 	struct ci_power_info *pi = ci_get_pi(adev);
-	u32 tmp;
 
 	if (pi->fan_is_controlled_by_smc)
-		return 0;
-
-	tmp = RREG32_SMC(ixCG_FDO_CTRL2) & CG_FDO_CTRL2__FDO_PWM_MODE_MASK;
-	return (tmp >> CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT);
+		return AMD_FAN_CTRL_AUTO;
+	else
+		return AMD_FAN_CTRL_MANUAL;
 }
 
 #if 0
-- 
cgit v1.2.3


From f961e3f2acae94b727380c0b74e2d3954d0edf79 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 5 May 2017 16:17:57 -0400
Subject: nfsd: encoders mustn't use unitialized values in error cases

In error cases, lgp->lg_layout_type may be out of bounds; so we
shouldn't be using it until after the check of nfserr.

This was seen to crash nfsd threads when the server receives a LAYOUTGET
request with a large layout type.

GETDEVICEINFO has the same problem.

Reported-by: Ari Kauppi <Ari.Kauppi@synopsys.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 33017d652b1d..8e9652f37f1f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4119,8 +4119,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
 		struct nfsd4_getdeviceinfo *gdev)
 {
 	struct xdr_stream *xdr = &resp->xdr;
-	const struct nfsd4_layout_ops *ops =
-		nfsd4_layout_ops[gdev->gd_layout_type];
+	const struct nfsd4_layout_ops *ops;
 	u32 starting_len = xdr->buf->len, needed_len;
 	__be32 *p;
 
@@ -4137,6 +4136,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 	/* If maxcount is 0 then just update notifications */
 	if (gdev->gd_maxcount != 0) {
+		ops = nfsd4_layout_ops[gdev->gd_layout_type];
 		nfserr = ops->encode_getdeviceinfo(xdr, gdev);
 		if (nfserr) {
 			/*
@@ -4189,8 +4189,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
 		struct nfsd4_layoutget *lgp)
 {
 	struct xdr_stream *xdr = &resp->xdr;
-	const struct nfsd4_layout_ops *ops =
-		nfsd4_layout_ops[lgp->lg_layout_type];
+	const struct nfsd4_layout_ops *ops;
 	__be32 *p;
 
 	dprintk("%s: err %d\n", __func__, nfserr);
@@ -4213,6 +4212,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
 	*p++ = cpu_to_be32(lgp->lg_seg.iomode);
 	*p++ = cpu_to_be32(lgp->lg_layout_type);
 
+	ops = nfsd4_layout_ops[lgp->lg_layout_type];
 	nfserr = ops->encode_layoutget(xdr, lgp);
 out:
 	kfree(lgp->lg_content);
-- 
cgit v1.2.3


From b26b78cb726007533d81fdf90a62e915002ef5c8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 9 May 2017 16:24:59 -0400
Subject: nfsd: Fix up the "supattr_exclcreat" attributes

If an NFSv4 client asks us for the supattr_exclcreat, then we must
not return attributes that are unsupported by this minor version.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Fixes: 75976de6556f ("NFSD: Return word2 bitmask if setting security..,")
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 8e9652f37f1f..26780d53a6f9 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2831,9 +2831,14 @@ out_acl:
 	}
 #endif /* CONFIG_NFSD_PNFS */
 	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
-		status = nfsd4_encode_bitmap(xdr, NFSD_SUPPATTR_EXCLCREAT_WORD0,
-						  NFSD_SUPPATTR_EXCLCREAT_WORD1,
-						  NFSD_SUPPATTR_EXCLCREAT_WORD2);
+		u32 supp[3];
+
+		memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp));
+		supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0;
+		supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1;
+		supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2;
+
+		status = nfsd4_encode_bitmap(xdr, supp[0], supp[1], supp[2]);
 		if (status)
 			goto out;
 	}
-- 
cgit v1.2.3


From 8be193c7b1f44d3f4dcb27107df0831709c2deb1 Mon Sep 17 00:00:00 2001
From: Tomohiro Yoshidomi <sylph23k@gmail.com>
Date: Sat, 6 May 2017 13:00:31 -0700
Subject: Input: add support for PlayStation 1/2 joypads connected via SPI

PlayStation 1/2 joypads can be connected directly to the SPI interface.

Signed-off-by: Tomohiro Yoshidomi <sylph23k@gmail.com>
Acked-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joystick/Kconfig      |  21 ++
 drivers/input/joystick/Makefile     |   1 +
 drivers/input/joystick/psxpad-spi.c | 401 ++++++++++++++++++++++++++++++++++++
 3 files changed, 423 insertions(+)
 create mode 100644 drivers/input/joystick/psxpad-spi.c

diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig
index 4215b5382092..f3c2f6ea8b44 100644
--- a/drivers/input/joystick/Kconfig
+++ b/drivers/input/joystick/Kconfig
@@ -330,4 +330,25 @@ config JOYSTICK_MAPLE
 	  To compile this as a module choose M here: the module will be called
 	  maplecontrol.
 
+config JOYSTICK_PSXPAD_SPI
+	tristate "PlayStation 1/2 joypads via SPI interface"
+	depends on SPI
+	select INPUT_POLLDEV
+	help
+	  Say Y here if you wish to connect PlayStation 1/2 joypads
+	  via SPI interface.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called psxpad-spi.
+
+config JOYSTICK_PSXPAD_SPI_FF
+	bool "PlayStation 1/2 joypads force feedback (rumble) support"
+	depends on JOYSTICK_PSXPAD_SPI
+	select INPUT_FF_MEMLESS
+	help
+	  Say Y here if you want to take advantage of PlayStation 1/2
+	  joypads rumble features.
+
+	  To drive rumble motor a dedicated power supply is required.
+
 endif
diff --git a/drivers/input/joystick/Makefile b/drivers/input/joystick/Makefile
index 92dc0de9dfed..496fd56b3f1b 100644
--- a/drivers/input/joystick/Makefile
+++ b/drivers/input/joystick/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_JOYSTICK_INTERACT)		+= interact.o
 obj-$(CONFIG_JOYSTICK_JOYDUMP)		+= joydump.o
 obj-$(CONFIG_JOYSTICK_MAGELLAN)		+= magellan.o
 obj-$(CONFIG_JOYSTICK_MAPLE)		+= maplecontrol.o
+obj-$(CONFIG_JOYSTICK_PSXPAD_SPI)	+= psxpad-spi.o
 obj-$(CONFIG_JOYSTICK_SIDEWINDER)	+= sidewinder.o
 obj-$(CONFIG_JOYSTICK_SPACEBALL)	+= spaceball.o
 obj-$(CONFIG_JOYSTICK_SPACEORB)		+= spaceorb.o
diff --git a/drivers/input/joystick/psxpad-spi.c b/drivers/input/joystick/psxpad-spi.c
new file mode 100644
index 000000000000..28b473f6cbb6
--- /dev/null
+++ b/drivers/input/joystick/psxpad-spi.c
@@ -0,0 +1,401 @@
+/*
+ * PlayStation 1/2 joypads via SPI interface Driver
+ *
+ * Copyright (C) 2017 Tomohiro Yoshidomi <sylph23k@gmail.com>
+ * Licensed under the GPL-2 or later.
+ *
+ * PlayStation 1/2 joypad's plug (not socket)
+ *  123 456 789
+ * (...|...|...)
+ *
+ * 1: DAT -> MISO (pullup with 1k owm to 3.3V)
+ * 2: CMD -> MOSI
+ * 3: 9V (for motor, if not use N.C.)
+ * 4: GND
+ * 5: 3.3V
+ * 6: Attention -> CS(SS)
+ * 7: SCK -> SCK
+ * 8: N.C.
+ * 9: ACK -> N.C.
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/input.h>
+#include <linux/input-polldev.h>
+#include <linux/module.h>
+#include <linux/spi/spi.h>
+#include <linux/types.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+
+#define REVERSE_BIT(x) ((((x) & 0x80) >> 7) | (((x) & 0x40) >> 5) | \
+	(((x) & 0x20) >> 3) | (((x) & 0x10) >> 1) | (((x) & 0x08) << 1) | \
+	(((x) & 0x04) << 3) | (((x) & 0x02) << 5) | (((x) & 0x01) << 7))
+
+/* PlayStation 1/2 joypad command and response are LSBFIRST. */
+
+/*
+ *	0x01, 0x42, 0x00, 0x00, 0x00,
+ *	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ *	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ */
+static const u8 PSX_CMD_POLL[] = {
+	0x80, 0x42, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+/*	0x01, 0x43, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 */
+static const u8 PSX_CMD_ENTER_CFG[] = {
+	0x80, 0xC2, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+/*	0x01, 0x43, 0x00, 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A */
+static const u8 PSX_CMD_EXIT_CFG[] = {
+	0x80, 0xC2, 0x00, 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A
+};
+/*	0x01, 0x4D, 0x00, 0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF */
+static const u8 PSX_CMD_ENABLE_MOTOR[]	= {
+	0x80, 0xB2, 0x00, 0x00, 0x80, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+struct psxpad {
+	struct spi_device *spi;
+	struct input_polled_dev *pdev;
+	char phys[0x20];
+	bool motor1enable;
+	bool motor2enable;
+	u8 motor1level;
+	u8 motor2level;
+	u8 sendbuf[0x20] ____cacheline_aligned;
+	u8 response[sizeof(PSX_CMD_POLL)] ____cacheline_aligned;
+};
+
+static int psxpad_command(struct psxpad *pad, const u8 sendcmdlen)
+{
+	struct spi_transfer xfers = {
+		.tx_buf		= pad->sendbuf,
+		.rx_buf		= pad->response,
+		.len		= sendcmdlen,
+	};
+	int err;
+
+	err = spi_sync_transfer(pad->spi, &xfers, 1);
+	if (err) {
+		dev_err(&pad->spi->dev,
+			"%s: failed to SPI xfers mode: %d\n",
+			__func__, err);
+		return err;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_JOYSTICK_PSXPAD_SPI_FF
+static void psxpad_control_motor(struct psxpad *pad,
+				 bool motor1enable, bool motor2enable)
+{
+	int err;
+
+	pad->motor1enable = motor1enable;
+	pad->motor2enable = motor2enable;
+
+	memcpy(pad->sendbuf, PSX_CMD_ENTER_CFG, sizeof(PSX_CMD_ENTER_CFG));
+	err = psxpad_command(pad, sizeof(PSX_CMD_ENTER_CFG));
+	if (err) {
+		dev_err(&pad->spi->dev,
+			"%s: failed to enter config mode: %d\n",
+			__func__, err);
+		return;
+	}
+
+	memcpy(pad->sendbuf, PSX_CMD_ENABLE_MOTOR,
+	       sizeof(PSX_CMD_ENABLE_MOTOR));
+	pad->sendbuf[3] = pad->motor1enable ? 0x00 : 0xFF;
+	pad->sendbuf[4] = pad->motor2enable ? 0x80 : 0xFF;
+	err = psxpad_command(pad, sizeof(PSX_CMD_ENABLE_MOTOR));
+	if (err) {
+		dev_err(&pad->spi->dev,
+			"%s: failed to enable motor mode: %d\n",
+			__func__, err);
+		return;
+	}
+
+	memcpy(pad->sendbuf, PSX_CMD_EXIT_CFG, sizeof(PSX_CMD_EXIT_CFG));
+	err = psxpad_command(pad, sizeof(PSX_CMD_EXIT_CFG));
+	if (err) {
+		dev_err(&pad->spi->dev,
+			"%s: failed to exit config mode: %d\n",
+			__func__, err);
+		return;
+	}
+}
+
+static void psxpad_set_motor_level(struct psxpad *pad,
+				   u8 motor1level, u8 motor2level)
+{
+	pad->motor1level = motor1level ? 0xFF : 0x00;
+	pad->motor2level = REVERSE_BIT(motor2level);
+}
+
+static int psxpad_spi_play_effect(struct input_dev *idev,
+				  void *data, struct ff_effect *effect)
+{
+	struct input_polled_dev *pdev = input_get_drvdata(idev);
+	struct psxpad *pad = pdev->private;
+
+	switch (effect->type) {
+	case FF_RUMBLE:
+		psxpad_set_motor_level(pad,
+			(effect->u.rumble.weak_magnitude >> 8) & 0xFFU,
+			(effect->u.rumble.strong_magnitude >> 8) & 0xFFU);
+		break;
+	}
+
+	return 0;
+}
+
+static int psxpad_spi_init_ff(struct psxpad *pad)
+{
+	int err;
+
+	input_set_capability(pad->pdev->input, EV_FF, FF_RUMBLE);
+
+	err = input_ff_create_memless(pad->pdev->input, NULL,
+				      psxpad_spi_play_effect);
+	if (err) {
+		dev_err(&pad->spi->dev,
+			"input_ff_create_memless() failed: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+#else	/* CONFIG_JOYSTICK_PSXPAD_SPI_FF */
+
+static void psxpad_control_motor(struct psxpad *pad,
+				 bool motor1enable, bool motor2enable)
+{
+}
+
+static void psxpad_set_motor_level(struct psxpad *pad,
+				   u8 motor1level, u8 motor2level)
+{
+}
+
+static inline int psxpad_spi_init_ff(struct psxpad *pad)
+{
+	return 0;
+}
+#endif	/* CONFIG_JOYSTICK_PSXPAD_SPI_FF */
+
+static void psxpad_spi_poll_open(struct input_polled_dev *pdev)
+{
+	struct psxpad *pad = pdev->private;
+
+	pm_runtime_get_sync(&pad->spi->dev);
+}
+
+static void psxpad_spi_poll_close(struct input_polled_dev *pdev)
+{
+	struct psxpad *pad = pdev->private;
+
+	pm_runtime_put_sync(&pad->spi->dev);
+}
+
+static void psxpad_spi_poll(struct input_polled_dev *pdev)
+{
+	struct psxpad *pad = pdev->private;
+	struct input_dev *input = pdev->input;
+	u8 b_rsp3, b_rsp4;
+	int err;
+
+	psxpad_control_motor(pad, true, true);
+
+	memcpy(pad->sendbuf, PSX_CMD_POLL, sizeof(PSX_CMD_POLL));
+	pad->sendbuf[3] = pad->motor1enable ? pad->motor1level : 0x00;
+	pad->sendbuf[4] = pad->motor2enable ? pad->motor2level : 0x00;
+	err = psxpad_command(pad, sizeof(PSX_CMD_POLL));
+	if (err) {
+		dev_err(&pad->spi->dev,
+			"%s: poll command failed mode: %d\n", __func__, err);
+		return;
+	}
+
+	switch (pad->response[1]) {
+	case 0xCE:	/* 0x73 : analog 1 */
+		/* button data is inverted */
+		b_rsp3 = ~pad->response[3];
+		b_rsp4 = ~pad->response[4];
+
+		input_report_abs(input, ABS_X, REVERSE_BIT(pad->response[7]));
+		input_report_abs(input, ABS_Y, REVERSE_BIT(pad->response[8]));
+		input_report_abs(input, ABS_RX, REVERSE_BIT(pad->response[5]));
+		input_report_abs(input, ABS_RY, REVERSE_BIT(pad->response[6]));
+		input_report_key(input, BTN_DPAD_UP, b_rsp3 & BIT(3));
+		input_report_key(input, BTN_DPAD_DOWN, b_rsp3 & BIT(1));
+		input_report_key(input, BTN_DPAD_LEFT, b_rsp3 & BIT(0));
+		input_report_key(input, BTN_DPAD_RIGHT, b_rsp3 & BIT(2));
+		input_report_key(input, BTN_X, b_rsp4 & BIT(3));
+		input_report_key(input, BTN_A, b_rsp4 & BIT(2));
+		input_report_key(input, BTN_B, b_rsp4 & BIT(1));
+		input_report_key(input, BTN_Y, b_rsp4 & BIT(0));
+		input_report_key(input, BTN_TL, b_rsp4 & BIT(5));
+		input_report_key(input, BTN_TR, b_rsp4 & BIT(4));
+		input_report_key(input, BTN_TL2, b_rsp4 & BIT(7));
+		input_report_key(input, BTN_TR2, b_rsp4 & BIT(6));
+		input_report_key(input, BTN_THUMBL, b_rsp3 & BIT(6));
+		input_report_key(input, BTN_THUMBR, b_rsp3 & BIT(5));
+		input_report_key(input, BTN_SELECT, b_rsp3 & BIT(7));
+		input_report_key(input, BTN_START, b_rsp3 & BIT(4));
+		break;
+
+	case 0x82:	/* 0x41 : digital */
+		/* button data is inverted */
+		b_rsp3 = ~pad->response[3];
+		b_rsp4 = ~pad->response[4];
+
+		input_report_abs(input, ABS_X, 0x80);
+		input_report_abs(input, ABS_Y, 0x80);
+		input_report_abs(input, ABS_RX, 0x80);
+		input_report_abs(input, ABS_RY, 0x80);
+		input_report_key(input, BTN_DPAD_UP, b_rsp3 & BIT(3));
+		input_report_key(input, BTN_DPAD_DOWN, b_rsp3 & BIT(1));
+		input_report_key(input, BTN_DPAD_LEFT, b_rsp3 & BIT(0));
+		input_report_key(input, BTN_DPAD_RIGHT, b_rsp3 & BIT(2));
+		input_report_key(input, BTN_X, b_rsp4 & BIT(3));
+		input_report_key(input, BTN_A, b_rsp4 & BIT(2));
+		input_report_key(input, BTN_B, b_rsp4 & BIT(1));
+		input_report_key(input, BTN_Y, b_rsp4 & BIT(0));
+		input_report_key(input, BTN_TL, b_rsp4 & BIT(5));
+		input_report_key(input, BTN_TR, b_rsp4 & BIT(4));
+		input_report_key(input, BTN_TL2, b_rsp4 & BIT(7));
+		input_report_key(input, BTN_TR2, b_rsp4 & BIT(6));
+		input_report_key(input, BTN_THUMBL, false);
+		input_report_key(input, BTN_THUMBR, false);
+		input_report_key(input, BTN_SELECT, b_rsp3 & BIT(7));
+		input_report_key(input, BTN_START, b_rsp3 & BIT(4));
+		break;
+	}
+
+	input_sync(input);
+}
+
+static int psxpad_spi_probe(struct spi_device *spi)
+{
+	struct psxpad *pad;
+	struct input_polled_dev *pdev;
+	struct input_dev *idev;
+	int err;
+
+	pad = devm_kzalloc(&spi->dev, sizeof(struct psxpad), GFP_KERNEL);
+	if (!pad)
+		return -ENOMEM;
+
+	pdev = input_allocate_polled_device();
+	if (!pdev) {
+		dev_err(&spi->dev, "failed to allocate input device\n");
+		return -ENOMEM;
+	}
+
+	/* input poll device settings */
+	pad->pdev = pdev;
+	pad->spi = spi;
+
+	pdev->private = pad;
+	pdev->open = psxpad_spi_poll_open;
+	pdev->close = psxpad_spi_poll_close;
+	pdev->poll = psxpad_spi_poll;
+	/* poll interval is about 60fps */
+	pdev->poll_interval = 16;
+	pdev->poll_interval_min = 8;
+	pdev->poll_interval_max = 32;
+
+	/* input device settings */
+	idev = pdev->input;
+	idev->name = "PlayStation 1/2 joypad";
+	snprintf(pad->phys, sizeof(pad->phys), "%s/input", dev_name(&spi->dev));
+	idev->id.bustype = BUS_SPI;
+
+	/* key/value map settings */
+	input_set_abs_params(idev, ABS_X, 0, 255, 0, 0);
+	input_set_abs_params(idev, ABS_Y, 0, 255, 0, 0);
+	input_set_abs_params(idev, ABS_RX, 0, 255, 0, 0);
+	input_set_abs_params(idev, ABS_RY, 0, 255, 0, 0);
+	input_set_capability(idev, EV_KEY, BTN_DPAD_UP);
+	input_set_capability(idev, EV_KEY, BTN_DPAD_DOWN);
+	input_set_capability(idev, EV_KEY, BTN_DPAD_LEFT);
+	input_set_capability(idev, EV_KEY, BTN_DPAD_RIGHT);
+	input_set_capability(idev, EV_KEY, BTN_A);
+	input_set_capability(idev, EV_KEY, BTN_B);
+	input_set_capability(idev, EV_KEY, BTN_X);
+	input_set_capability(idev, EV_KEY, BTN_Y);
+	input_set_capability(idev, EV_KEY, BTN_TL);
+	input_set_capability(idev, EV_KEY, BTN_TR);
+	input_set_capability(idev, EV_KEY, BTN_TL2);
+	input_set_capability(idev, EV_KEY, BTN_TR2);
+	input_set_capability(idev, EV_KEY, BTN_THUMBL);
+	input_set_capability(idev, EV_KEY, BTN_THUMBR);
+	input_set_capability(idev, EV_KEY, BTN_SELECT);
+	input_set_capability(idev, EV_KEY, BTN_START);
+
+	err = psxpad_spi_init_ff(pad);
+	if (err)
+		return err;
+
+	/* SPI settings */
+	spi->mode = SPI_MODE_3;
+	spi->bits_per_word = 8;
+	/* (PlayStation 1/2 joypad might be possible works 250kHz/500kHz) */
+	spi->master->min_speed_hz = 125000;
+	spi->master->max_speed_hz = 125000;
+	spi_setup(spi);
+
+	/* pad settings */
+	psxpad_set_motor_level(pad, 0, 0);
+
+	/* register input poll device */
+	err = input_register_polled_device(pdev);
+	if (err) {
+		dev_err(&spi->dev,
+			"failed to register input poll device: %d\n", err);
+		return err;
+	}
+
+	pm_runtime_enable(&spi->dev);
+
+	return 0;
+}
+
+static int __maybe_unused psxpad_spi_suspend(struct device *dev)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	struct psxpad *pad = spi_get_drvdata(spi);
+
+	psxpad_set_motor_level(pad, 0, 0);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(psxpad_spi_pm, psxpad_spi_suspend, NULL);
+
+static const struct spi_device_id psxpad_spi_id[] = {
+	{ "psxpad-spi", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, psxpad_spi_id);
+
+static struct spi_driver psxpad_spi_driver = {
+	.driver = {
+		.name = "psxpad-spi",
+		.pm = &psxpad_spi_pm,
+	},
+	.id_table = psxpad_spi_id,
+	.probe   = psxpad_spi_probe,
+};
+
+module_spi_driver(psxpad_spi_driver);
+
+MODULE_AUTHOR("Tomohiro Yoshidomi <sylph23k@gmail.com>");
+MODULE_DESCRIPTION("PlayStation 1/2 joypads via SPI interface Driver");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 7db789d08cb23aeedb4d6392c2994ce387067021 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 3 May 2017 14:18:24 +0200
Subject: mtd: nand: gpio: update binding

This patch updates the binding documentation in accordance with
commit 44dd182861f99 ("mtd: nand: gpio: make nCE GPIO optional")

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reported-by: Brian Norris <computersforpeace@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 Documentation/devicetree/bindings/mtd/gpio-control-nand.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
index af8915b41ccf..486a17d533d7 100644
--- a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
@@ -12,7 +12,7 @@ Required properties:
 - #address-cells, #size-cells : Must be present if the device has sub-nodes
   representing partitions.
 - gpios : Specifies the GPIO pins to control the NAND device.  The order of
-  GPIO references is:  RDY, nCE, ALE, CLE, and an optional nWP.
+  GPIO references is:  RDY, nCE, ALE, CLE, and nWP. nCE and nWP are optional.
 
 Optional properties:
 - bank-width : Width (in bytes) of the device.  If not present, the width
@@ -36,7 +36,7 @@ gpio-nand@1,0 {
 	#address-cells = <1>;
 	#size-cells = <1>;
 	gpios = <&banka 1 0>,	/* RDY */
-		<&banka 2 0>, 	/* nCE */
+		<0>, 		/* nCE */
 		<&banka 3 0>, 	/* ALE */
 		<&banka 4 0>, 	/* CLE */
 		<0>;		/* nWP */
-- 
cgit v1.2.3


From a9402889f41cc2db7a9b162990bef271be098ff0 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 3 May 2017 08:48:55 +0200
Subject: MAINTAINERS: Update NAND subsystem git repositories

NAND branches are now hosted on MTD repos, nand/next is on l2-mtd and
nand/fixes will be on linux-mtd.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
[Brian: added branch names]
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 MAINTAINERS | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 732050a0c3be..597f52c4d21d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8228,8 +8228,8 @@ M:	Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
 Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
-T:	git git://git.infradead.org/linux-mtd.git
-T:	git git://git.infradead.org/l2-mtd.git
+T:	git git://git.infradead.org/linux-mtd.git master
+T:	git git://git.infradead.org/l2-mtd.git master
 S:	Maintained
 F:	Documentation/devicetree/bindings/mtd/
 F:	drivers/mtd/
@@ -8605,7 +8605,8 @@ R:	Richard Weinberger <richard@nod.at>
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
 Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
-T:	git git://github.com/linux-nand/linux.git
+T:	git git://git.infradead.org/linux-mtd.git nand/fixes
+T:	git git://git.infradead.org/l2-mtd.git nand/next
 S:	Maintained
 F:	drivers/mtd/nand/
 F:	include/linux/mtd/nand*.h
-- 
cgit v1.2.3


From 3ae3d67ba705c754a3c91ac009f9ce73a0e7286a Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Wed, 10 May 2017 15:01:30 -0600
Subject: libnvdimm: add an atomic vs process context flag to rw_bytes

nsio_rw_bytes can clear media errors, but this cannot be done while we
are in an atomic context due to locking within ACPI. From the BTT,
->rw_bytes may be called either from atomic or process context depending
on whether the calls happen during initialization or during IO.

During init, we want to ensure error clearing happens, and the flag
marking process context allows nsio_rw_bytes to do that. When called
during IO, we're in atomic context, and error clearing can be skipped.

Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/blk.c      |  3 ++-
 drivers/nvdimm/btt.c      | 67 +++++++++++++++++++++++++----------------------
 drivers/nvdimm/btt_devs.c |  2 +-
 drivers/nvdimm/claim.c    |  6 +++--
 drivers/nvdimm/nd.h       |  1 +
 drivers/nvdimm/pfn_devs.c |  4 +--
 include/linux/nd.h        | 12 +++++----
 7 files changed, 53 insertions(+), 42 deletions(-)

diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 9faaa9694d87..822198a75e96 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -218,7 +218,8 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 }
 
 static int nsblk_rw_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *iobuf, size_t n, int rw)
+		resource_size_t offset, void *iobuf, size_t n, int rw,
+		unsigned long flags)
 {
 	struct nd_namespace_blk *nsblk = to_nd_namespace_blk(&ndns->dev);
 	struct nd_blk_region *ndbr = to_ndbr(nsblk);
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 368795aad5c9..aa977cd4869d 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -32,25 +32,25 @@ enum log_ent_request {
 };
 
 static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
-		void *buf, size_t n)
+		void *buf, size_t n, unsigned long flags)
 {
 	struct nd_btt *nd_btt = arena->nd_btt;
 	struct nd_namespace_common *ndns = nd_btt->ndns;
 
 	/* arena offsets are 4K from the base of the device */
 	offset += SZ_4K;
-	return nvdimm_read_bytes(ndns, offset, buf, n);
+	return nvdimm_read_bytes(ndns, offset, buf, n, flags);
 }
 
 static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
-		void *buf, size_t n)
+		void *buf, size_t n, unsigned long flags)
 {
 	struct nd_btt *nd_btt = arena->nd_btt;
 	struct nd_namespace_common *ndns = nd_btt->ndns;
 
 	/* arena offsets are 4K from the base of the device */
 	offset += SZ_4K;
-	return nvdimm_write_bytes(ndns, offset, buf, n);
+	return nvdimm_write_bytes(ndns, offset, buf, n, flags);
 }
 
 static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
@@ -58,19 +58,19 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
 	int ret;
 
 	ret = arena_write_bytes(arena, arena->info2off, super,
-			sizeof(struct btt_sb));
+			sizeof(struct btt_sb), 0);
 	if (ret)
 		return ret;
 
 	return arena_write_bytes(arena, arena->infooff, super,
-			sizeof(struct btt_sb));
+			sizeof(struct btt_sb), 0);
 }
 
 static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
 {
 	WARN_ON(!super);
 	return arena_read_bytes(arena, arena->infooff, super,
-			sizeof(struct btt_sb));
+			sizeof(struct btt_sb), 0);
 }
 
 /*
@@ -79,16 +79,17 @@ static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
  *   mapping is in little-endian
  *   mapping contains 'E' and 'Z' flags as desired
  */
-static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping)
+static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping,
+		unsigned long flags)
 {
 	u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
 
 	WARN_ON(lba >= arena->external_nlba);
-	return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE);
+	return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE, flags);
 }
 
 static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
-			u32 z_flag, u32 e_flag)
+			u32 z_flag, u32 e_flag, unsigned long rwb_flags)
 {
 	u32 ze;
 	__le32 mapping_le;
@@ -127,11 +128,11 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
 	}
 
 	mapping_le = cpu_to_le32(mapping);
-	return __btt_map_write(arena, lba, mapping_le);
+	return __btt_map_write(arena, lba, mapping_le, rwb_flags);
 }
 
 static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
-			int *trim, int *error)
+			int *trim, int *error, unsigned long rwb_flags)
 {
 	int ret;
 	__le32 in;
@@ -140,7 +141,7 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
 
 	WARN_ON(lba >= arena->external_nlba);
 
-	ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE);
+	ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE, rwb_flags);
 	if (ret)
 		return ret;
 
@@ -189,7 +190,7 @@ static int btt_log_read_pair(struct arena_info *arena, u32 lane,
 	WARN_ON(!ent);
 	return arena_read_bytes(arena,
 			arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
-			2 * LOG_ENT_SIZE);
+			2 * LOG_ENT_SIZE, 0);
 }
 
 static struct dentry *debugfs_root;
@@ -335,7 +336,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
  * btt_flog_write is the wrapper for updating the freelist elements
  */
 static int __btt_log_write(struct arena_info *arena, u32 lane,
-			u32 sub, struct log_entry *ent)
+			u32 sub, struct log_entry *ent, unsigned long flags)
 {
 	int ret;
 	/*
@@ -350,13 +351,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
 	void *src = ent;
 
 	/* split the 16B write into atomic, durable halves */
-	ret = arena_write_bytes(arena, ns_off, src, log_half);
+	ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
 	if (ret)
 		return ret;
 
 	ns_off += log_half;
 	src += log_half;
-	return arena_write_bytes(arena, ns_off, src, log_half);
+	return arena_write_bytes(arena, ns_off, src, log_half, flags);
 }
 
 static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
@@ -364,7 +365,7 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
 {
 	int ret;
 
-	ret = __btt_log_write(arena, lane, sub, ent);
+	ret = __btt_log_write(arena, lane, sub, ent, NVDIMM_IO_ATOMIC);
 	if (ret)
 		return ret;
 
@@ -397,7 +398,7 @@ static int btt_map_init(struct arena_info *arena)
 		size_t size = min(mapsize, chunk_size);
 
 		ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
-				size);
+				size, 0);
 		if (ret)
 			goto free;
 
@@ -428,10 +429,10 @@ static int btt_log_init(struct arena_info *arena)
 		log.old_map = cpu_to_le32(arena->external_nlba + i);
 		log.new_map = cpu_to_le32(arena->external_nlba + i);
 		log.seq = cpu_to_le32(LOG_SEQ_INIT);
-		ret = __btt_log_write(arena, i, 0, &log);
+		ret = __btt_log_write(arena, i, 0, &log, 0);
 		if (ret)
 			return ret;
-		ret = __btt_log_write(arena, i, 1, &zerolog);
+		ret = __btt_log_write(arena, i, 1, &zerolog, 0);
 		if (ret)
 			return ret;
 	}
@@ -470,7 +471,7 @@ static int btt_freelist_init(struct arena_info *arena)
 
 		/* Check if map recovery is needed */
 		ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry,
-				NULL, NULL);
+				NULL, NULL, 0);
 		if (ret)
 			return ret;
 		if ((le32_to_cpu(log_new.new_map) != map_entry) &&
@@ -480,7 +481,7 @@ static int btt_freelist_init(struct arena_info *arena)
 			 * to complete the map write. So fix up the map.
 			 */
 			ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
-					le32_to_cpu(log_new.new_map), 0, 0);
+					le32_to_cpu(log_new.new_map), 0, 0, 0);
 			if (ret)
 				return ret;
 		}
@@ -875,7 +876,7 @@ static int btt_data_read(struct arena_info *arena, struct page *page,
 	u64 nsoff = to_namespace_offset(arena, lba);
 	void *mem = kmap_atomic(page);
 
-	ret = arena_read_bytes(arena, nsoff, mem + off, len);
+	ret = arena_read_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
 	kunmap_atomic(mem);
 
 	return ret;
@@ -888,7 +889,7 @@ static int btt_data_write(struct arena_info *arena, u32 lba,
 	u64 nsoff = to_namespace_offset(arena, lba);
 	void *mem = kmap_atomic(page);
 
-	ret = arena_write_bytes(arena, nsoff, mem + off, len);
+	ret = arena_write_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
 	kunmap_atomic(mem);
 
 	return ret;
@@ -931,10 +932,12 @@ static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
 		mem = kmap_atomic(bv.bv_page);
 		if (rw)
 			ret = arena_write_bytes(arena, meta_nsoff,
-					mem + bv.bv_offset, cur_len);
+					mem + bv.bv_offset, cur_len,
+					NVDIMM_IO_ATOMIC);
 		else
 			ret = arena_read_bytes(arena, meta_nsoff,
-					mem + bv.bv_offset, cur_len);
+					mem + bv.bv_offset, cur_len,
+					NVDIMM_IO_ATOMIC);
 
 		kunmap_atomic(mem);
 		if (ret)
@@ -976,7 +979,8 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
 
 		cur_len = min(btt->sector_size, len);
 
-		ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag);
+		ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag,
+				NVDIMM_IO_ATOMIC);
 		if (ret)
 			goto out_lane;
 
@@ -1006,7 +1010,7 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
 			barrier();
 
 			ret = btt_map_read(arena, premap, &new_map, &t_flag,
-						&e_flag);
+						&e_flag, NVDIMM_IO_ATOMIC);
 			if (ret)
 				goto out_rtt;
 
@@ -1093,7 +1097,8 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 		}
 
 		lock_map(arena, premap);
-		ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL);
+		ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL,
+				NVDIMM_IO_ATOMIC);
 		if (ret)
 			goto out_map;
 		if (old_postmap >= arena->internal_nlba) {
@@ -1110,7 +1115,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 		if (ret)
 			goto out_map;
 
-		ret = btt_map_write(arena, premap, new_postmap, 0, 0);
+		ret = btt_map_write(arena, premap, new_postmap, 0, 0, 0);
 		if (ret)
 			goto out_map;
 
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 4b76af2b8715..ae00dc0d9791 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -273,7 +273,7 @@ static int __nd_btt_probe(struct nd_btt *nd_btt,
 	if (!btt_sb || !ndns || !nd_btt)
 		return -ENODEV;
 
-	if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb)))
+	if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0))
 		return -ENXIO;
 
 	if (nvdimm_namespace_capacity(ndns) < SZ_16M)
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 93d128da1c92..7ceb5fa4f2a1 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -228,7 +228,8 @@ u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb)
 EXPORT_SYMBOL(nd_sb_checksum);
 
 static int nsio_rw_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *buf, size_t size, int rw)
+		resource_size_t offset, void *buf, size_t size, int rw,
+		unsigned long flags)
 {
 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
 	unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
@@ -259,7 +260,8 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 		 * work around this collision.
 		 */
 		if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
-				&& (!ndns->claim || !is_nd_btt(ndns->claim))) {
+				&& !(flags & NVDIMM_IO_ATOMIC)
+				&& !ndns->claim) {
 			long cleared;
 
 			cleared = nvdimm_clear_poison(&ndns->dev,
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 77d032192bf7..03852d738eec 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -31,6 +31,7 @@ enum {
 	ND_MAX_LANES = 256,
 	SECTOR_SHIFT = 9,
 	INT_LBASIZE_ALIGNMENT = 64,
+	NVDIMM_IO_ATOMIC = 1,
 };
 
 struct nd_poison {
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 335c8175410b..a6c403600d19 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -357,7 +357,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 	if (!is_nd_pmem(nd_pfn->dev.parent))
 		return -ENODEV;
 
-	if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)))
+	if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0))
 		return -ENXIO;
 
 	if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0)
@@ -662,7 +662,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 	checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
 	pfn_sb->checksum = cpu_to_le64(checksum);
 
-	return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
+	return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0);
 }
 
 /*
diff --git a/include/linux/nd.h b/include/linux/nd.h
index fa66aeed441a..194b8e002ea7 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -48,7 +48,7 @@ struct nd_namespace_common {
 	struct device dev;
 	struct device *claim;
 	int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset,
-			void *buf, size_t size, int rw);
+			void *buf, size_t size, int rw, unsigned long flags);
 };
 
 static inline struct nd_namespace_common *to_ndns(struct device *dev)
@@ -134,9 +134,10 @@ static inline struct nd_namespace_blk *to_nd_namespace_blk(const struct device *
  * @buf is up-to-date upon return from this routine.
  */
 static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *buf, size_t size)
+		resource_size_t offset, void *buf, size_t size,
+		unsigned long flags)
 {
-	return ndns->rw_bytes(ndns, offset, buf, size, READ);
+	return ndns->rw_bytes(ndns, offset, buf, size, READ, flags);
 }
 
 /**
@@ -152,9 +153,10 @@ static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns,
  * to media is handled internal to the @ndns driver, if at all.
  */
 static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *buf, size_t size)
+		resource_size_t offset, void *buf, size_t size,
+		unsigned long flags)
 {
-	return ndns->rw_bytes(ndns, offset, buf, size, WRITE);
+	return ndns->rw_bytes(ndns, offset, buf, size, WRITE, flags);
 }
 
 #define MODULE_ALIAS_ND_DEVICE(type) \
-- 
cgit v1.2.3


From b177fe85dd27de1ee4c29f59c4e82b3ea3b78784 Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Wed, 10 May 2017 15:01:31 -0600
Subject: libnvdimm, btt: ensure that initializing metadata clears poison

If we had badblocks/poison in the metadata area of a BTT, recreating the
BTT would not clear the poison in all cases, notably the flog area. This
is because rw_bytes will only clear errors if the request being sent
down is 512B aligned and sized.

Make sure that when writing the map and info blocks, the rw_bytes being
sent are of the correct size/alignment. For the flog, instead of doing
the smaller log_entry writes only, first do a 'wipe' of the entire area
by writing zeroes in large enough chunks so that errors get cleared.

Cc: Andy Rudoff <andy.rudoff@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/btt.c | 54 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 7 deletions(-)

diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index aa977cd4869d..983718b8fd9b 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -57,6 +57,14 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
 {
 	int ret;
 
+	/*
+	 * infooff and info2off should always be at least 512B aligned.
+	 * We rely on that to make sure rw_bytes does error clearing
+	 * correctly, so make sure that is the case.
+	 */
+	WARN_ON_ONCE(!IS_ALIGNED(arena->infooff, 512));
+	WARN_ON_ONCE(!IS_ALIGNED(arena->info2off, 512));
+
 	ret = arena_write_bytes(arena, arena->info2off, super,
 			sizeof(struct btt_sb), 0);
 	if (ret)
@@ -394,9 +402,17 @@ static int btt_map_init(struct arena_info *arena)
 	if (!zerobuf)
 		return -ENOMEM;
 
+	/*
+	 * mapoff should always be at least 512B  aligned. We rely on that to
+	 * make sure rw_bytes does error clearing correctly, so make sure that
+	 * is the case.
+	 */
+	WARN_ON_ONCE(!IS_ALIGNED(arena->mapoff, 512));
+
 	while (mapsize) {
 		size_t size = min(mapsize, chunk_size);
 
+		WARN_ON_ONCE(size < 512);
 		ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
 				size, 0);
 		if (ret)
@@ -418,11 +434,36 @@ static int btt_map_init(struct arena_info *arena)
  */
 static int btt_log_init(struct arena_info *arena)
 {
+	size_t logsize = arena->info2off - arena->logoff;
+	size_t chunk_size = SZ_4K, offset = 0;
+	struct log_entry log;
+	void *zerobuf;
 	int ret;
 	u32 i;
-	struct log_entry log, zerolog;
 
-	memset(&zerolog, 0, sizeof(zerolog));
+	zerobuf = kzalloc(chunk_size, GFP_KERNEL);
+	if (!zerobuf)
+		return -ENOMEM;
+	/*
+	 * logoff should always be at least 512B  aligned. We rely on that to
+	 * make sure rw_bytes does error clearing correctly, so make sure that
+	 * is the case.
+	 */
+	WARN_ON_ONCE(!IS_ALIGNED(arena->logoff, 512));
+
+	while (logsize) {
+		size_t size = min(logsize, chunk_size);
+
+		WARN_ON_ONCE(size < 512);
+		ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
+				size, 0);
+		if (ret)
+			goto free;
+
+		offset += size;
+		logsize -= size;
+		cond_resched();
+	}
 
 	for (i = 0; i < arena->nfree; i++) {
 		log.lba = cpu_to_le32(i);
@@ -431,13 +472,12 @@ static int btt_log_init(struct arena_info *arena)
 		log.seq = cpu_to_le32(LOG_SEQ_INIT);
 		ret = __btt_log_write(arena, i, 0, &log, 0);
 		if (ret)
-			return ret;
-		ret = __btt_log_write(arena, i, 1, &zerolog, 0);
-		if (ret)
-			return ret;
+			goto free;
 	}
 
-	return 0;
+ free:
+	kfree(zerobuf);
+	return ret;
 }
 
 static int btt_freelist_init(struct arena_info *arena)
-- 
cgit v1.2.3


From e84b83b9ee2187817cf895471675f1ccdf64cd53 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 10 May 2017 19:38:13 -0700
Subject: filesystem-dax: fix broken __dax_zero_page_range() conversion

The conversion of __dax_zero_page_range() to 'struct dax_operations'
caused it to frequently fail. The mistake was treating the @size
parameter as a dax mapping length rather than just a length of the
clear_pmem() operation. The dax mapping length is assumed to be hard
coded as PAGE_SIZE.

Without this fix any page unaligned zeroing request will trigger a
-EINVAL return from bdev_dax_pgoff().

Cc: Jan Kara <jack@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Fixes: cccbce671582 ("filesystem-dax: convert to dax_direct_access()")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index ce9dc9c3e829..5ee1d212d81f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -971,12 +971,12 @@ int __dax_zero_page_range(struct block_device *bdev,
 		void *kaddr;
 		pfn_t pfn;
 
-		rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
+		rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
 		if (rc)
 			return rc;
 
 		id = dax_read_lock();
-		rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr,
+		rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr,
 				&pfn);
 		if (rc < 0) {
 			dax_read_unlock(id);
-- 
cgit v1.2.3


From 984a9d4c40bed351a92ed31f0723a710444295da Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Thu, 11 May 2017 00:23:08 -0700
Subject: Revert "target: Fix VERIFY and WRITE VERIFY command parsing"

This reverts commit 0e2eb7d12eaa8e391bf5615d4271bb87a649caaa

  Author: Bart Van Assche <bart.vanassche@sandisk.com>
  Date:   Thu Mar 30 10:12:39 2017 -0700

      target: Fix VERIFY and WRITE VERIFY command parsing

This patch broke existing behaviour for WRITE_VERIFY because
it dropped the original SCF_SCSI_DATA_CDB assignment for
bytchk = 0 so target_cmd_size_check() no longer rejected
this case, allowing an overflow case to trigger an OOPs
in iscsi-target.

Since the short term and long term fixes are still being
discussed, revert it for now since it's late in the merge
window and try again in v4.13-rc1.

Conflicts:
	drivers/target/target_core_sbc.c

Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_sbc.c | 74 ++++++----------------------------------
 1 file changed, 10 insertions(+), 64 deletions(-)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index a0ad618f1b1a..4316f7b65fb7 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -831,60 +831,6 @@ sbc_check_dpofua(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb)
 	return 0;
 }
 
-/**
- * sbc_parse_verify - parse VERIFY, VERIFY_16 and WRITE VERIFY commands
- * @cmd:     (in)  structure that describes the SCSI command to be parsed.
- * @sectors: (out) Number of logical blocks on the storage medium that will be
- *           affected by the SCSI command.
- * @bufflen: (out) Expected length of the SCSI Data-Out buffer.
- */
-static sense_reason_t sbc_parse_verify(struct se_cmd *cmd, int *sectors,
-				       u32 *bufflen)
-{
-	struct se_device *dev = cmd->se_dev;
-	u8 *cdb = cmd->t_task_cdb;
-	u8 bytchk = (cdb[1] >> 1) & 3;
-	sense_reason_t ret;
-
-	switch (cdb[0]) {
-	case VERIFY:
-	case WRITE_VERIFY:
-		*sectors = transport_get_sectors_10(cdb);
-		cmd->t_task_lba = transport_lba_32(cdb);
-		break;
-	case VERIFY_16:
-	case WRITE_VERIFY_16:
-		*sectors = transport_get_sectors_16(cdb);
-		cmd->t_task_lba = transport_lba_64(cdb);
-		break;
-	default:
-		WARN_ON_ONCE(true);
-		return TCM_UNSUPPORTED_SCSI_OPCODE;
-	}
-
-	if (sbc_check_dpofua(dev, cmd, cdb))
-		return TCM_INVALID_CDB_FIELD;
-
-	ret = sbc_check_prot(dev, cmd, cdb, *sectors, true);
-	if (ret)
-		return ret;
-
-	switch (bytchk) {
-	case 0:
-		*bufflen = 0;
-		break;
-	case 1:
-		*bufflen = sbc_get_size(cmd, *sectors);
-		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-		break;
-	default:
-		pr_err("Unsupported BYTCHK value %d for SCSI opcode %#x\n",
-		       bytchk, cdb[0]);
-		return TCM_INVALID_CDB_FIELD;
-	}
-	return TCM_NO_SENSE;
-}
-
 sense_reason_t
 sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 {
@@ -952,6 +898,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case WRITE_10:
+	case WRITE_VERIFY:
 		sectors = transport_get_sectors_10(cdb);
 		cmd->t_task_lba = transport_lba_32(cdb);
 
@@ -965,13 +912,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
 		cmd->execute_cmd = sbc_execute_rw;
 		break;
-	case WRITE_VERIFY:
-	case WRITE_VERIFY_16:
-		ret = sbc_parse_verify(cmd, &sectors, &size);
-		if (ret)
-			return ret;
-		cmd->execute_cmd = sbc_execute_rw;
-		goto check_lba;
 	case WRITE_12:
 		sectors = transport_get_sectors_12(cdb);
 		cmd->t_task_lba = transport_lba_32(cdb);
@@ -987,6 +927,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		cmd->execute_cmd = sbc_execute_rw;
 		break;
 	case WRITE_16:
+	case WRITE_VERIFY_16:
 		sectors = transport_get_sectors_16(cdb);
 		cmd->t_task_lba = transport_lba_64(cdb);
 
@@ -1169,9 +1110,14 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		break;
 	case VERIFY:
 	case VERIFY_16:
-		ret = sbc_parse_verify(cmd, &sectors, &size);
-		if (ret)
-			return ret;
+		size = 0;
+		if (cdb[0] == VERIFY) {
+			sectors = transport_get_sectors_10(cdb);
+			cmd->t_task_lba = transport_lba_32(cdb);
+		} else {
+			sectors = transport_get_sectors_16(cdb);
+			cmd->t_task_lba = transport_lba_64(cdb);
+		}
 		cmd->execute_cmd = sbc_emulate_noop;
 		goto check_lba;
 	case REZERO_UNIT:
-- 
cgit v1.2.3


From e118c3fec9c0d8d2a96462c4c035305dc952e402 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Thu, 11 May 2017 11:40:16 +0200
Subject: nios2: remove custom early console implementation

As of commits d8f347ba35cf ("nios2: enable earlycon support"),
0dcc0542a006 ("serial: altera_jtaguart: add earlycon support") and
4d9d7d896d77 ("serial: altera_uart: add earlycon support"), the nios2
architecture and the altera_uart/altera_jtaguart drivers support
earlycon. Thus, the custom early console implementation for nios2 is no
longer necessary to get early boot messages. Remove it and rely fully on
earlycon support.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
---
 arch/nios2/include/asm/prom.h    |  22 -------
 arch/nios2/include/asm/setup.h   |   2 -
 arch/nios2/kernel/Makefile       |   1 -
 arch/nios2/kernel/early_printk.c | 121 ---------------------------------------
 arch/nios2/kernel/prom.c         |  49 ----------------
 arch/nios2/kernel/setup.c        |   4 --
 6 files changed, 199 deletions(-)
 delete mode 100644 arch/nios2/include/asm/prom.h
 delete mode 100644 arch/nios2/kernel/early_printk.c

diff --git a/arch/nios2/include/asm/prom.h b/arch/nios2/include/asm/prom.h
deleted file mode 100644
index 75fffb42cfa5..000000000000
--- a/arch/nios2/include/asm/prom.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright Altera Corporation (C) <2015>. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ASM_NIOS2_PROM_H__
-#define __ASM_NIOS2_PROM_H__
-
-extern unsigned long __init of_early_console(void);
-
-#endif
diff --git a/arch/nios2/include/asm/setup.h b/arch/nios2/include/asm/setup.h
index dcbf8cf1a344..ac9bff248e6d 100644
--- a/arch/nios2/include/asm/setup.h
+++ b/arch/nios2/include/asm/setup.h
@@ -30,8 +30,6 @@ extern char fast_handler_end[];
 
 extern void pagetable_init(void);
 
-extern void setup_early_printk(void);
-
 #endif/* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/nios2/kernel/Makefile b/arch/nios2/kernel/Makefile
index 1aae25703657..06d07432b38d 100644
--- a/arch/nios2/kernel/Makefile
+++ b/arch/nios2/kernel/Makefile
@@ -20,7 +20,6 @@ obj-y	+= syscall_table.o
 obj-y	+= time.o
 obj-y	+= traps.o
 
-obj-$(CONFIG_EARLY_PRINTK)		+= early_printk.o
 obj-$(CONFIG_KGDB)			+= kgdb.o
 obj-$(CONFIG_MODULES)			+= module.o
 obj-$(CONFIG_NIOS2_ALIGNMENT_TRAP)	+= misaligned.o
diff --git a/arch/nios2/kernel/early_printk.c b/arch/nios2/kernel/early_printk.c
deleted file mode 100644
index 4a7bb98f744c..000000000000
--- a/arch/nios2/kernel/early_printk.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Early printk for Nios2.
- *
- * Copyright (C) 2015, Altera Corporation
- * Copyright (C) 2010, Tobias Klauser <tklauser@distanz.ch>
- * Copyright (C) 2009, Wind River Systems Inc
- *   Implemented by fredrik.markstrom@gmail.com and ivarholmqvist@gmail.com
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/console.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/io.h>
-
-#include <asm/prom.h>
-
-static unsigned long base_addr;
-
-#if defined(CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE)
-
-#define ALTERA_JTAGUART_DATA_REG		0
-#define ALTERA_JTAGUART_CONTROL_REG		4
-#define ALTERA_JTAGUART_CONTROL_WSPACE_MSK	0xFFFF0000
-#define ALTERA_JTAGUART_CONTROL_AC_MSK		0x00000400
-
-#define JUART_GET_CR() \
-	__builtin_ldwio((void *)(base_addr + ALTERA_JTAGUART_CONTROL_REG))
-#define JUART_SET_CR(v) \
-	__builtin_stwio((void *)(base_addr + ALTERA_JTAGUART_CONTROL_REG), v)
-#define JUART_SET_TX(v) \
-	__builtin_stwio((void *)(base_addr + ALTERA_JTAGUART_DATA_REG), v)
-
-static void early_console_write(struct console *con, const char *s, unsigned n)
-{
-	unsigned long status;
-
-	while (n-- && *s) {
-		while (((status = JUART_GET_CR())
-				& ALTERA_JTAGUART_CONTROL_WSPACE_MSK) == 0) {
-#if defined(CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE_BYPASS)
-			if ((status & ALTERA_JTAGUART_CONTROL_AC_MSK) == 0)
-				return;	/* no connection activity */
-#endif
-		}
-		JUART_SET_TX(*s);
-		s++;
-	}
-}
-
-#elif defined(CONFIG_SERIAL_ALTERA_UART_CONSOLE)
-
-#define ALTERA_UART_TXDATA_REG		4
-#define ALTERA_UART_STATUS_REG		8
-#define ALTERA_UART_STATUS_TRDY		0x0040
-
-#define UART_GET_SR() \
-	__builtin_ldwio((void *)(base_addr + ALTERA_UART_STATUS_REG))
-#define UART_SET_TX(v) \
-	__builtin_stwio((void *)(base_addr + ALTERA_UART_TXDATA_REG), v)
-
-static void early_console_putc(char c)
-{
-	while (!(UART_GET_SR() & ALTERA_UART_STATUS_TRDY))
-		;
-
-	UART_SET_TX(c);
-}
-
-static void early_console_write(struct console *con, const char *s, unsigned n)
-{
-	while (n-- && *s) {
-		early_console_putc(*s);
-		if (*s == '\n')
-			early_console_putc('\r');
-		s++;
-	}
-}
-
-#else
-
-static void early_console_write(struct console *con, const char *s, unsigned n)
-{
-}
-
-#endif
-
-static struct console early_console_prom = {
-	.name	= "early",
-	.write	= early_console_write,
-	.flags	= CON_PRINTBUFFER | CON_BOOT,
-	.index	= -1
-};
-
-void __init setup_early_printk(void)
-{
-#if defined(CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE) ||	\
-	defined(CONFIG_SERIAL_ALTERA_UART_CONSOLE)
-	base_addr = of_early_console();
-#else
-	base_addr = 0;
-#endif
-
-	if (!base_addr)
-		return;
-
-#if defined(CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE_BYPASS)
-	/* Clear activity bit so BYPASS doesn't stall if we've used JTAG for
-	 * downloading the kernel. This might cause early data to be lost even
-	 * if the JTAG terminal is running.
-	 */
-	JUART_SET_CR(JUART_GET_CR() | ALTERA_JTAGUART_CONTROL_AC_MSK);
-#endif
-
-	early_console = &early_console_prom;
-	register_console(early_console);
-	pr_info("early_console initialized at 0x%08lx\n", base_addr);
-}
diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c
index 3901b80d4420..6688576b3a47 100644
--- a/arch/nios2/kernel/prom.c
+++ b/arch/nios2/kernel/prom.c
@@ -30,7 +30,6 @@
 #include <linux/of_fdt.h>
 #include <linux/io.h>
 
-#include <asm/prom.h>
 #include <asm/sections.h>
 
 void __init early_init_dt_add_memory_arch(u64 base, u64 size)
@@ -71,51 +70,3 @@ void __init early_init_devtree(void *params)
 
 	early_init_dt_scan(params);
 }
-
-#ifdef CONFIG_EARLY_PRINTK
-static int __init early_init_dt_scan_serial(unsigned long node,
-			const char *uname, int depth, void *data)
-{
-	u64 *addr64 = (u64 *) data;
-	const char *p;
-
-	/* only consider serial nodes */
-	if (strncmp(uname, "serial", 6) != 0)
-		return 0;
-
-	p = of_get_flat_dt_prop(node, "compatible", NULL);
-	if (!p)
-		return 0;
-
-	/*
-	 * We found an altera_jtaguart but it wasn't configured for console, so
-	 * skip it.
-	 */
-#ifndef CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE
-	if (strncmp(p, "altr,juart", 10) == 0)
-		return 0;
-#endif
-
-	/*
-	 * Same for altera_uart.
-	 */
-#ifndef CONFIG_SERIAL_ALTERA_UART_CONSOLE
-	if (strncmp(p, "altr,uart", 9) == 0)
-		return 0;
-#endif
-
-	*addr64 = of_flat_dt_translate_address(node);
-
-	return *addr64 == OF_BAD_ADDR ? 0 : 1;
-}
-
-unsigned long __init of_early_console(void)
-{
-	u64 base = 0;
-
-	if (of_scan_flat_dt(early_init_dt_scan_serial, &base))
-		return (u32)ioremap(base, 32);
-	else
-		return 0;
-}
-#endif /* CONFIG_EARLY_PRINTK */
diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c
index 8b5146082e3e..926a02b17b31 100644
--- a/arch/nios2/kernel/setup.c
+++ b/arch/nios2/kernel/setup.c
@@ -147,10 +147,6 @@ void __init setup_arch(char **cmdline_p)
 
 	console_verbose();
 
-#ifdef CONFIG_EARLY_PRINTK
-	setup_early_printk();
-#endif
-
 	memory_start = PAGE_ALIGN((unsigned long)__pa(_end));
 	memory_end = (unsigned long) CONFIG_NIOS2_MEM_BASE + memory_size;
 
-- 
cgit v1.2.3


From 03497d761c55438144fd63534d4223418fdfd345 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 27 Apr 2017 11:19:00 -0700
Subject: mm: Silence vmap() allocation failures based on caller gfp_flags

If the caller has set __GFP_NOWARN don't print the following message:
vmap allocation for size 15736832 failed: use vmalloc=<size> to increase
size.

This can happen with the ARM/Linux or ARM64/Linux module loader built
with CONFIG_ARM{,64}_MODULE_PLTS=y which does a first attempt at loading
a large module from module space, then falls back to vmalloc space.

Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 mm/vmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0b057628a7ba..b74f1d01ef76 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -521,7 +521,7 @@ overflow:
 		}
 	}
 
-	if (printk_ratelimit())
+	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
 		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
 			size);
 	kfree(va);
-- 
cgit v1.2.3


From 75d24d968af8913f641c612930c96acc5399e427 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 27 Apr 2017 11:19:01 -0700
Subject: ARM: Silence first allocation with CONFIG_ARM_MODULE_PLTS=y

When CONFIG_ARM_MODULE_PLTS is enabled, the first allocation using the
module space fails, because the module is too big, and then the module
allocation is attempted from vmalloc space. Silence the first allocation
failure in that case by setting __GFP_NOWARN.

Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/kernel/module.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 80254b47dc34..3ff571c2c71c 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -40,8 +40,15 @@
 #ifdef CONFIG_MMU
 void *module_alloc(unsigned long size)
 {
-	void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-				GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+	gfp_t gfp_mask = GFP_KERNEL;
+	void *p;
+
+	/* Silence the initial allocation */
+	if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
+		gfp_mask |= __GFP_NOWARN;
+
+	p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+				gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
 	if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
 		return p;
-- 
cgit v1.2.3


From 0c2cf6d9487cb90be6ad7fac66044dfa8e8e5243 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 27 Apr 2017 11:19:02 -0700
Subject: arm64: Silence first allocation with CONFIG_ARM64_MODULE_PLTS=y

When CONFIG_ARM64_MODULE_PLTS is enabled, the first allocation using the
module space fails, because the module is too big, and then the module
allocation is attempted from vmalloc space. Silence the first allocation
failure in that case by setting __GFP_NOWARN.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/module.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index c9a2ab446dc6..f035ff6fb223 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -32,11 +32,16 @@
 
 void *module_alloc(unsigned long size)
 {
+	gfp_t gfp_mask = GFP_KERNEL;
 	void *p;
 
+	/* Silence the initial allocation */
+	if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+		gfp_mask |= __GFP_NOWARN;
+
 	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
 				module_alloc_base + MODULES_VSIZE,
-				GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+				gfp_mask, PAGE_KERNEL_EXEC, 0,
 				NUMA_NO_NODE, __builtin_return_address(0));
 
 	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
-- 
cgit v1.2.3


From def9331a12977770cc6132d79f8e6565871e8e38 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 27 Apr 2017 07:01:20 +0200
Subject: x86/amd: don't set X86_BUG_SYSRET_SS_ATTRS when running under Xen

When running as Xen pv guest X86_BUG_SYSRET_SS_ATTRS must not be set
on AMD cpus.

This bug/feature bit is kind of special as it will be used very early
when switching threads. Setting the bit and clearing it a little bit
later leaves a critical window where things can go wrong. This time
window has enlarged a little bit by using setup_clear_cpu_cap() instead
of the hypervisor's set_cpu_features callback. It seems this larger
window now makes it rather easy to hit the problem.

The proper solution is to never set the bit in case of Xen.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/kernel/cpu/amd.c   | 5 +++--
 arch/x86/xen/enlighten_pv.c | 1 -
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c36140d788fe..b6da6e75e3a8 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -799,8 +799,9 @@ static void init_amd(struct cpuinfo_x86 *c)
 		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
 			set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
 
-	/* AMD CPUs don't reset SS attributes on SYSRET */
-	set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+	/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
+	if (!cpu_has(c, X86_FEATURE_XENPV))
+		set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index b5c54ab0e195..7cd442690f9d 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -290,7 +290,6 @@ static bool __init xen_check_xsave(void)
 
 static void __init xen_init_capabilities(void)
 {
-	setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS);
 	setup_force_cpu_cap(X86_FEATURE_XENPV);
 	setup_clear_cpu_cap(X86_FEATURE_DCA);
 	setup_clear_cpu_cap(X86_FEATURE_APERFMPERF);
-- 
cgit v1.2.3


From 69861e0a52f8733355ce246f0db15e1b240ad667 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 10 May 2017 06:08:44 +0200
Subject: xen: adjust early dom0 p2m handling to xen hypervisor behavior

When booted as pv-guest the p2m list presented by the Xen is already
mapped to virtual addresses. In dom0 case the hypervisor might make use
of 2M- or 1G-pages for this mapping. Unfortunately while being properly
aligned in virtual and machine address space, those pages might not be
aligned properly in guest physical address space.

So when trying to obtain the guest physical address of such a page
pud_pfn() and pmd_pfn() must be avoided as those will mask away guest
physical address bits not being zero in this special case.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/mmu_pv.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 9d9ae6650aa1..7397d8b8459d 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2025,7 +2025,8 @@ static unsigned long __init xen_read_phys_ulong(phys_addr_t addr)
 
 /*
  * Translate a virtual address to a physical one without relying on mapped
- * page tables.
+ * page tables. Don't rely on big pages being aligned in (guest) physical
+ * space!
  */
 static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
 {
@@ -2046,7 +2047,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
 						       sizeof(pud)));
 	if (!pud_present(pud))
 		return 0;
-	pa = pud_pfn(pud) << PAGE_SHIFT;
+	pa = pud_val(pud) & PTE_PFN_MASK;
 	if (pud_large(pud))
 		return pa + (vaddr & ~PUD_MASK);
 
@@ -2054,7 +2055,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
 						       sizeof(pmd)));
 	if (!pmd_present(pmd))
 		return 0;
-	pa = pmd_pfn(pmd) << PAGE_SHIFT;
+	pa = pmd_val(pmd) & PTE_PFN_MASK;
 	if (pmd_large(pmd))
 		return pa + (vaddr & ~PMD_MASK);
 
-- 
cgit v1.2.3


From ed6565e734249ef021d5c13ba34c167eb4e42f62 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 11 May 2017 12:34:38 +0200
Subject: block: handle partial completions for special payload requests

SCSI devices can return short writes on Write Same just like for normal
writes, so we need to handle this case for our special payload requests
as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Tested-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index c580b0138a7f..c7068520794b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2644,8 +2644,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		return false;
 	}
 
-	WARN_ON_ONCE(req->rq_flags & RQF_SPECIAL_PAYLOAD);
-
 	req->__data_len -= total_bytes;
 
 	/* update sector only for requests with clear definition of sector */
@@ -2658,17 +2656,19 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
 	}
 
-	/*
-	 * If total number of sectors is less than the first segment
-	 * size, something has gone terribly wrong.
-	 */
-	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
-		blk_dump_rq_flags(req, "request botched");
-		req->__data_len = blk_rq_cur_bytes(req);
-	}
+	if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
+		/*
+		 * If total number of sectors is less than the first segment
+		 * size, something has gone terribly wrong.
+		 */
+		if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
+			blk_dump_rq_flags(req, "request botched");
+			req->__data_len = blk_rq_cur_bytes(req);
+		}
 
-	/* recalculate the number of segments */
-	blk_recalc_rq_segments(req);
+		/* recalculate the number of segments */
+		blk_recalc_rq_segments(req);
+	}
 
 	return true;
 }
-- 
cgit v1.2.3


From 25a0da73f24c95d57381bedd9cd89e4929b1bfde Mon Sep 17 00:00:00 2001
From: Michael Heimpold <michael.heimpold@i2se.com>
Date: Thu, 11 May 2017 10:13:44 +0200
Subject: doc: replace FTP URL to kernel.org with HTTPS one

FTP services were shutdown some weeks ago, so the FTP URL
does not work anymore. Fix this by replacing it with
corresponding HTTPS URL.

Signed-off-by: Michael Heimpold <michael.heimpold@i2se.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst
index 02f639aab06e..b96e80f79e85 100644
--- a/Documentation/admin-guide/README.rst
+++ b/Documentation/admin-guide/README.rst
@@ -362,7 +362,7 @@ If something goes wrong
    as is, otherwise you will have to use the ``ksymoops`` program to make
    sense of the dump (but compiling with CONFIG_KALLSYMS is usually preferred).
    This utility can be downloaded from
-   ftp://ftp.<country>.kernel.org/pub/linux/utils/kernel/ksymoops/ .
+   https://www.kernel.org/pub/linux/utils/kernel/ksymoops/ .
    Alternatively, you can do the dump lookup by hand:
 
  - In debugging dumps like the above, it helps enormously if you can
-- 
cgit v1.2.3


From f42c5707f9344a2e30128cde5ca2a02775e58994 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Mon, 1 May 2017 16:59:29 +1000
Subject: drm/nouveau/kms/nv50: remove pointless argument to window
 atomic_check_acquire()

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nv50_display.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index c9910c8537ed..b7dd5484e582 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -831,8 +831,7 @@ nv50_wndw_atomic_check_release(struct nv50_wndw *wndw,
 static int
 nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw,
 			       struct nv50_wndw_atom *asyw,
-			       struct nv50_head_atom *asyh,
-			       u32 pflip_flags)
+			       struct nv50_head_atom *asyh)
 {
 	struct nouveau_framebuffer *fb = nouveau_framebuffer(asyw->state.fb);
 	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
@@ -848,7 +847,10 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw,
 	asyw->image.h = fb->base.height;
 	asyw->image.kind = (fb->nvbo->tile_flags & 0x0000ff00) >> 8;
 
-	asyw->interval = pflip_flags & DRM_MODE_PAGE_FLIP_ASYNC ? 0 : 1;
+	if (asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC)
+		asyw->interval = 0;
+	else
+		asyw->interval = 1;
 
 	if (asyw->image.kind) {
 		asyw->image.layout = 0;
@@ -887,7 +889,6 @@ nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state)
 	struct nv50_head_atom *harm = NULL, *asyh = NULL;
 	bool varm = false, asyv = false, asym = false;
 	int ret;
-	u32 pflip_flags = 0;
 
 	NV_ATOMIC(drm, "%s atomic_check\n", plane->name);
 	if (asyw->state.crtc) {
@@ -896,7 +897,6 @@ nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state)
 			return PTR_ERR(asyh);
 		asym = drm_atomic_crtc_needs_modeset(&asyh->state);
 		asyv = asyh->state.active;
-		pflip_flags = asyh->state.pageflip_flags;
 	}
 
 	if (armw->state.crtc) {
@@ -913,8 +913,7 @@ nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state)
 			asyw->set.point = true;
 
 		if (!varm || asym || armw->state.fb != asyw->state.fb) {
-			ret = nv50_wndw_atomic_check_acquire(
-					wndw, asyw, asyh, pflip_flags);
+			ret = nv50_wndw_atomic_check_acquire(wndw, asyw, asyh);
 			if (ret)
 				return ret;
 		}
-- 
cgit v1.2.3


From 36601c2b36e27435d9be33cfa092120ff69914eb Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Mon, 1 May 2017 16:52:03 +1000
Subject: drm/nouveau/kms/nv50: fix source-rect-only plane updates

This "optimisation" (which was originally meant to skip updating cursor
settings in the core channel on position-only updates) turned out to be
pointless in the final design of the code before it was merged.

Remove it completely, as it breaks other cases.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Cc: stable@vger.kernel.org [4.10+]
---
 drivers/gpu/drm/nouveau/nv50_display.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index b7dd5484e582..84b9bb43d93a 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -912,11 +912,9 @@ nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state)
 		if (memcmp(&armw->point, &asyw->point, sizeof(asyw->point)))
 			asyw->set.point = true;
 
-		if (!varm || asym || armw->state.fb != asyw->state.fb) {
-			ret = nv50_wndw_atomic_check_acquire(wndw, asyw, asyh);
-			if (ret)
-				return ret;
-		}
+		ret = nv50_wndw_atomic_check_acquire(wndw, asyw, asyh);
+		if (ret)
+			return ret;
 	} else
 	if (varm) {
 		nv50_wndw_atomic_check_release(wndw, asyw, harm);
-- 
cgit v1.2.3


From e6db95799b1b870aae15682a6d0898df9e9dfb38 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Mon, 1 May 2017 16:53:40 +1000
Subject: drm/nouveau/kms/nv50: skip core channel cursor update on
 position-only changes

The DRM core used to only call prepare_fb/cleanup_fb() when a plane's
framebuffer changed, which achieved the desired effect.

It's apparently now up to the driver to decide on its own.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Cc: stable@vger.kernel.org [4.11+]
---
 drivers/gpu/drm/nouveau/nv50_display.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 84b9bb43d93a..fcec2aba9ad7 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -1119,9 +1119,13 @@ static void
 nv50_curs_prepare(struct nv50_wndw *wndw, struct nv50_head_atom *asyh,
 		  struct nv50_wndw_atom *asyw)
 {
-	asyh->curs.handle = nv50_disp(wndw->plane.dev)->mast.base.vram.handle;
-	asyh->curs.offset = asyw->image.offset;
-	asyh->set.curs = asyh->curs.visible;
+	u32 handle = nv50_disp(wndw->plane.dev)->mast.base.vram.handle;
+	u32 offset = asyw->image.offset;
+	if (asyh->curs.handle != handle || asyh->curs.offset != offset) {
+		asyh->curs.handle = handle;
+		asyh->curs.offset = offset;
+		asyh->set.curs = asyh->curs.visible;
+	}
 }
 
 static void
-- 
cgit v1.2.3


From 80a92865f2113fe8bc268218b0c4156cfab4c9ac Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 28 Mar 2017 11:46:37 +1000
Subject: drm/nouveau/fb/ram/gf100-: remove 0x10f200 read

This reg has moved on Pascal, and causes a bus fault.

We never use the value anyway, so just remove the read.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
index c63975907c90..4a9bd4f1cb93 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
@@ -638,7 +638,6 @@ gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb,
 			return ret;
 	}
 
-	ram->ranks = (nvkm_rd32(device, 0x10f200) & 0x00000004) ? 2 : 1;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 89ed10a5721d569b053a8355c1a6bd812fed7c29 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 4 May 2017 17:29:03 +1000
Subject: drm/nouveau/core: fix static checker warning

object->engine cannot be NULL, it's either valid, or an error pointer.

This particular condition shouldn't actually be possible, but just in
case, we'll keep it.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/core/object.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/core/object.c b/drivers/gpu/drm/nouveau/nvkm/core/object.c
index 89d2e9da11c7..acd76fd4f6d8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/object.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/object.c
@@ -295,7 +295,7 @@ nvkm_object_ctor(const struct nvkm_object_func *func,
 	INIT_LIST_HEAD(&object->head);
 	INIT_LIST_HEAD(&object->tree);
 	RB_CLEAR_NODE(&object->node);
-	WARN_ON(oclass->engine && !object->engine);
+	WARN_ON(IS_ERR(object->engine));
 }
 
 int
-- 
cgit v1.2.3


From 3733bd8b407211739e72d051e5f30ad82a52c4bc Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 11 May 2017 16:53:42 +1000
Subject: drm/nouveau/tmr: ack interrupt before processing alarms

Fixes a race where we can miss an alarm that triggers while we're already
processing previous alarms.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c
index 7b9ce87f0617..7f48249f41de 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c
@@ -76,8 +76,8 @@ nv04_timer_intr(struct nvkm_timer *tmr)
 	u32 stat = nvkm_rd32(device, NV04_PTIMER_INTR_0);
 
 	if (stat & 0x00000001) {
-		nvkm_timer_alarm_trigger(tmr);
 		nvkm_wr32(device, NV04_PTIMER_INTR_0, 0x00000001);
+		nvkm_timer_alarm_trigger(tmr);
 		stat &= ~0x00000001;
 	}
 
-- 
cgit v1.2.3


From 1b0f84380b10ee97f7d2dd191294de9017e94d1d Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 11 May 2017 17:19:48 +1000
Subject: drm/nouveau/tmr: handle races with hw when updating the next alarm
 time

If the time to the next alarm is short enough, we could race with HW and
end up with an ~4 second delay until it triggers.

Fix this by checking again after we update HW.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c | 26 +++++++++++++++---------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
index 07dc82bfe346..4e696627fdcd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
@@ -36,23 +36,29 @@ nvkm_timer_alarm_trigger(struct nvkm_timer *tmr)
 	unsigned long flags;
 	LIST_HEAD(exec);
 
-	/* move any due alarms off the pending list */
+	/* Process pending alarms. */
 	spin_lock_irqsave(&tmr->lock, flags);
 	list_for_each_entry_safe(alarm, atemp, &tmr->alarms, head) {
-		if (alarm->timestamp <= nvkm_timer_read(tmr))
-			list_move_tail(&alarm->head, &exec);
+		/* Have we hit the earliest alarm that hasn't gone off? */
+		if (alarm->timestamp > nvkm_timer_read(tmr)) {
+			/* Schedule it.  If we didn't race, we're done. */
+			tmr->func->alarm_init(tmr, alarm->timestamp);
+			if (alarm->timestamp > nvkm_timer_read(tmr))
+				break;
+		}
+
+		/* Move to completed list.  We'll drop the lock before
+		 * executing the callback so it can reschedule itself.
+		 */
+		list_move_tail(&alarm->head, &exec);
 	}
 
-	/* reschedule interrupt for next alarm time */
-	if (!list_empty(&tmr->alarms)) {
-		alarm = list_first_entry(&tmr->alarms, typeof(*alarm), head);
-		tmr->func->alarm_init(tmr, alarm->timestamp);
-	} else {
+	/* Shut down interrupt if no more pending alarms. */
+	if (list_empty(&tmr->alarms))
 		tmr->func->alarm_fini(tmr);
-	}
 	spin_unlock_irqrestore(&tmr->lock, flags);
 
-	/* execute any pending alarm handlers */
+	/* Execute completed callbacks. */
 	list_for_each_entry_safe(alarm, atemp, &exec, head) {
 		list_del_init(&alarm->head);
 		alarm->func(alarm);
-- 
cgit v1.2.3


From 9fc64667ee48c9a25e7dca1a6bcb6906fec5bcc5 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 11 May 2017 17:03:05 +1000
Subject: drm/nouveau/tmr: fix corruption of the pending list when rescheduling
 an alarm

At least therm/fantog "attempts" to work around this issue, which could
lead to corruption of the pending alarm list.

Fix it properly by not updating the timestamp without the lock held, or
trying to add an already pending alarm to the pending alarm list....

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
index 4e696627fdcd..d898787d514c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
@@ -71,14 +71,17 @@ nvkm_timer_alarm(struct nvkm_timer *tmr, u32 nsec, struct nvkm_alarm *alarm)
 	struct nvkm_alarm *list;
 	unsigned long flags;
 
-	alarm->timestamp = nvkm_timer_read(tmr) + nsec;
-
-	/* append new alarm to list, in soonest-alarm-first order */
+	/* Remove alarm from pending list.
+	 *
+	 * This both protects against the corruption of the list,
+	 * and implements alarm rescheduling/cancellation.
+	 */
 	spin_lock_irqsave(&tmr->lock, flags);
-	if (!nsec) {
-		if (!list_empty(&alarm->head))
-			list_del(&alarm->head);
-	} else {
+	list_del_init(&alarm->head);
+
+	if (nsec) {
+		/* Insert into pending list, ordered earliest to latest. */
+		alarm->timestamp = nvkm_timer_read(tmr) + nsec;
 		list_for_each_entry(list, &tmr->alarms, head) {
 			if (list->timestamp > alarm->timestamp)
 				break;
-- 
cgit v1.2.3


From 330bdf62fe6a6c5b99a647f7bf7157107c9348b3 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 11 May 2017 17:13:29 +1000
Subject: drm/nouveau/tmr: avoid processing completed alarms when adding a new
 one

The idea here was to avoid having to "manually" program the HW if there's
a new earliest alarm.  This was lazy and bad, as it leads to loads of fun
races between inter-related callers (ie. therm).

Turns out, it's not so difficult after all.  Go figure ;)

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
index d898787d514c..f2a86eae0a0d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
@@ -86,12 +86,22 @@ nvkm_timer_alarm(struct nvkm_timer *tmr, u32 nsec, struct nvkm_alarm *alarm)
 			if (list->timestamp > alarm->timestamp)
 				break;
 		}
+
 		list_add_tail(&alarm->head, &list->head);
+
+		/* Update HW if this is now the earliest alarm. */
+		list = list_first_entry(&tmr->alarms, typeof(*list), head);
+		if (list == alarm) {
+			tmr->func->alarm_init(tmr, alarm->timestamp);
+			/* This shouldn't happen if callers aren't stupid.
+			 *
+			 * Worst case scenario is that it'll take roughly
+			 * 4 seconds for the next alarm to trigger.
+			 */
+			WARN_ON(alarm->timestamp <= nvkm_timer_read(tmr));
+		}
 	}
 	spin_unlock_irqrestore(&tmr->lock, flags);
-
-	/* process pending alarms */
-	nvkm_timer_alarm_trigger(tmr);
 }
 
 void
-- 
cgit v1.2.3


From e4311ee51d1e2676001b2d8fcefd92bdd79aad85 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 11 May 2017 17:33:39 +1000
Subject: drm/nouveau/therm: remove ineffective workarounds for alarm bugs

These were ineffective due to touching the list without the alarm lock,
but should no longer be required.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c   | 2 +-
 drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c    | 2 +-
 drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c | 2 +-
 drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
index df949fa7d05d..be691a7b972f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
@@ -146,7 +146,7 @@ nvkm_therm_update(struct nvkm_therm *therm, int mode)
 		poll = false;
 	}
 
-	if (list_empty(&therm->alarm.head) && poll)
+	if (poll)
 		nvkm_timer_alarm(tmr, 1000000000ULL, &therm->alarm);
 	spin_unlock_irqrestore(&therm->lock, flags);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c
index 91198d79393a..e2feccec25f5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c
@@ -83,7 +83,7 @@ nvkm_fan_update(struct nvkm_fan *fan, bool immediate, int target)
 	spin_unlock_irqrestore(&fan->lock, flags);
 
 	/* schedule next fan update, if not at target speed already */
-	if (list_empty(&fan->alarm.head) && target != duty) {
+	if (target != duty) {
 		u16 bump_period = fan->bios.bump_period;
 		u16 slow_down_period = fan->bios.slow_down_period;
 		u64 delay;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c
index 59701b7a6597..ff9fbe7950e5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c
@@ -53,7 +53,7 @@ nvkm_fantog_update(struct nvkm_fantog *fan, int percent)
 	duty = !nvkm_gpio_get(gpio, 0, DCB_GPIO_FAN, 0xff);
 	nvkm_gpio_set(gpio, 0, DCB_GPIO_FAN, 0xff, duty);
 
-	if (list_empty(&fan->alarm.head) && percent != (duty * 100)) {
+	if (percent != (duty * 100)) {
 		u64 next_change = (percent * fan->period_us) / 100;
 		if (!duty)
 			next_change = fan->period_us - next_change;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c
index b9703c02d8ca..9a79e91fdfdc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c
@@ -185,7 +185,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm)
 	spin_unlock_irqrestore(&therm->sensor.alarm_program_lock, flags);
 
 	/* schedule the next poll in one second */
-	if (therm->func->temp_get(therm) >= 0 && list_empty(&alarm->head))
+	if (therm->func->temp_get(therm) >= 0)
 		nvkm_timer_alarm(tmr, 1000000000ULL, alarm);
 }
 
-- 
cgit v1.2.3


From 3af9256150b3ecc597c5428711f87ce105d1d58e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 11 May 2017 10:14:06 -0700
Subject: Input: cros_ec_keyb - remove extraneous 'const'

gcc-7 warns about 'const SIMPLE_DEV_PM_OPS', as that macro already contains
a 'const' keyword:

drivers/input/keyboard/cros_ec_keyb.c:663:14: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier]
 static const SIMPLE_DEV_PM_OPS(cros_ec_keyb_pm_ops, NULL, cros_ec_keyb_resume);

This removes the extra one.

Fixes: 6af6dc2d2aa6 ("input: Add ChromeOS EC keyboard driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/cros_ec_keyb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c
index c7a8120b13c0..79eb29550c34 100644
--- a/drivers/input/keyboard/cros_ec_keyb.c
+++ b/drivers/input/keyboard/cros_ec_keyb.c
@@ -660,7 +660,7 @@ static const struct of_device_id cros_ec_keyb_of_match[] = {
 MODULE_DEVICE_TABLE(of, cros_ec_keyb_of_match);
 #endif
 
-static const SIMPLE_DEV_PM_OPS(cros_ec_keyb_pm_ops, NULL, cros_ec_keyb_resume);
+static SIMPLE_DEV_PM_OPS(cros_ec_keyb_pm_ops, NULL, cros_ec_keyb_resume);
 
 static struct platform_driver cros_ec_keyb_driver = {
 	.probe = cros_ec_keyb_probe,
-- 
cgit v1.2.3


From 7b8cd3363e8a0e6b90a7067f75aaeaae61a7d612 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Sun, 7 May 2017 20:12:52 +0300
Subject: drm/i915: Make vblank evade warnings optional
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new Kconfig option to enable/disable the extra warnings
from the vblank evade code. For now we'll keep the warning
about an actually missed vblank always enabled as that can have
an actual user visible impact. But if we miss the deadline
othrwise there's no real need to bother the user with that.
We'll want these warnings enabled during development however
so that we can catch regressions.

Based on the reports it looks like this is still very easy
to hit on SKL, so we have more work ahead of us to optimize
the crtiical section further.

Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reported-by: Jens Axboe <axboe@kernel.dk>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Fixes: e1edbd44e23b ("drm/i915: Complain if we take too long under vblank evasion.")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/i915/Kconfig.debug  | 13 +++++++++++++
 drivers/gpu/drm/i915/intel_sprite.c |  7 +++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index e091809a9a9e..b00edd3b8800 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -87,3 +87,16 @@ config DRM_I915_LOW_LEVEL_TRACEPOINTS
           and also analyze the request dependency resolving timeline.
 
           If in doubt, say "N".
+
+config DRM_I915_DEBUG_VBLANK_EVADE
+	bool "Enable extra debug warnings for vblank evasion"
+	depends on DRM_I915
+	default n
+	help
+	  Choose this option to turn on extra debug warnings for the
+	  vblank evade mechanism. This gives a warning every time the
+	  the deadline allotted for the vblank evade critical section
+	  is exceeded, even if there isn't an actual risk of missing
+	  the vblank.
+
+	  If in doubt, say "N".
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index f7d431427115..8c87c717c7cd 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -198,12 +198,15 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work
 			  ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time),
 			  crtc->debug.min_vbl, crtc->debug.max_vbl,
 			  crtc->debug.scanline_start, scanline_end);
-	} else if (ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time) >
-		   VBLANK_EVASION_TIME_US)
+	}
+#ifdef CONFIG_DRM_I915_DEBUG_VBLANK_EVADE
+	else if (ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time) >
+		 VBLANK_EVASION_TIME_US)
 		DRM_WARN("Atomic update on pipe (%c) took %lld us, max time under evasion is %u us\n",
 			 pipe_name(pipe),
 			 ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time),
 			 VBLANK_EVASION_TIME_US);
+#endif
 }
 
 static void
-- 
cgit v1.2.3


From 31cbee6a5611f07d2d66f55bb6f8648db5947e32 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 11 May 2017 11:14:45 +0200
Subject: sound: Disable the build of OSS drivers

OSS drivers are left as badly unmaintained, and now we're facing a
problem to clean up the hackish set_fs() usage in their codes.  Since
most of drivers have been covered by ALSA, and the others are dead old
and inactive, let's leave them RIP.

This patch is the first step: disable the build of OSS drivers.
We'll eventually drop the whole codes and clean up later.

Note that sound/oss/dmasound is still kept, since it's a completely
different implementation of OSS, and it doesn't suffer from set_fs()
hack.  Moreover, the build of ALSA is disabled on M68K by some reason,
thus disabling it shall result in a regression.  This one will be
disabled / removed once when we add the support in ALSA side.

Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/Kconfig b/sound/Kconfig
index ee2e69a9ecd1..6a215a8c0490 100644
--- a/sound/Kconfig
+++ b/sound/Kconfig
@@ -115,6 +115,7 @@ endif # SND
 menuconfig SOUND_PRIME
 	tristate "Open Sound System (DEPRECATED)"
 	select SOUND_OSS_CORE
+	depends on BROKEN
 	help
 	  Say 'Y' or 'M' to enable Open Sound System drivers.
 
-- 
cgit v1.2.3


From 18365225f0440d09708ad9daade2ec11275c3df9 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 12 May 2017 15:46:26 -0700
Subject: hwpoison, memcg: forcibly uncharge LRU pages

Laurent Dufour has noticed that hwpoinsoned pages are kept charged.  In
his particular case he has hit a bad_page("page still charged to
cgroup") when onlining a hwpoison page.  While this looks like something
that shouldn't happen in the first place because onlining hwpages and
returning them to the page allocator makes only little sense it shows a
real problem.

hwpoison pages do not get freed usually so we do not uncharge them (at
least not since commit 0a31bc97c80c ("mm: memcontrol: rewrite uncharge
API")).  Each charge pins memcg (since e8ea14cc6ead ("mm: memcontrol:
take a css reference for each charged page")) as well and so the
mem_cgroup and the associated state will never go away.  Fix this leak
by forcibly uncharging a LRU hwpoisoned page in delete_from_lru_cache().
We also have to tweak uncharge_list because it cannot rely on zero ref
count for these pages.

[akpm@linux-foundation.org: coding-style fixes]
Fixes: 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API")
Link: http://lkml.kernel.org/r/20170502185507.GB19165@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Tested-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c     | 2 +-
 mm/memory-failure.c | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ff73899af61a..94172089f52f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5528,7 +5528,7 @@ static void uncharge_list(struct list_head *page_list)
 		next = page->lru.next;
 
 		VM_BUG_ON_PAGE(PageLRU(page), page);
-		VM_BUG_ON_PAGE(page_count(page), page);
+		VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
 
 		if (!page->mem_cgroup)
 			continue;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 73066b80d14a..2527dfeddb00 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -539,6 +539,13 @@ static int delete_from_lru_cache(struct page *p)
 		 */
 		ClearPageActive(p);
 		ClearPageUnevictable(p);
+
+		/*
+		 * Poisoned page might never drop its ref count to 0 so we have
+		 * to uncharge it manually from its memcg.
+		 */
+		mem_cgroup_uncharge(p);
+
 		/*
 		 * drop the page count elevated by isolate_lru_page()
 		 */
-- 
cgit v1.2.3


From 572e0ca9b909339fbe017aaff1a225efb6db3b61 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Fri, 12 May 2017 15:46:29 -0700
Subject: time: delete current_fs_time()

All uses of the current_fs_time() function have been replaced by other
time interfaces.

And, its use cases can be fulfilled by current_time() or ktime_get_*
variants.

Link: http://lkml.kernel.org/r/1491613030-11599-13-git-send-email-deepa.kernel@gmail.com
Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h |  1 -
 kernel/time/time.c | 14 --------------
 2 files changed, 15 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0ad325ed71e8..803e5a9b2654 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1431,7 +1431,6 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
 	inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
 }
 
-extern struct timespec current_fs_time(struct super_block *sb);
 extern struct timespec current_time(struct inode *inode);
 
 /*
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 6574bba44b55..49c73c6ed648 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -230,20 +230,6 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
 	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
 }
 
-/**
- * current_fs_time - Return FS time
- * @sb: Superblock.
- *
- * Return the current time truncated to the time granularity supported by
- * the fs.
- */
-struct timespec current_fs_time(struct super_block *sb)
-{
-	struct timespec now = current_kernel_time();
-	return timespec_trunc(now, sb->s_time_gran);
-}
-EXPORT_SYMBOL(current_fs_time);
-
 /*
  * Convert jiffies to milliseconds and back.
  *
-- 
cgit v1.2.3


From 8d35bb310698c69d73073b26fc581f2e3f7f621d Mon Sep 17 00:00:00 2001
From: Reza Arbab <arbab@linux.vnet.ibm.com>
Date: Fri, 12 May 2017 15:46:32 -0700
Subject: mm, vmstat: Remove spurious WARN() during zoneinfo print

After commit e2ecc8a79ed4 ("mm, vmstat: print non-populated zones in
zoneinfo"), /proc/zoneinfo will show unpopulated zones.

A memoryless node, having no populated zones at all, was previously
ignored, but will now trigger the WARN() in is_zone_first_populated().

Remove this warning, as its only purpose was to warn of a situation that
has since been enabled.

Aside: The "per-node stats" are still printed under the first populated
zone, but that's not necessarily the first stanza any more.  I'm not
sure which criteria is more important with regard to not breaking
parsers, but it looks a little weird to the eye.

Fixes:  e2ecc8a79ed4 ("mm, vmstat: print node-based stats in zoneinfo file")
Link: http://lkml.kernel.org/r/1493854905-10918-1-git-send-email-arbab@linux.vnet.ibm.com
Signed-off-by: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmstat.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mm/vmstat.c b/mm/vmstat.c
index f5fa1bd1eb16..76f73670200a 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1359,8 +1359,6 @@ static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
 			return zone == compare;
 	}
 
-	/* The zone must be somewhere! */
-	WARN_ON_ONCE(1);
 	return false;
 }
 
-- 
cgit v1.2.3


From 05384213436ab690c46d9dfec706b80ef8d671ab Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Fri, 12 May 2017 15:46:35 -0700
Subject: gcov: support GCC 7.1

Starting from GCC 7.1, __gcov_exit is a new symbol expected to be
implemented in a profiling runtime.

[akpm@linux-foundation.org: coding-style fixes]
[mliska@suse.cz: v2]
  Link: http://lkml.kernel.org/r/e63a3c59-0149-c97e-4084-20ca8f146b26@suse.cz
Link: http://lkml.kernel.org/r/8c4084fa-3885-29fe-5fc4-0d4ca199c785@suse.cz
Signed-off-by: Martin Liska <mliska@suse.cz>
Acked-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/gcov/base.c    | 6 ++++++
 kernel/gcov/gcc_4_7.c | 4 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
index 2f9df37940a0..c51a49c9be70 100644
--- a/kernel/gcov/base.c
+++ b/kernel/gcov/base.c
@@ -98,6 +98,12 @@ void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters)
 }
 EXPORT_SYMBOL(__gcov_merge_icall_topn);
 
+void __gcov_exit(void)
+{
+	/* Unused. */
+}
+EXPORT_SYMBOL(__gcov_exit);
+
 /**
  * gcov_enable_events - enable event reporting through gcov_event()
  *
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index 6a5c239c7669..46a18e72bce6 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
 #include <linux/vmalloc.h>
 #include "gcov.h"
 
-#if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
+#if (__GNUC__ >= 7)
+#define GCOV_COUNTERS			9
+#elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
 #define GCOV_COUNTERS			10
 #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
 #define GCOV_COUNTERS			9
-- 
cgit v1.2.3


From 835152a25949dd5561819c2e3fcc4b2dd27f3e1e Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj38.park@gmail.com>
Date: Fri, 12 May 2017 15:46:38 -0700
Subject: mm/khugepaged: add missed tracepoint for collapse_huge_page_swapin

One return case of `__collapse_huge_page_swapin()` does not invoke
tracepoint while every other return case does.  This commit adds a
tracepoint invocation for the case.

Link: http://lkml.kernel.org/r/20170507101813.30187-1-sj38.park@gmail.com
Signed-off-by: SeongJae Park <sj38.park@gmail.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/khugepaged.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 7cb9c88bb4a3..d1943e54ba95 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -907,8 +907,10 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 				return false;
 			}
 			/* check if the pmd is still valid */
-			if (mm_find_pmd(mm, address) != pmd)
+			if (mm_find_pmd(mm, address) != pmd) {
+				trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 				return false;
+			}
 		}
 		if (ret & VM_FAULT_ERROR) {
 			trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
-- 
cgit v1.2.3


From 8594a21cf7a8318baedbedc3fcd2967a17ddeec0 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 12 May 2017 15:46:41 -0700
Subject: mm, vmalloc: fix vmalloc users tracking properly

Commit 1f5307b1e094 ("mm, vmalloc: properly track vmalloc users") has
pulled asm/pgtable.h include dependency to linux/vmalloc.h and that
turned out to be a bad idea for some architectures.  E.g.  m68k fails
with

   In file included from arch/m68k/include/asm/pgtable_mm.h:145:0,
                    from arch/m68k/include/asm/pgtable.h:4,
                    from include/linux/vmalloc.h:9,
                    from arch/m68k/kernel/module.c:9:
   arch/m68k/include/asm/mcf_pgtable.h: In function 'nocache_page':
>> arch/m68k/include/asm/mcf_pgtable.h:339:43: error: 'init_mm' undeclared (first use in this function)
    #define pgd_offset_k(address) pgd_offset(&init_mm, address)

as spotted by kernel build bot. nios2 fails for other reason

  In file included from include/asm-generic/io.h:767:0,
                   from arch/nios2/include/asm/io.h:61,
                   from include/linux/io.h:25,
                   from arch/nios2/include/asm/pgtable.h:18,
                   from include/linux/mm.h:70,
                   from include/linux/pid_namespace.h:6,
                   from include/linux/ptrace.h:9,
                   from arch/nios2/include/uapi/asm/elf.h:23,
                   from arch/nios2/include/asm/elf.h:22,
                   from include/linux/elf.h:4,
                   from include/linux/module.h:15,
                   from init/main.c:16:
  include/linux/vmalloc.h: In function '__vmalloc_node_flags':
  include/linux/vmalloc.h:99:40: error: 'PAGE_KERNEL' undeclared (first use in this function); did you mean 'GFP_KERNEL'?

which is due to the newly added #include <asm/pgtable.h>, which on nios2
includes <linux/io.h> and thus <asm/io.h> and <asm-generic/io.h> which
again includes <linux/vmalloc.h>.

Tweaking that around just turns out a bigger headache than necessary.
This patch reverts 1f5307b1e094 and reimplements the original fix in a
different way.  __vmalloc_node_flags can stay static inline which will
cover vmalloc* functions.  We only have one external user
(kvmalloc_node) and we can export __vmalloc_node_flags_caller and
provide the caller directly.  This is much simpler and it doesn't really
need any games with header files.

[akpm@linux-foundation.org: coding-style fixes]
[mhocko@kernel.org: revert old comment]
  Link: http://lkml.kernel.org/r/20170509211054.GB16325@dhcp22.suse.cz
Fixes: 1f5307b1e094 ("mm, vmalloc: properly track vmalloc users")
Link: http://lkml.kernel.org/r/20170509153702.GR6481@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Tobias Klauser <tklauser@distanz.ch>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmalloc.h | 21 ++++++---------------
 mm/util.c               |  3 ++-
 mm/vmalloc.c            | 19 ++++++++++++++++++-
 3 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 0328ce003992..2d92dd002abd 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -6,7 +6,6 @@
 #include <linux/list.h>
 #include <linux/llist.h>
 #include <asm/page.h>		/* pgprot_t */
-#include <asm/pgtable.h>	/* PAGE_KERNEL */
 #include <linux/rbtree.h>
 
 struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
@@ -83,22 +82,14 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
 			const void *caller);
 #ifndef CONFIG_MMU
 extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
-#else
-extern void *__vmalloc_node(unsigned long size, unsigned long align,
-			    gfp_t gfp_mask, pgprot_t prot,
-			    int node, const void *caller);
-
-/*
- * We really want to have this inlined due to caller tracking. This
- * function is used by the highlevel vmalloc apis and so we want to track
- * their callers and inlining will achieve that.
- */
-static inline void *__vmalloc_node_flags(unsigned long size,
-					int node, gfp_t flags)
+static inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
+						gfp_t flags, void *caller)
 {
-	return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
-					node, __builtin_return_address(0));
+	return __vmalloc_node_flags(size, node, flags);
 }
+#else
+extern void *__vmalloc_node_flags_caller(unsigned long size,
+					 int node, gfp_t flags, void *caller);
 #endif
 
 extern void vfree(const void *addr);
diff --git a/mm/util.c b/mm/util.c
index 718154debc87..464df3489903 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -382,7 +382,8 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
 	if (ret || size <= PAGE_SIZE)
 		return ret;
 
-	return __vmalloc_node_flags(size, node, flags);
+	return __vmalloc_node_flags_caller(size, node, flags,
+			__builtin_return_address(0));
 }
 EXPORT_SYMBOL(kvmalloc_node);
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 194c22eccb9d..34a1c3e46ed7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1649,6 +1649,9 @@ void *vmap(struct page **pages, unsigned int count,
 }
 EXPORT_SYMBOL(vmap);
 
+static void *__vmalloc_node(unsigned long size, unsigned long align,
+			    gfp_t gfp_mask, pgprot_t prot,
+			    int node, const void *caller);
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 pgprot_t prot, int node)
 {
@@ -1791,7 +1794,7 @@ fail:
  *	with mm people.
  *
  */
-void *__vmalloc_node(unsigned long size, unsigned long align,
+static void *__vmalloc_node(unsigned long size, unsigned long align,
 			    gfp_t gfp_mask, pgprot_t prot,
 			    int node, const void *caller)
 {
@@ -1806,6 +1809,20 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 }
 EXPORT_SYMBOL(__vmalloc);
 
+static inline void *__vmalloc_node_flags(unsigned long size,
+					int node, gfp_t flags)
+{
+	return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
+					node, __builtin_return_address(0));
+}
+
+
+void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
+				  void *caller)
+{
+	return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);
+}
+
 /**
  *	vmalloc  -  allocate virtually contiguous memory
  *	@size:		allocation size
-- 
cgit v1.2.3


From cea582247a0aea78b4674c32833c10b8820fcdbe Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 12 May 2017 15:46:44 -0700
Subject: Tigran has moved

Cc: Tigran Aivazian <aivazian.tigran@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/bfs.txt     | 2 +-
 MAINTAINERS                           | 2 +-
 arch/x86/kernel/cpu/microcode/amd.c   | 2 +-
 arch/x86/kernel/cpu/microcode/core.c  | 2 +-
 arch/x86/kernel/cpu/microcode/intel.c | 2 +-
 fs/bfs/inode.c                        | 4 ++--
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/bfs.txt b/Documentation/filesystems/bfs.txt
index 78043d5a8fc3..843ce91a2e40 100644
--- a/Documentation/filesystems/bfs.txt
+++ b/Documentation/filesystems/bfs.txt
@@ -54,4 +54,4 @@ The first 4 bytes should be 0x1badface.
 If you have any patches, questions or suggestions regarding this BFS
 implementation please contact the author:
 
-Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+Tigran Aivazian <aivazian.tigran@gmail.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index 6b36037cbe8f..f7d568b8f133 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2483,7 +2483,7 @@ S:	Maintained
 F:	drivers/net/ethernet/ec_bhf.c
 
 BFS FILE SYSTEM
-M:	"Tigran A. Aivazian" <tigran@aivazian.fsnet.co.uk>
+M:	"Tigran A. Aivazian" <aivazian.tigran@gmail.com>
 S:	Maintained
 F:	Documentation/filesystems/bfs.txt
 F:	fs/bfs/
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 1d38e53c2d5c..45db4d2ebd01 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -10,7 +10,7 @@
  *  Author: Peter Oruba <peter.oruba@amd.com>
  *
  *  Based on work by:
- *  Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *  Tigran Aivazian <aivazian.tigran@gmail.com>
  *
  *  early loader:
  *  Copyright (C) 2013 Advanced Micro Devices, Inc.
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index b4a4cd39b358..e53d3c909840 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -1,7 +1,7 @@
 /*
  * CPU Microcode Update Driver for Linux
  *
- * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
  *	      2006	Shaohua Li <shaohua.li@intel.com>
  *	      2013-2016	Borislav Petkov <bp@alien8.de>
  *
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 8325d8a09ab0..afdfd237b59f 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -1,7 +1,7 @@
 /*
  * Intel CPU Microcode Update Driver for Linux
  *
- * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
  *		 2006 Shaohua Li <shaohua.li@intel.com>
  *
  * Intel CPU microcode early update for Linux
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index f2deec0a62f0..25e312cb6071 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -1,7 +1,7 @@
 /*
  *	fs/bfs/inode.c
  *	BFS superblock and inode operations.
- *	Copyright (C) 1999-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *	Copyright (C) 1999-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
  *	From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
  *
  *      Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
@@ -19,7 +19,7 @@
 #include <linux/uaccess.h>
 #include "bfs.h"
 
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
+MODULE_AUTHOR("Tigran Aivazian <aivazian.tigran@gmail.com>");
 MODULE_DESCRIPTION("SCO UnixWare BFS filesystem for Linux");
 MODULE_LICENSE("GPL");
 
-- 
cgit v1.2.3


From 4636e70bb0a8b871998b6841a2e4b205cf2bc863 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Fri, 12 May 2017 15:46:47 -0700
Subject: dax: prevent invalidation of mapped DAX entries

Patch series "mm,dax: Fix data corruption due to mmap inconsistency",
v4.

This series fixes data corruption that can happen for DAX mounts when
page faults race with write(2) and as a result page tables get out of
sync with block mappings in the filesystem and thus data seen through
mmap is different from data seen through read(2).

The series passes testing with t_mmap_stale test program from Ross and
also other mmap related tests on DAX filesystem.

This patch (of 4):

dax_invalidate_mapping_entry() currently removes DAX exceptional entries
only if they are clean and unlocked.  This is done via:

  invalidate_mapping_pages()
    invalidate_exceptional_entry()
      dax_invalidate_mapping_entry()

However, for page cache pages removed in invalidate_mapping_pages()
there is an additional criteria which is that the page must not be
mapped.  This is noted in the comments above invalidate_mapping_pages()
and is checked in invalidate_inode_page().

For DAX entries this means that we can can end up in a situation where a
DAX exceptional entry, either a huge zero page or a regular DAX entry,
could end up mapped but without an associated radix tree entry.  This is
inconsistent with the rest of the DAX code and with what happens in the
page cache case.

We aren't able to unmap the DAX exceptional entry because according to
its comments invalidate_mapping_pages() isn't allowed to block, and
unmap_mapping_range() takes a write lock on the mapping->i_mmap_rwsem.

Since we essentially never have unmapped DAX entries to evict from the
radix tree, just remove dax_invalidate_mapping_entry().

Fixes: c6dcf52c23d2 ("mm: Invalidate DAX radix tree entries only if appropriate")
Link: http://lkml.kernel.org/r/20170510085419.27601-2-jack@suse.cz
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Reported-by: Jan Kara <jack@suse.cz>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>    [4.10+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dax.c            | 29 -----------------------------
 include/linux/dax.h |  1 -
 mm/truncate.c       |  9 +++------
 3 files changed, 3 insertions(+), 36 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 66d79067eedf..38deebb8c86e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -460,35 +460,6 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	return ret;
 }
 
-/*
- * Invalidate exceptional DAX entry if easily possible. This handles DAX
- * entries for invalidate_inode_pages() so we evict the entry only if we can
- * do so without blocking.
- */
-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index)
-{
-	int ret = 0;
-	void *entry, **slot;
-	struct radix_tree_root *page_tree = &mapping->page_tree;
-
-	spin_lock_irq(&mapping->tree_lock);
-	entry = __radix_tree_lookup(page_tree, index, NULL, &slot);
-	if (!entry || !radix_tree_exceptional_entry(entry) ||
-	    slot_locked(mapping, slot))
-		goto out;
-	if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
-	    radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
-		goto out;
-	radix_tree_delete(page_tree, index);
-	mapping->nrexceptional--;
-	ret = 1;
-out:
-	spin_unlock_irq(&mapping->tree_lock);
-	if (ret)
-		dax_wake_mapping_entry_waiter(mapping, index, entry, true);
-	return ret;
-}
-
 /*
  * Invalidate exceptional DAX entry if it is clean.
  */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index d3158e74a59e..d1236d16ef00 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -63,7 +63,6 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
 		    const struct iomap_ops *ops);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 				      pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
diff --git a/mm/truncate.c b/mm/truncate.c
index 83a059e8cd1d..706cff171a15 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -67,17 +67,14 @@ static void truncate_exceptional_entry(struct address_space *mapping,
 
 /*
  * Invalidate exceptional entry if easily possible. This handles exceptional
- * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
- * clean entries.
+ * entries for invalidate_inode_pages().
  */
 static int invalidate_exceptional_entry(struct address_space *mapping,
 					pgoff_t index, void *entry)
 {
-	/* Handled by shmem itself */
-	if (shmem_mapping(mapping))
+	/* Handled by shmem itself, or for DAX we do nothing. */
+	if (shmem_mapping(mapping) || dax_mapping(mapping))
 		return 1;
-	if (dax_mapping(mapping))
-		return dax_invalidate_mapping_entry(mapping, index);
 	clear_shadow_entry(mapping, index, entry);
 	return 1;
 }
-- 
cgit v1.2.3


From cd656375f94632d7b5af57bf67b7b5c0270c591c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 12 May 2017 15:46:50 -0700
Subject: mm: fix data corruption due to stale mmap reads

Currently, we didn't invalidate page tables during invalidate_inode_pages2()
for DAX.  That could result in e.g. 2MiB zero page being mapped into
page tables while there were already underlying blocks allocated and
thus data seen through mmap were different from data seen by read(2).
The following sequence reproduces the problem:

 - open an mmap over a 2MiB hole

 - read from a 2MiB hole, faulting in a 2MiB zero page

 - write to the hole with write(3p). The write succeeds but we
   incorrectly leave the 2MiB zero page mapping intact.

 - via the mmap, read the data that was just written. Since the zero
   page mapping is still intact we read back zeroes instead of the new
   data.

Fix the problem by unconditionally calling invalidate_inode_pages2_range()
in dax_iomap_actor() for new block allocations and by properly
invalidating page tables in invalidate_inode_pages2_range() for DAX
mappings.

Fixes: c6dcf52c23d2d3fb5235cec42d7dd3f786b87d55
Link: http://lkml.kernel.org/r/20170510085419.27601-3-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dax.c      |  2 +-
 mm/truncate.c | 12 +++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 38deebb8c86e..123d9903c77d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1015,7 +1015,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	 * into page tables. We have to tear down these mappings so that data
 	 * written by write(2) is visible in mmap.
 	 */
-	if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) {
+	if (iomap->flags & IOMAP_F_NEW) {
 		invalidate_inode_pages2_range(inode->i_mapping,
 					      pos >> PAGE_SHIFT,
 					      (end - 1) >> PAGE_SHIFT);
diff --git a/mm/truncate.c b/mm/truncate.c
index 706cff171a15..6479ed2afc53 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -686,7 +686,17 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		cond_resched();
 		index++;
 	}
-
+	/*
+	 * For DAX we invalidate page tables after invalidating radix tree.  We
+	 * could invalidate page tables while invalidating each entry however
+	 * that would be expensive. And doing range unmapping before doesn't
+	 * work as we have no cheap way to find whether radix tree entry didn't
+	 * get remapped later.
+	 */
+	if (dax_mapping(mapping)) {
+		unmap_mapping_range(mapping, (loff_t)start << PAGE_SHIFT,
+				    (loff_t)(end - start + 1) << PAGE_SHIFT, 0);
+	}
 out:
 	cleancache_invalidate_inode(mapping);
 	return ret;
-- 
cgit v1.2.3


From fb26a1cbed8c90025572d48bc9eabe59f7571e88 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 12 May 2017 15:46:54 -0700
Subject: ext4: return to starting transaction in ext4_dax_huge_fault()

DAX will return to locking exceptional entry before mapping blocks for a
page fault to fix possible races with concurrent writes.  To avoid lock
inversion between exceptional entry lock and transaction start, start
the transaction already in ext4_dax_huge_fault().

Fixes: 9f141d6ef6258a3a37a045842d9ba7e68f368956
Link: http://lkml.kernel.org/r/20170510085419.27601-4-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext4/file.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index cefa9835f275..831fd6beebf0 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -257,6 +257,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
 		enum page_entry_size pe_size)
 {
 	int result;
+	handle_t *handle = NULL;
 	struct inode *inode = file_inode(vmf->vma->vm_file);
 	struct super_block *sb = inode->i_sb;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
@@ -264,12 +265,24 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
 	if (write) {
 		sb_start_pagefault(sb);
 		file_update_time(vmf->vma->vm_file);
+		down_read(&EXT4_I(inode)->i_mmap_sem);
+		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+					       EXT4_DATA_TRANS_BLOCKS(sb));
+	} else {
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 	}
-	down_read(&EXT4_I(inode)->i_mmap_sem);
-	result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
-	up_read(&EXT4_I(inode)->i_mmap_sem);
-	if (write)
+	if (!IS_ERR(handle))
+		result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
+	else
+		result = VM_FAULT_SIGBUS;
+	if (write) {
+		if (!IS_ERR(handle))
+			ext4_journal_stop(handle);
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 		sb_end_pagefault(sb);
+	} else {
+		up_read(&EXT4_I(inode)->i_mmap_sem);
+	}
 
 	return result;
 }
-- 
cgit v1.2.3


From 13e451fdc1af05568ea379d71c02a126295d2244 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 12 May 2017 15:46:57 -0700
Subject: dax: fix data corruption when fault races with write

Currently DAX read fault can race with write(2) in the following way:

CPU1 - write(2)			CPU2 - read fault
				dax_iomap_pte_fault()
				  ->iomap_begin() - sees hole
dax_iomap_rw()
  iomap_apply()
    ->iomap_begin - allocates blocks
    dax_iomap_actor()
      invalidate_inode_pages2_range()
        - there's nothing to invalidate
				  grab_mapping_entry()
				  - we add zero page in the radix tree
				    and map it to page tables

The result is that hole page is mapped into page tables (and thus zeros
are seen in mmap) while file has data written in that place.

Fix the problem by locking exception entry before mapping blocks for the
fault.  That way we are sure invalidate_inode_pages2_range() call for
racing write will either block on entry lock waiting for the fault to
finish (and unmap stale page tables after that) or read fault will see
already allocated blocks by write(2).

Fixes: 9f141d6ef6258a3a37a045842d9ba7e68f368956
Link: http://lkml.kernel.org/r/20170510085419.27601-5-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dax.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 123d9903c77d..32f020c9cedf 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1148,6 +1148,12 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
 		flags |= IOMAP_WRITE;
 
+	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
+	if (IS_ERR(entry)) {
+		vmf_ret = dax_fault_return(PTR_ERR(entry));
+		goto out;
+	}
+
 	/*
 	 * Note that we don't bother to use iomap_apply here: DAX required
 	 * the file system block size to be equal the page size, which means
@@ -1156,17 +1162,11 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
 	if (error) {
 		vmf_ret = dax_fault_return(error);
-		goto out;
+		goto unlock_entry;
 	}
 	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
-		vmf_ret = dax_fault_return(-EIO);	/* fs corruption? */
-		goto finish_iomap;
-	}
-
-	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
-	if (IS_ERR(entry)) {
-		vmf_ret = dax_fault_return(PTR_ERR(entry));
-		goto finish_iomap;
+		error = -EIO;	/* fs corruption? */
+		goto error_finish_iomap;
 	}
 
 	sector = dax_iomap_sector(&iomap, pos);
@@ -1188,13 +1188,13 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 		}
 
 		if (error)
-			goto error_unlock_entry;
+			goto error_finish_iomap;
 
 		__SetPageUptodate(vmf->cow_page);
 		vmf_ret = finish_fault(vmf);
 		if (!vmf_ret)
 			vmf_ret = VM_FAULT_DONE_COW;
-		goto unlock_entry;
+		goto finish_iomap;
 	}
 
 	switch (iomap.type) {
@@ -1214,7 +1214,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 	case IOMAP_HOLE:
 		if (!(vmf->flags & FAULT_FLAG_WRITE)) {
 			vmf_ret = dax_load_hole(mapping, &entry, vmf);
-			goto unlock_entry;
+			goto finish_iomap;
 		}
 		/*FALLTHRU*/
 	default:
@@ -1223,10 +1223,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 		break;
 	}
 
- error_unlock_entry:
+ error_finish_iomap:
 	vmf_ret = dax_fault_return(error) | major;
- unlock_entry:
-	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  finish_iomap:
 	if (ops->iomap_end) {
 		int copied = PAGE_SIZE;
@@ -1241,7 +1239,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 		 */
 		ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
 	}
-out:
+ unlock_entry:
+	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+ out:
 	trace_dax_pte_fault_done(inode, vmf, vmf_ret);
 	return vmf_ret;
 }
-- 
cgit v1.2.3


From 876f29460cbd4086b43475890c1bf2488fa11d40 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Fri, 12 May 2017 15:47:00 -0700
Subject: dax: fix PMD data corruption when fault races with write

This is based on a patch from Jan Kara that fixed the equivalent race in
the DAX PTE fault path.

Currently DAX PMD read fault can race with write(2) in the following
way:

CPU1 - write(2)                 CPU2 - read fault
                                dax_iomap_pmd_fault()
                                  ->iomap_begin() - sees hole

dax_iomap_rw()
  iomap_apply()
    ->iomap_begin - allocates blocks
    dax_iomap_actor()
      invalidate_inode_pages2_range()
        - there's nothing to invalidate

                                  grab_mapping_entry()
				  - we add huge zero page to the radix tree
				    and map it to page tables

The result is that hole page is mapped into page tables (and thus zeros
are seen in mmap) while file has data written in that place.

Fix the problem by locking exception entry before mapping blocks for the
fault.  That way we are sure invalidate_inode_pages2_range() call for
racing write will either block on entry lock waiting for the fault to
finish (and unmap stale page tables after that) or read fault will see
already allocated blocks by write(2).

Fixes: 9f141d6ef6258 ("dax: Call ->iomap_begin without entry lock during dax fault")
Link: http://lkml.kernel.org/r/20170510172700.18991-1-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dax.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 32f020c9cedf..93ae87297ffa 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1387,6 +1387,16 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
 	if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
 		goto fallback;
 
+	/*
+	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
+	 * PMD or a HZP entry.  If it can't (because a 4k page is already in
+	 * the tree, for instance), it will return -EEXIST and we just fall
+	 * back to 4k entries.
+	 */
+	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+	if (IS_ERR(entry))
+		goto fallback;
+
 	/*
 	 * Note that we don't use iomap_apply here.  We aren't doing I/O, only
 	 * setting up a mapping, so really we're using iomap_begin() as a way
@@ -1395,21 +1405,11 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
 	pos = (loff_t)pgoff << PAGE_SHIFT;
 	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
 	if (error)
-		goto fallback;
+		goto unlock_entry;
 
 	if (iomap.offset + iomap.length < pos + PMD_SIZE)
 		goto finish_iomap;
 
-	/*
-	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
-	 * PMD or a HZP entry.  If it can't (because a 4k page is already in
-	 * the tree, for instance), it will return -EEXIST and we just fall
-	 * back to 4k entries.
-	 */
-	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
-	if (IS_ERR(entry))
-		goto finish_iomap;
-
 	switch (iomap.type) {
 	case IOMAP_MAPPED:
 		result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry);
@@ -1417,7 +1417,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
 		if (WARN_ON_ONCE(write))
-			goto unlock_entry;
+			break;
 		result = dax_pmd_load_hole(vmf, &iomap, &entry);
 		break;
 	default:
@@ -1425,8 +1425,6 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
 		break;
 	}
 
- unlock_entry:
-	put_locked_mapping_entry(mapping, pgoff, entry);
  finish_iomap:
 	if (ops->iomap_end) {
 		int copied = PMD_SIZE;
@@ -1442,6 +1440,8 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
 		ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
 				&iomap);
 	}
+ unlock_entry:
+	put_locked_mapping_entry(mapping, pgoff, entry);
  fallback:
 	if (result == VM_FAULT_FALLBACK) {
 		split_huge_pmd(vma, vmf->pmd, vmf->address);
-- 
cgit v1.2.3


From 338a16ba154959b049458d4f507d60f694550b68 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Fri, 12 May 2017 15:47:03 -0700
Subject: mm, thp: copying user pages must schedule on collapse

We have encountered need_resched warnings in __collapse_huge_page_copy()
while doing {clear,copy}_user_highpage() over HPAGE_PMD_NR source pages.

mm->mmap_sem is held for write, but the iteration is well bounded.

Reschedule as needed.

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1705101426380.109808@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/khugepaged.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index d1943e54ba95..945fd1ca49b5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -612,7 +612,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 				      spinlock_t *ptl)
 {
 	pte_t *_pte;
-	for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++) {
+	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
+				_pte++, page++, address += PAGE_SIZE) {
 		pte_t pteval = *_pte;
 		struct page *src_page;
 
@@ -651,9 +652,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 			spin_unlock(ptl);
 			free_page_and_swap_cache(src_page);
 		}
-
-		address += PAGE_SIZE;
-		page++;
+		cond_resched();
 	}
 }
 
-- 
cgit v1.2.3


From 791b48b642324c4e6bfef27f552aaebead81905f Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Fri, 12 May 2017 15:47:06 -0700
Subject: mm: vmscan: scan until it finds eligible pages

Although there are a ton of free swap and anonymous LRU page in elgible
zones, OOM happened.

  balloon invoked oom-killer: gfp_mask=0x17080c0(GFP_KERNEL_ACCOUNT|__GFP_ZERO|__GFP_NOTRACK), nodemask=(null),  order=0, oom_score_adj=0
  CPU: 7 PID: 1138 Comm: balloon Not tainted 4.11.0-rc6-mm1-zram-00289-ge228d67e9677-dirty #17
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
  Call Trace:
   oom_kill_process+0x21d/0x3f0
   out_of_memory+0xd8/0x390
   __alloc_pages_slowpath+0xbc1/0xc50
   __alloc_pages_nodemask+0x1a5/0x1c0
   pte_alloc_one+0x20/0x50
   __pte_alloc+0x1e/0x110
   __handle_mm_fault+0x919/0x960
   handle_mm_fault+0x77/0x120
   __do_page_fault+0x27a/0x550
   trace_do_page_fault+0x43/0x150
   do_async_page_fault+0x2c/0x90
   async_page_fault+0x28/0x30
  Mem-Info:
  active_anon:424716 inactive_anon:65314 isolated_anon:0
   active_file:52 inactive_file:46 isolated_file:0
   unevictable:0 dirty:27 writeback:0 unstable:0
   slab_reclaimable:3967 slab_unreclaimable:4125
   mapped:133 shmem:43 pagetables:1674 bounce:0
   free:4637 free_pcp:225 free_cma:0
  Node 0 active_anon:1698864kB inactive_anon:261256kB active_file:208kB inactive_file:184kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:532kB dirty:108kB writeback:0kB shmem:172kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no
  DMA free:7316kB min:32kB low:44kB high:56kB active_anon:8064kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:15992kB managed:15908kB mlocked:0kB slab_reclaimable:464kB slab_unreclaimable:40kB kernel_stack:0kB pagetables:24kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
  lowmem_reserve[]: 0 992 992 1952
  DMA32 free:9088kB min:2048kB low:3064kB high:4080kB active_anon:952176kB inactive_anon:0kB active_file:36kB inactive_file:0kB unevictable:0kB writepending:88kB present:1032192kB managed:1019388kB mlocked:0kB slab_reclaimable:13532kB slab_unreclaimable:16460kB kernel_stack:3552kB pagetables:6672kB bounce:0kB free_pcp:56kB local_pcp:24kB free_cma:0kB
  lowmem_reserve[]: 0 0 0 959
  Movable free:3644kB min:1980kB low:2960kB high:3940kB active_anon:738560kB inactive_anon:261340kB active_file:188kB inactive_file:640kB unevictable:0kB writepending:20kB present:1048444kB managed:1010816kB mlocked:0kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:832kB local_pcp:60kB free_cma:0kB
  lowmem_reserve[]: 0 0 0 0
  DMA: 1*4kB (E) 0*8kB 18*16kB (E) 10*32kB (E) 10*64kB (E) 9*128kB (ME) 8*256kB (E) 2*512kB (E) 2*1024kB (E) 0*2048kB 0*4096kB = 7524kB
  DMA32: 417*4kB (UMEH) 181*8kB (UMEH) 68*16kB (UMEH) 48*32kB (UMEH) 14*64kB (MH) 3*128kB (M) 1*256kB (H) 1*512kB (M) 2*1024kB (M) 0*2048kB 0*4096kB = 9836kB
  Movable: 1*4kB (M) 1*8kB (M) 1*16kB (M) 1*32kB (M) 0*64kB 1*128kB (M) 2*256kB (M) 4*512kB (M) 1*1024kB (M) 0*2048kB 0*4096kB = 3772kB
  378 total pagecache pages
  17 pages in swap cache
  Swap cache stats: add 17325, delete 17302, find 0/27
  Free swap  = 978940kB
  Total swap = 1048572kB
  524157 pages RAM
  0 pages HighMem/MovableOnly
  12629 pages reserved
  0 pages cma reserved
  0 pages hwpoisoned
  [ pid ]   uid  tgid total_vm      rss nr_ptes nr_pmds swapents oom_score_adj name
  [  433]     0   433     4904        5      14       3       82             0 upstart-udev-br
  [  438]     0   438    12371        5      27       3      191         -1000 systemd-udevd

With investigation, skipping page of isolate_lru_pages makes reclaim
void because it returns zero nr_taken easily so LRU shrinking is
effectively nothing and just increases priority aggressively.  Finally,
OOM happens.

The problem is that get_scan_count determines nr_to_scan with eligible
zones so although priority drops to zero, it couldn't reclaim any pages
if the LRU contains mostly ineligible pages.

get_scan_count:

        size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
	size = size >> sc->priority;

Assumes sc->priority is 0 and LRU list is as follows.

	N-N-N-N-H-H-H-H-H-H-H-H-H-H-H-H-H-H-H-H

(Ie, small eligible pages are in the head of LRU but others are
 almost ineligible pages)

In that case, size becomes 4 so VM want to scan 4 pages but 4 pages from
tail of the LRU are not eligible pages.  If get_scan_count counts
skipped pages, it doesn't reclaim any pages remained after scanning 4
pages so it ends up OOM happening.

This patch makes isolate_lru_pages try to scan pages until it encounters
eligible zones's pages.

[akpm@linux-foundation.org: clean up mind-bending `for' statement.  Tweak comment text]
Fixes: 3db65812d688 ("Revert "mm, vmscan: account for skipped pages as a partial scan"")
Link: http://lkml.kernel.org/r/1494457232-27401-1-git-send-email-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmscan.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2f45c0520f43..8ad39bbc79e6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1449,7 +1449,7 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
  *
  * Appropriate locks must be held before calling this function.
  *
- * @nr_to_scan:	The number of pages to look through on the list.
+ * @nr_to_scan:	The number of eligible pages to look through on the list.
  * @lruvec:	The LRU vector to pull pages from.
  * @dst:	The temp list to put pages on to.
  * @nr_scanned:	The number of pages that were scanned.
@@ -1469,11 +1469,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
 	unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
 	unsigned long skipped = 0;
-	unsigned long scan, nr_pages;
+	unsigned long scan, total_scan, nr_pages;
 	LIST_HEAD(pages_skipped);
 
-	for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
-					!list_empty(src); scan++) {
+	scan = 0;
+	for (total_scan = 0;
+	     scan < nr_to_scan && nr_taken < nr_to_scan && !list_empty(src);
+	     total_scan++) {
 		struct page *page;
 
 		page = lru_to_page(src);
@@ -1487,6 +1489,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			continue;
 		}
 
+		/*
+		 * Do not count skipped pages because that makes the function
+		 * return with no isolated pages if the LRU mostly contains
+		 * ineligible pages.  This causes the VM to not reclaim any
+		 * pages, triggering a premature OOM.
+		 */
+		scan++;
 		switch (__isolate_lru_page(page, mode)) {
 		case 0:
 			nr_pages = hpage_nr_pages(page);
@@ -1524,9 +1533,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			skipped += nr_skipped[zid];
 		}
 	}
-	*nr_scanned = scan;
+	*nr_scanned = total_scan;
 	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
-				    scan, skipped, nr_taken, mode, lru);
+				    total_scan, skipped, nr_taken, mode, lru);
 	update_lru_sizes(lruvec, lru, nr_zone_taken);
 	return nr_taken;
 }
-- 
cgit v1.2.3


From b340959ea281dbac15344277094d0a294dbe8aca Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 12 May 2017 15:47:09 -0700
Subject: mm, docs: update memory.stat description with workingset* entries

Commit 4b4cea91691d ("mm: vmscan: fix IO/refault regression in cache
workingset transition") introduced three new entries in memory stat
file:

 - workingset_refault
 - workingset_activate
 - workingset_nodereclaim

This commit adds a corresponding description to the cgroup v2 docs.

Link: http://lkml.kernel.org/r/1494530293-31236-1-git-send-email-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroup-v2.txt | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index e50b95c25868..dc5e2dcdbef4 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -918,6 +918,18 @@ PAGE_SIZE multiple when read back.
 
 		Number of major page faults incurred
 
+	  workingset_refault
+
+		Number of refaults of previously evicted pages
+
+	  workingset_activate
+
+		Number of refaulted pages that were immediately activated
+
+	  workingset_nodereclaim
+
+		Number of times a shadow node has been reclaimed
+
   memory.swap.current
 
 	A read-only single value file which exists on non-root
-- 
cgit v1.2.3


From 2ea659a9ef488125eb46da6eb571de5eae5c43f6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 13 May 2017 13:19:49 -0700
Subject: Linux 4.12-rc1

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 2c2dfd8e04f9..b400c0604fac 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
-PATCHLEVEL = 11
+PATCHLEVEL = 12
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3