diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-22 13:23:46 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-22 13:23:46 -0700 |
commit | 09fa30226130652af75152d9010c603c66d46f6e (patch) | |
tree | 26dc4ba7fc66dc0c10e442d81595dd319eef4c75 /drivers/gpu | |
parent | be53bfdb8088e9d1924199cc1a96e113756b1075 (diff) | |
parent | 1b2681ba271c9f5bb66cb0d8ceeaa215fcd218d8 (diff) | |
download | linux-09fa30226130652af75152d9010c603c66d46f6e.tar.bz2 |
Merge branch 'drm-radeon-sitn-support' of git://people.freedesktop.org/~airlied/linux
Pull radeon southern islands / trinity support from Dave Airlie:
"This is support from AMD for their newest GPU and APUs. The products
called RadeonHD 7xxx, and the Trinity APU series.
This did come in a bit late, due to some over-complicated AMD internal
review process, which from the outside seems unnecessary once the
company has decided it wants to support open source. However as I
said previously I'd rather not put the people who've got this hw for 3
months now being forced to use fglrx on it if there is open code.
Its pretty well self contained and just plugs into the driver in
various places."
* 'drm-radeon-sitn-support' of git://people.freedesktop.org/~airlied/linux: (48 commits)
drm/radeon/kms: update duallink checks for DCE6
drm/radeon/kms: add trinity pci ids
drm/radeon/kms: add radeon_asic struct for trinity
drm/radeon/kms: add support for ucode loading on trinity (v2)
drm/radeon/kms/vm: set vram base offset properly for TN
drm/radeon/kms: Update evergreen functions for trinity
drm/radeon/kms: cayman gpu init updates for trinity
drm/radeon/kms: Add checks for TN in the DP bridge code
drm/radeon/kms/DCE6.1: ss is not supported on the internal pplls
drm/radeon/kms: disable PPLL0 on DCE6.1 when not in use
drm/radeon/kms: Adjust pll picker for DCE6.1
drm/radeon/kms: DCE6.1 disp eng pll updates
drm/radeon/kms: DCE6.1 watermark updates for TN
drm/radeon/kms: no support for internal thermal sensor on TN yet
drm/radeon/kms: add trinity (TN) chip family
drm/radeon/kms: Add SI pci ids
drm/radeon: Update radeon_info_ioctl for SI. (v2)
drm/radeon/kms: add radeon_asic struct for SI
drm/radeon/kms: add support for compute rings in CS ioctl on SI
drm/radeon/kms: fill in startup/shutdown callbacks for SI
...
Diffstat (limited to 'drivers/gpu')
33 files changed, 7173 insertions, 144 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 84104153a684..9d83729956ff 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -71,7 +71,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \ evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \ radeon_trace_points.o ni.o cayman_blit_shaders.o atombios_encoders.o \ - radeon_semaphore.o radeon_sa.o atombios_i2c.o + radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o si_blit_shaders.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o diff --git a/drivers/gpu/drm/radeon/ObjectID.h b/drivers/gpu/drm/radeon/ObjectID.h index c61c3fe9fb98..ca4b038050d2 100644 --- a/drivers/gpu/drm/radeon/ObjectID.h +++ b/drivers/gpu/drm/radeon/ObjectID.h @@ -85,6 +85,7 @@ #define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA 0x1F #define ENCODER_OBJECT_ID_INTERNAL_UNIPHY1 0x20 #define ENCODER_OBJECT_ID_INTERNAL_UNIPHY2 0x21 +#define ENCODER_OBJECT_ID_INTERNAL_VCE 0x24 #define ENCODER_OBJECT_ID_GENERAL_EXTERNAL_DVO 0xFF @@ -387,6 +388,10 @@ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\ ENCODER_OBJECT_ID_NUTMEG << OBJECT_ID_SHIFT) +#define ENCODER_VCE_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\ + GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\ + ENCODER_OBJECT_ID_INTERNAL_VCE << OBJECT_ID_SHIFT) + /****************************************************/ /* Connector Object ID definition - Shared with BIOS */ /****************************************************/ diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h index 1b50ad8919d5..4b04ba3828e8 100644 --- a/drivers/gpu/drm/radeon/atombios.h +++ b/drivers/gpu/drm/radeon/atombios.h @@ -101,6 +101,7 @@ #define ATOM_LCD_SELFTEST_START (ATOM_DISABLE+5) #define ATOM_LCD_SELFTEST_STOP (ATOM_ENABLE+5) #define ATOM_ENCODER_INIT (ATOM_DISABLE+7) +#define ATOM_INIT (ATOM_DISABLE+7) #define ATOM_GET_STATUS (ATOM_DISABLE+8) #define ATOM_BLANKING 1 @@ -251,25 +252,25 @@ typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{ USHORT SetEngineClock; //Function Table,directly used by various SW components,latest version 1.1 USHORT SetMemoryClock; //Function Table,directly used by various SW components,latest version 1.1 USHORT SetPixelClock; //Function Table,directly used by various SW components,latest version 1.2 - USHORT DynamicClockGating; //Atomic Table, indirectly used by various SW components,called from ASIC_Init + USHORT EnableDispPowerGating; //Atomic Table, indirectly used by various SW components,called from ASIC_Init USHORT ResetMemoryDLL; //Atomic Table, indirectly used by various SW components,called from SetMemoryClock USHORT ResetMemoryDevice; //Atomic Table, indirectly used by various SW components,called from SetMemoryClock - USHORT MemoryPLLInit; - USHORT AdjustDisplayPll; //only used by Bios + USHORT MemoryPLLInit; //Atomic Table, used only by Bios + USHORT AdjustDisplayPll; //Atomic Table, used by various SW componentes. USHORT AdjustMemoryController; //Atomic Table, indirectly used by various SW components,called from SetMemoryClock USHORT EnableASIC_StaticPwrMgt; //Atomic Table, only used by Bios USHORT ASIC_StaticPwrMgtStatusChange; //Obsolete , only used by Bios USHORT DAC_LoadDetection; //Atomic Table, directly used by various SW components,latest version 1.2 USHORT LVTMAEncoderControl; //Atomic Table,directly used by various SW components,latest version 1.3 - USHORT LCD1OutputControl; //Atomic Table, directly used by various SW components,latest version 1.1 + USHORT HW_Misc_Operation; //Atomic Table, directly used by various SW components,latest version 1.1 USHORT DAC1EncoderControl; //Atomic Table, directly used by various SW components,latest version 1.1 USHORT DAC2EncoderControl; //Atomic Table, directly used by various SW components,latest version 1.1 USHORT DVOOutputControl; //Atomic Table, directly used by various SW components,latest version 1.1 USHORT CV1OutputControl; //Atomic Table, Atomic Table, Obsolete from Ry6xx, use DAC2 Output instead - USHORT GetConditionalGoldenSetting; //only used by Bios + USHORT GetConditionalGoldenSetting; //Only used by Bios USHORT TVEncoderControl; //Function Table,directly used by various SW components,latest version 1.1 - USHORT TMDSAEncoderControl; //Atomic Table, directly used by various SW components,latest version 1.3 - USHORT LVDSEncoderControl; //Atomic Table, directly used by various SW components,latest version 1.3 + USHORT PatchMCSetting; //only used by BIOS + USHORT MC_SEQ_Control; //only used by BIOS USHORT TV1OutputControl; //Atomic Table, Obsolete from Ry6xx, use DAC2 Output instead USHORT EnableScaler; //Atomic Table, used only by Bios USHORT BlankCRTC; //Atomic Table, directly used by various SW components,latest version 1.1 @@ -282,7 +283,7 @@ typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{ USHORT SetCRTC_Replication; //Atomic Table, used only by Bios USHORT SelectCRTC_Source; //Atomic Table, directly used by various SW components,latest version 1.1 USHORT EnableGraphSurfaces; //Atomic Table, used only by Bios - USHORT UpdateCRTC_DoubleBufferRegisters; + USHORT UpdateCRTC_DoubleBufferRegisters; //Atomic Table, used only by Bios USHORT LUT_AutoFill; //Atomic Table, only used by Bios USHORT EnableHW_IconCursor; //Atomic Table, only used by Bios USHORT GetMemoryClock; //Atomic Table, directly used by various SW components,latest version 1.1 @@ -308,27 +309,36 @@ typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{ USHORT SetVoltage; //Function Table,directly and/or indirectly used by various SW components,latest version 1.1 USHORT DAC1OutputControl; //Atomic Table, directly used by various SW components,latest version 1.1 USHORT DAC2OutputControl; //Atomic Table, directly used by various SW components,latest version 1.1 - USHORT SetupHWAssistedI2CStatus; //Function Table,only used by Bios, obsolete soon.Switch to use "ReadEDIDFromHWAssistedI2C" + USHORT ComputeMemoryClockParam; //Function Table,only used by Bios, obsolete soon.Switch to use "ReadEDIDFromHWAssistedI2C" USHORT ClockSource; //Atomic Table, indirectly used by various SW components,called from ASIC_Init USHORT MemoryDeviceInit; //Atomic Table, indirectly used by various SW components,called from SetMemoryClock - USHORT EnableYUV; //Atomic Table, indirectly used by various SW components,called from EnableVGARender + USHORT GetDispObjectInfo; //Atomic Table, indirectly used by various SW components,called from EnableVGARender USHORT DIG1EncoderControl; //Atomic Table,directly used by various SW components,latest version 1.1 USHORT DIG2EncoderControl; //Atomic Table,directly used by various SW components,latest version 1.1 USHORT DIG1TransmitterControl; //Atomic Table,directly used by various SW components,latest version 1.1 USHORT DIG2TransmitterControl; //Atomic Table,directly used by various SW components,latest version 1.1 USHORT ProcessAuxChannelTransaction; //Function Table,only used by Bios USHORT DPEncoderService; //Function Table,only used by Bios + USHORT GetVoltageInfo; //Function Table,only used by Bios since SI }ATOM_MASTER_LIST_OF_COMMAND_TABLES; // For backward compatible #define ReadEDIDFromHWAssistedI2C ProcessI2cChannelTransaction -#define UNIPHYTransmitterControl DIG1TransmitterControl -#define LVTMATransmitterControl DIG2TransmitterControl +#define DPTranslatorControl DIG2EncoderControl +#define UNIPHYTransmitterControl DIG1TransmitterControl +#define LVTMATransmitterControl DIG2TransmitterControl #define SetCRTC_DPM_State GetConditionalGoldenSetting #define SetUniphyInstance ASIC_StaticPwrMgtStatusChange #define HPDInterruptService ReadHWAssistedI2CStatus #define EnableVGA_Access GetSCLKOverMCLKRatio -#define GetDispObjectInfo EnableYUV +#define EnableYUV GetDispObjectInfo +#define DynamicClockGating EnableDispPowerGating +#define SetupHWAssistedI2CStatus ComputeMemoryClockParam + +#define TMDSAEncoderControl PatchMCSetting +#define LVDSEncoderControl MC_SEQ_Control +#define LCD1OutputControl HW_Misc_Operation + typedef struct _ATOM_MASTER_COMMAND_TABLE { @@ -495,6 +505,34 @@ typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V5 // ucInputFlag #define ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN 1 // 1-StrobeMode, 0-PerformanceMode +// use for ComputeMemoryClockParamTable +typedef struct _COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1 +{ + union + { + ULONG ulClock; + ATOM_S_MPLL_FB_DIVIDER ulFbDiv; //Output:UPPER_WORD=FB_DIV_INTEGER, LOWER_WORD=FB_DIV_FRAC shl (16-FB_FRACTION_BITS) + }; + UCHAR ucDllSpeed; //Output + UCHAR ucPostDiv; //Output + union{ + UCHAR ucInputFlag; //Input : ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN: 1-StrobeMode, 0-PerformanceMode + UCHAR ucPllCntlFlag; //Output: + }; + UCHAR ucBWCntl; +}COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1; + +// definition of ucInputFlag +#define MPLL_INPUT_FLAG_STROBE_MODE_EN 0x01 +// definition of ucPllCntlFlag +#define MPLL_CNTL_FLAG_VCO_MODE_MASK 0x03 +#define MPLL_CNTL_FLAG_BYPASS_DQ_PLL 0x04 +#define MPLL_CNTL_FLAG_QDR_ENABLE 0x08 +#define MPLL_CNTL_FLAG_AD_HALF_RATE 0x10 + +//MPLL_CNTL_FLAG_BYPASS_AD_PLL has a wrong name, should be BYPASS_DQ_PLL +#define MPLL_CNTL_FLAG_BYPASS_AD_PLL 0x04 + typedef struct _DYNAMICE_MEMORY_SETTINGS_PARAMETER { ATOM_COMPUTE_CLOCK_FREQ ulClock; @@ -562,6 +600,16 @@ typedef struct _DYNAMIC_CLOCK_GATING_PARAMETERS #define DYNAMIC_CLOCK_GATING_PS_ALLOCATION DYNAMIC_CLOCK_GATING_PARAMETERS /****************************************************************************/ +// Structure used by EnableDispPowerGatingTable.ctb +/****************************************************************************/ +typedef struct _ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 +{ + UCHAR ucDispPipeId; // ATOM_CRTC1, ATOM_CRTC2, ... + UCHAR ucEnable; // ATOM_ENABLE or ATOM_DISABLE + UCHAR ucPadding[2]; +}ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1; + +/****************************************************************************/ // Structure used by EnableASIC_StaticPwrMgtTable.ctb /****************************************************************************/ typedef struct _ENABLE_ASIC_STATIC_PWR_MGT_PARAMETERS @@ -807,6 +855,7 @@ typedef struct _ATOM_DIG_ENCODER_CONFIG_V4 #define ATOM_ENCODER_CONFIG_V4_DPLINKRATE_1_62GHZ 0x00 #define ATOM_ENCODER_CONFIG_V4_DPLINKRATE_2_70GHZ 0x01 #define ATOM_ENCODER_CONFIG_V4_DPLINKRATE_5_40GHZ 0x02 +#define ATOM_ENCODER_CONFIG_V4_DPLINKRATE_3_24GHZ 0x03 #define ATOM_ENCODER_CONFIG_V4_ENCODER_SEL 0x70 #define ATOM_ENCODER_CONFIG_V4_DIG0_ENCODER 0x00 #define ATOM_ENCODER_CONFIG_V4_DIG1_ENCODER 0x10 @@ -814,6 +863,7 @@ typedef struct _ATOM_DIG_ENCODER_CONFIG_V4 #define ATOM_ENCODER_CONFIG_V4_DIG3_ENCODER 0x30 #define ATOM_ENCODER_CONFIG_V4_DIG4_ENCODER 0x40 #define ATOM_ENCODER_CONFIG_V4_DIG5_ENCODER 0x50 +#define ATOM_ENCODER_CONFIG_V4_DIG6_ENCODER 0x60 typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V4 { @@ -1171,6 +1221,106 @@ typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V4 #define ATOM_TRANSMITTER_CONFIG_V4_TRANSMITTER3 0x80 //EF +typedef struct _ATOM_DIG_TRANSMITTER_CONFIG_V5 +{ +#if ATOM_BIG_ENDIAN + UCHAR ucReservd1:1; + UCHAR ucHPDSel:3; + UCHAR ucPhyClkSrcId:2; + UCHAR ucCoherentMode:1; + UCHAR ucReserved:1; +#else + UCHAR ucReserved:1; + UCHAR ucCoherentMode:1; + UCHAR ucPhyClkSrcId:2; + UCHAR ucHPDSel:3; + UCHAR ucReservd1:1; +#endif +}ATOM_DIG_TRANSMITTER_CONFIG_V5; + +typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 +{ + USHORT usSymClock; // Encoder Clock in 10kHz,(DP mode)= linkclock/10, (TMDS/LVDS/HDMI)= pixel clock, (HDMI deep color), =pixel clock * deep_color_ratio + UCHAR ucPhyId; // 0=UNIPHYA, 1=UNIPHYB, 2=UNIPHYC, 3=UNIPHYD, 4= UNIPHYE 5=UNIPHYF + UCHAR ucAction; // define as ATOM_TRANSMITER_ACTION_xxx + UCHAR ucLaneNum; // indicate lane number 1-8 + UCHAR ucConnObjId; // Connector Object Id defined in ObjectId.h + UCHAR ucDigMode; // indicate DIG mode + union{ + ATOM_DIG_TRANSMITTER_CONFIG_V5 asConfig; + UCHAR ucConfig; + }; + UCHAR ucDigEncoderSel; // indicate DIG front end encoder + UCHAR ucDPLaneSet; + UCHAR ucReserved; + UCHAR ucReserved1; +}DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5; + +//ucPhyId +#define ATOM_PHY_ID_UNIPHYA 0 +#define ATOM_PHY_ID_UNIPHYB 1 +#define ATOM_PHY_ID_UNIPHYC 2 +#define ATOM_PHY_ID_UNIPHYD 3 +#define ATOM_PHY_ID_UNIPHYE 4 +#define ATOM_PHY_ID_UNIPHYF 5 +#define ATOM_PHY_ID_UNIPHYG 6 + +// ucDigEncoderSel +#define ATOM_TRANMSITTER_V5__DIGA_SEL 0x01 +#define ATOM_TRANMSITTER_V5__DIGB_SEL 0x02 +#define ATOM_TRANMSITTER_V5__DIGC_SEL 0x04 +#define ATOM_TRANMSITTER_V5__DIGD_SEL 0x08 +#define ATOM_TRANMSITTER_V5__DIGE_SEL 0x10 +#define ATOM_TRANMSITTER_V5__DIGF_SEL 0x20 +#define ATOM_TRANMSITTER_V5__DIGG_SEL 0x40 + +// ucDigMode +#define ATOM_TRANSMITTER_DIGMODE_V5_DP 0 +#define ATOM_TRANSMITTER_DIGMODE_V5_LVDS 1 +#define ATOM_TRANSMITTER_DIGMODE_V5_DVI 2 +#define ATOM_TRANSMITTER_DIGMODE_V5_HDMI 3 +#define ATOM_TRANSMITTER_DIGMODE_V5_SDVO 4 +#define ATOM_TRANSMITTER_DIGMODE_V5_DP_MST 5 + +// ucDPLaneSet +#define DP_LANE_SET__0DB_0_4V 0x00 +#define DP_LANE_SET__0DB_0_6V 0x01 +#define DP_LANE_SET__0DB_0_8V 0x02 +#define DP_LANE_SET__0DB_1_2V 0x03 +#define DP_LANE_SET__3_5DB_0_4V 0x08 +#define DP_LANE_SET__3_5DB_0_6V 0x09 +#define DP_LANE_SET__3_5DB_0_8V 0x0a +#define DP_LANE_SET__6DB_0_4V 0x10 +#define DP_LANE_SET__6DB_0_6V 0x11 +#define DP_LANE_SET__9_5DB_0_4V 0x18 + +// ATOM_DIG_TRANSMITTER_CONFIG_V5 asConfig; +// Bit1 +#define ATOM_TRANSMITTER_CONFIG_V5_COHERENT 0x02 + +// Bit3:2 +#define ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SEL_MASK 0x0c +#define ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SEL_SHIFT 0x02 + +#define ATOM_TRANSMITTER_CONFIG_V5_P1PLL 0x00 +#define ATOM_TRANSMITTER_CONFIG_V5_P2PLL 0x04 +#define ATOM_TRANSMITTER_CONFIG_V5_P0PLL 0x08 +#define ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT 0x0c +// Bit6:4 +#define ATOM_TRANSMITTER_CONFIG_V5_HPD_SEL_MASK 0x70 +#define ATOM_TRANSMITTER_CONFIG_V5_HPD_SEL_SHIFT 0x04 + +#define ATOM_TRANSMITTER_CONFIG_V5_NO_HPD_SEL 0x00 +#define ATOM_TRANSMITTER_CONFIG_V5_HPD1_SEL 0x10 +#define ATOM_TRANSMITTER_CONFIG_V5_HPD2_SEL 0x20 +#define ATOM_TRANSMITTER_CONFIG_V5_HPD3_SEL 0x30 +#define ATOM_TRANSMITTER_CONFIG_V5_HPD4_SEL 0x40 +#define ATOM_TRANSMITTER_CONFIG_V5_HPD5_SEL 0x50 +#define ATOM_TRANSMITTER_CONFIG_V5_HPD6_SEL 0x60 + +#define DIG_TRANSMITTER_CONTROL_PS_ALLOCATION_V1_5 DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 + + /****************************************************************************/ // Structures used by ExternalEncoderControlTable V1.3 // ASIC Families: Evergreen, Llano, NI @@ -1793,6 +1943,7 @@ typedef struct _ENABLE_SPREAD_SPECTRUM_ON_PPLL_V2 #define ATOM_PPLL_SS_TYPE_V3_P1PLL 0x00 #define ATOM_PPLL_SS_TYPE_V3_P2PLL 0x04 #define ATOM_PPLL_SS_TYPE_V3_DCPLL 0x08 +#define ATOM_PPLL_SS_TYPE_V3_P0PLL ATOM_PPLL_SS_TYPE_V3_DCPLL #define ATOM_PPLL_SS_AMOUNT_V3_FBDIV_MASK 0x00FF #define ATOM_PPLL_SS_AMOUNT_V3_FBDIV_SHIFT 0 #define ATOM_PPLL_SS_AMOUNT_V3_NFRAC_MASK 0x0F00 @@ -2030,12 +2181,77 @@ typedef struct _SET_VOLTAGE_PARAMETERS_V2 USHORT usVoltageLevel; // real voltage level }SET_VOLTAGE_PARAMETERS_V2; + +typedef struct _SET_VOLTAGE_PARAMETERS_V1_3 +{ + UCHAR ucVoltageType; // To tell which voltage to set up, VDDC/MVDDC/MVDDQ/VDDCI + UCHAR ucVoltageMode; // Indicate action: Set voltage level + USHORT usVoltageLevel; // real voltage level in unit of mv or Voltage Phase (0, 1, 2, .. ) +}SET_VOLTAGE_PARAMETERS_V1_3; + +//ucVoltageType +#define VOLTAGE_TYPE_VDDC 1 +#define VOLTAGE_TYPE_MVDDC 2 +#define VOLTAGE_TYPE_MVDDQ 3 +#define VOLTAGE_TYPE_VDDCI 4 + +//SET_VOLTAGE_PARAMETERS_V3.ucVoltageMode +#define ATOM_SET_VOLTAGE 0 //Set voltage Level +#define ATOM_INIT_VOLTAGE_REGULATOR 3 //Init Regulator +#define ATOM_SET_VOLTAGE_PHASE 4 //Set Vregulator Phase +#define ATOM_GET_MAX_VOLTAGE 6 //Get Max Voltage, not used in SetVoltageTable v1.3 +#define ATOM_GET_VOLTAGE_LEVEL 6 //Get Voltage level from vitual voltage ID + +// define vitual voltage id in usVoltageLevel +#define ATOM_VIRTUAL_VOLTAGE_ID0 0xff01 +#define ATOM_VIRTUAL_VOLTAGE_ID1 0xff02 +#define ATOM_VIRTUAL_VOLTAGE_ID2 0xff03 +#define ATOM_VIRTUAL_VOLTAGE_ID3 0xff04 + typedef struct _SET_VOLTAGE_PS_ALLOCATION { SET_VOLTAGE_PARAMETERS sASICSetVoltage; WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved; }SET_VOLTAGE_PS_ALLOCATION; +// New Added from SI for GetVoltageInfoTable, input parameter structure +typedef struct _GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_1 +{ + UCHAR ucVoltageType; // Input: To tell which voltage to set up, VDDC/MVDDC/MVDDQ/VDDCI + UCHAR ucVoltageMode; // Input: Indicate action: Get voltage info + USHORT usVoltageLevel; // Input: real voltage level in unit of mv or Voltage Phase (0, 1, 2, .. ) or Leakage Id + ULONG ulReserved; +}GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_1; + +// New Added from SI for GetVoltageInfoTable, output parameter structure when ucVotlageMode == ATOM_GET_VOLTAGE_VID +typedef struct _GET_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_1 +{ + ULONG ulVotlageGpioState; + ULONG ulVoltageGPioMask; +}GET_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_1; + +// New Added from SI for GetVoltageInfoTable, output parameter structure when ucVotlageMode == ATOM_GET_VOLTAGE_STATEx_LEAKAGE_VID +typedef struct _GET_LEAKAGE_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_1 +{ + USHORT usVoltageLevel; + USHORT usVoltageId; // Voltage Id programmed in Voltage Regulator + ULONG ulReseved; +}GET_LEAKAGE_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_1; + + +// GetVoltageInfo v1.1 ucVoltageMode +#define ATOM_GET_VOLTAGE_VID 0x00 +#define ATOM_GET_VOTLAGE_INIT_SEQ 0x03 +#define ATOM_GET_VOLTTAGE_PHASE_PHASE_VID 0x04 +// for SI, this state map to 0xff02 voltage state in Power Play table, which is power boost state +#define ATOM_GET_VOLTAGE_STATE0_LEAKAGE_VID 0x10 + +// for SI, this state map to 0xff01 voltage state in Power Play table, which is performance state +#define ATOM_GET_VOLTAGE_STATE1_LEAKAGE_VID 0x11 +// undefined power state +#define ATOM_GET_VOLTAGE_STATE2_LEAKAGE_VID 0x12 +#define ATOM_GET_VOLTAGE_STATE3_LEAKAGE_VID 0x13 + /****************************************************************************/ // Structures used by TVEncoderControlTable /****************************************************************************/ @@ -2065,9 +2281,9 @@ typedef struct _ATOM_MASTER_LIST_OF_DATA_TABLES USHORT MultimediaConfigInfo; // Only used by MM Lib,latest version 2.1, not configuable from Bios, need to include the table to build Bios USHORT StandardVESA_Timing; // Only used by Bios USHORT FirmwareInfo; // Shared by various SW components,latest version 1.4 - USHORT DAC_Info; // Will be obsolete from R600 + USHORT PaletteData; // Only used by BIOS USHORT LCD_Info; // Shared by various SW components,latest version 1.3, was called LVDS_Info - USHORT TMDS_Info; // Will be obsolete from R600 + USHORT DIGTransmitterInfo; // Internal used by VBIOS only version 3.1 USHORT AnalogTV_Info; // Shared by various SW components,latest version 1.1 USHORT SupportedDevicesInfo; // Will be obsolete from R600 USHORT GPIO_I2C_Info; // Shared by various SW components,latest version 1.2 will be used from R600 @@ -2096,15 +2312,16 @@ typedef struct _ATOM_MASTER_LIST_OF_DATA_TABLES USHORT PowerSourceInfo; // Shared by various SW components, latest versoin 1.1 }ATOM_MASTER_LIST_OF_DATA_TABLES; -// For backward compatible -#define LVDS_Info LCD_Info - typedef struct _ATOM_MASTER_DATA_TABLE { ATOM_COMMON_TABLE_HEADER sHeader; ATOM_MASTER_LIST_OF_DATA_TABLES ListOfDataTables; }ATOM_MASTER_DATA_TABLE; +// For backward compatible +#define LVDS_Info LCD_Info +#define DAC_Info PaletteData +#define TMDS_Info DIGTransmitterInfo /****************************************************************************/ // Structure used in MultimediaCapabilityInfoTable @@ -2171,7 +2388,9 @@ typedef struct _ATOM_MULTIMEDIA_CONFIG_INFO typedef struct _ATOM_FIRMWARE_CAPABILITY { #if ATOM_BIG_ENDIAN - USHORT Reserved:3; + USHORT Reserved:1; + USHORT SCL2Redefined:1; + USHORT PostWithoutModeSet:1; USHORT HyperMemory_Size:4; USHORT HyperMemory_Support:1; USHORT PPMode_Assigned:1; @@ -2193,7 +2412,9 @@ typedef struct _ATOM_FIRMWARE_CAPABILITY USHORT PPMode_Assigned:1; USHORT HyperMemory_Support:1; USHORT HyperMemory_Size:4; - USHORT Reserved:3; + USHORT PostWithoutModeSet:1; + USHORT SCL2Redefined:1; + USHORT Reserved:1; #endif }ATOM_FIRMWARE_CAPABILITY; @@ -2418,7 +2639,8 @@ typedef struct _ATOM_FIRMWARE_INFO_V2_2 USHORT usLcdMaxPixelClockPLL_Output; // In MHz unit ULONG ulReserved4; //Was ulAsicMaximumVoltage ULONG ulMinPixelClockPLL_Output; //In 10Khz unit - ULONG ulReserved5; //Was usMinEngineClockPLL_Input and usMaxEngineClockPLL_Input + UCHAR ucRemoteDisplayConfig; + UCHAR ucReserved5[3]; //Was usMinEngineClockPLL_Input and usMaxEngineClockPLL_Input ULONG ulReserved6; //Was usMinEngineClockPLL_Output and usMinMemoryClockPLL_Input ULONG ulReserved7; //Was usMaxMemoryClockPLL_Input and usMinMemoryClockPLL_Output USHORT usReserved11; //Was usMaxPixelClock; //In 10Khz unit, Max. Pclk used only for DAC @@ -2438,6 +2660,11 @@ typedef struct _ATOM_FIRMWARE_INFO_V2_2 #define ATOM_FIRMWARE_INFO_LAST ATOM_FIRMWARE_INFO_V2_2 + +// definition of ucRemoteDisplayConfig +#define REMOTE_DISPLAY_DISABLE 0x00 +#define REMOTE_DISPLAY_ENABLE 0x01 + /****************************************************************************/ // Structures used in IntegratedSystemInfoTable /****************************************************************************/ @@ -2660,8 +2887,9 @@ usMinDownStreamHTLinkWidth: same as above. #define INTEGRATED_SYSTEM_INFO__AMD_CPU__GREYHOUND 2 #define INTEGRATED_SYSTEM_INFO__AMD_CPU__K8 3 #define INTEGRATED_SYSTEM_INFO__AMD_CPU__PHARAOH 4 +#define INTEGRATED_SYSTEM_INFO__AMD_CPU__OROCHI 5 -#define INTEGRATED_SYSTEM_INFO__AMD_CPU__MAX_CODE INTEGRATED_SYSTEM_INFO__AMD_CPU__PHARAOH // this deff reflects max defined CPU code +#define INTEGRATED_SYSTEM_INFO__AMD_CPU__MAX_CODE INTEGRATED_SYSTEM_INFO__AMD_CPU__OROCHI // this deff reflects max defined CPU code #define SYSTEM_CONFIG_POWEREXPRESS_ENABLE 0x00000001 #define SYSTEM_CONFIG_RUN_AT_OVERDRIVE_ENGINE 0x00000002 @@ -2753,6 +2981,7 @@ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V5 #define ASIC_INT_DIG4_ENCODER_ID 0x0b #define ASIC_INT_DIG5_ENCODER_ID 0x0c #define ASIC_INT_DIG6_ENCODER_ID 0x0d +#define ASIC_INT_DIG7_ENCODER_ID 0x0e //define Encoder attribute #define ATOM_ANALOG_ENCODER 0 @@ -3226,15 +3455,23 @@ typedef struct _ATOM_LCD_INFO_V13 UCHAR ucPowerSequenceDIGONtoDE_in4Ms; UCHAR ucPowerSequenceDEtoVARY_BL_in4Ms; - UCHAR ucPowerSequenceDEtoDIGON_in4Ms; UCHAR ucPowerSequenceVARY_BLtoDE_in4Ms; + UCHAR ucPowerSequenceDEtoDIGON_in4Ms; UCHAR ucOffDelay_in4Ms; UCHAR ucPowerSequenceVARY_BLtoBLON_in4Ms; UCHAR ucPowerSequenceBLONtoVARY_BL_in4Ms; UCHAR ucReserved1; - ULONG ulReserved[4]; + UCHAR ucDPCD_eDP_CONFIGURATION_CAP; // dpcd 0dh + UCHAR ucDPCD_MAX_LINK_RATE; // dpcd 01h + UCHAR ucDPCD_MAX_LANE_COUNT; // dpcd 02h + UCHAR ucDPCD_MAX_DOWNSPREAD; // dpcd 03h + + USHORT usMaxPclkFreqInSingleLink; // Max PixelClock frequency in single link mode. + UCHAR uceDPToLVDSRxId; + UCHAR ucLcdReservd; + ULONG ulReserved[2]; }ATOM_LCD_INFO_V13; #define ATOM_LCD_INFO_LAST ATOM_LCD_INFO_V13 @@ -3273,6 +3510,11 @@ typedef struct _ATOM_LCD_INFO_V13 //Use this cap bit for a quick reference whether an embadded panel (LCD1 ) is LVDS or eDP. #define LCDPANEL_CAP_V13_eDP 0x4 // = LCDPANEL_CAP_eDP no change comparing to previous version +//uceDPToLVDSRxId +#define eDP_TO_LVDS_RX_DISABLE 0x00 // no eDP->LVDS translator chip +#define eDP_TO_LVDS_COMMON_ID 0x01 // common eDP->LVDS translator chip without AMD SW init +#define eDP_TO_LVDS_RT_ID 0x02 // RT tanslator which require AMD SW init + typedef struct _ATOM_PATCH_RECORD_MODE { UCHAR ucRecordType; @@ -3317,6 +3559,7 @@ typedef struct _ATOM_PANEL_RESOLUTION_PATCH_RECORD #define LCD_CAP_RECORD_TYPE 3 #define LCD_FAKE_EDID_PATCH_RECORD_TYPE 4 #define LCD_PANEL_RESOLUTION_RECORD_TYPE 5 +#define LCD_EDID_OFFSET_PATCH_RECORD_TYPE 6 #define ATOM_RECORD_END_TYPE 0xFF /****************************Spread Spectrum Info Table Definitions **********************/ @@ -3528,6 +3771,7 @@ else //Non VGA case CAIL needs to claim an reserved area defined by FBAccessAreaOffset and usFBUsedbyDrvInKB in non VGA case.*/ +/***********************************************************************************/ #define ATOM_MAX_FIRMWARE_VRAM_USAGE_INFO 1 typedef struct _ATOM_FIRMWARE_VRAM_RESERVE_INFO @@ -3818,13 +4062,17 @@ typedef struct _EXT_DISPLAY_PATH ATOM_DP_CONN_CHANNEL_MAPPING asDPMapping; ATOM_DVI_CONN_CHANNEL_MAPPING asDVIMapping; }; - UCHAR ucReserved; - USHORT usReserved[2]; + UCHAR ucChPNInvert; // bit vector for up to 8 lanes, =0: P and N is not invert, =1 P and N is inverted + USHORT usCaps; + USHORT usReserved; }EXT_DISPLAY_PATH; #define NUMBER_OF_UCHAR_FOR_GUID 16 #define MAX_NUMBER_OF_EXT_DISPLAY_PATH 7 +//usCaps +#define EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE 0x01 + typedef struct _ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO { ATOM_COMMON_TABLE_HEADER sHeader; @@ -3832,7 +4080,9 @@ typedef struct _ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO EXT_DISPLAY_PATH sPath[MAX_NUMBER_OF_EXT_DISPLAY_PATH]; // total of fixed 7 entries. UCHAR ucChecksum; // a simple Checksum of the sum of whole structure equal to 0x0. UCHAR uc3DStereoPinId; // use for eDP panel - UCHAR Reserved [6]; // for potential expansion + UCHAR ucRemoteDisplayConfig; + UCHAR uceDPToLVDSRxId; + UCHAR Reserved[4]; // for potential expansion }ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO; //Related definitions, all records are different but they have a commond header @@ -3977,6 +4227,7 @@ typedef struct _ATOM_OBJECT_GPIO_CNTL_RECORD #define GPIO_PIN_STATE_ACTIVE_HIGH 0x1 // Indexes to GPIO array in GLSync record +// GLSync record is for Frame Lock/Gen Lock feature. #define ATOM_GPIO_INDEX_GLSYNC_REFCLK 0 #define ATOM_GPIO_INDEX_GLSYNC_HSYNC 1 #define ATOM_GPIO_INDEX_GLSYNC_VSYNC 2 @@ -3984,7 +4235,9 @@ typedef struct _ATOM_OBJECT_GPIO_CNTL_RECORD #define ATOM_GPIO_INDEX_GLSYNC_SWAP_GNT 4 #define ATOM_GPIO_INDEX_GLSYNC_INTERRUPT 5 #define ATOM_GPIO_INDEX_GLSYNC_V_RESET 6 -#define ATOM_GPIO_INDEX_GLSYNC_MAX 7 +#define ATOM_GPIO_INDEX_GLSYNC_SWAP_CNTL 7 +#define ATOM_GPIO_INDEX_GLSYNC_SWAP_SEL 8 +#define ATOM_GPIO_INDEX_GLSYNC_MAX 9 typedef struct _ATOM_ENCODER_DVO_CF_RECORD { @@ -3994,7 +4247,8 @@ typedef struct _ATOM_ENCODER_DVO_CF_RECORD }ATOM_ENCODER_DVO_CF_RECORD; // Bit maps for ATOM_ENCODER_CAP_RECORD.ucEncoderCap -#define ATOM_ENCODER_CAP_RECORD_HBR2 0x01 // DP1.2 HBR2 is supported by this path +#define ATOM_ENCODER_CAP_RECORD_HBR2 0x01 // DP1.2 HBR2 is supported by HW encoder +#define ATOM_ENCODER_CAP_RECORD_HBR2_EN 0x02 // DP1.2 HBR2 setting is qualified and HBR2 can be enabled typedef struct _ATOM_ENCODER_CAP_RECORD { @@ -4003,11 +4257,13 @@ typedef struct _ATOM_ENCODER_CAP_RECORD USHORT usEncoderCap; struct { #if ATOM_BIG_ENDIAN - USHORT usReserved:15; // Bit1-15 may be defined for other capability in future + USHORT usReserved:14; // Bit1-15 may be defined for other capability in future + USHORT usHBR2En:1; // Bit1 is for DP1.2 HBR2 enable USHORT usHBR2Cap:1; // Bit0 is for DP1.2 HBR2 capability. #else USHORT usHBR2Cap:1; // Bit0 is for DP1.2 HBR2 capability. - USHORT usReserved:15; // Bit1-15 may be defined for other capability in future + USHORT usHBR2En:1; // Bit1 is for DP1.2 HBR2 enable + USHORT usReserved:14; // Bit1-15 may be defined for other capability in future #endif }; }; @@ -4157,6 +4413,7 @@ typedef struct _ATOM_VOLTAGE_CONTROL #define VOLTAGE_CONTROL_ID_VT1556M 0x07 #define VOLTAGE_CONTROL_ID_CHL822x 0x08 #define VOLTAGE_CONTROL_ID_VT1586M 0x09 +#define VOLTAGE_CONTROL_ID_UP1637 0x0A typedef struct _ATOM_VOLTAGE_OBJECT { @@ -4193,6 +4450,69 @@ typedef struct _ATOM_LEAKID_VOLTAGE USHORT usVoltage; }ATOM_LEAKID_VOLTAGE; +typedef struct _ATOM_VOLTAGE_OBJECT_HEADER_V3{ + UCHAR ucVoltageType; //Indicate Voltage Source: VDDC, MVDDC, MVDDQ or MVDDCI + UCHAR ucVoltageMode; //Indicate voltage control mode: Init/Set/Leakage/Set phase + USHORT usSize; //Size of Object +}ATOM_VOLTAGE_OBJECT_HEADER_V3; + +typedef struct _VOLTAGE_LUT_ENTRY_V2 +{ + ULONG ulVoltageId; // The Voltage ID which is used to program GPIO register + USHORT usVoltageValue; // The corresponding Voltage Value, in mV +}VOLTAGE_LUT_ENTRY_V2; + +typedef struct _LEAKAGE_VOLTAGE_LUT_ENTRY_V2 +{ + USHORT usVoltageLevel; // The Voltage ID which is used to program GPIO register + USHORT usVoltageId; + USHORT usLeakageId; // The corresponding Voltage Value, in mV +}LEAKAGE_VOLTAGE_LUT_ENTRY_V2; + +typedef struct _ATOM_I2C_VOLTAGE_OBJECT_V3 +{ + ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader; + UCHAR ucVoltageRegulatorId; //Indicate Voltage Regulator Id + UCHAR ucVoltageControlI2cLine; + UCHAR ucVoltageControlAddress; + UCHAR ucVoltageControlOffset; + ULONG ulReserved; + VOLTAGE_LUT_ENTRY asVolI2cLut[1]; // end with 0xff +}ATOM_I2C_VOLTAGE_OBJECT_V3; + +typedef struct _ATOM_GPIO_VOLTAGE_OBJECT_V3 +{ + ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader; + UCHAR ucVoltageGpioCntlId; // default is 0 which indicate control through CG VID mode + UCHAR ucGpioEntryNum; // indiate the entry numbers of Votlage/Gpio value Look up table + UCHAR ucPhaseDelay; // phase delay in unit of micro second + UCHAR ucReserved; + ULONG ulGpioMaskVal; // GPIO Mask value + VOLTAGE_LUT_ENTRY_V2 asVolGpioLut[1]; +}ATOM_GPIO_VOLTAGE_OBJECT_V3; + +typedef struct _ATOM_LEAKAGE_VOLTAGE_OBJECT_V3 +{ + ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader; + UCHAR ucLeakageCntlId; // default is 0 + UCHAR ucLeakageEntryNum; // indicate the entry number of LeakageId/Voltage Lut table + UCHAR ucReserved[2]; + ULONG ulMaxVoltageLevel; + LEAKAGE_VOLTAGE_LUT_ENTRY_V2 asLeakageIdLut[1]; +}ATOM_LEAKAGE_VOLTAGE_OBJECT_V3; + +typedef union _ATOM_VOLTAGE_OBJECT_V3{ + ATOM_GPIO_VOLTAGE_OBJECT_V3 asGpioVoltageObj; + ATOM_I2C_VOLTAGE_OBJECT_V3 asI2cVoltageObj; + ATOM_LEAKAGE_VOLTAGE_OBJECT_V3 asLeakageObj; +}ATOM_VOLTAGE_OBJECT_V3; + +typedef struct _ATOM_VOLTAGE_OBJECT_INFO_V3_1 +{ + ATOM_COMMON_TABLE_HEADER sHeader; + ATOM_VOLTAGE_OBJECT_V3 asVoltageObj[3]; //Info for Voltage control +}ATOM_VOLTAGE_OBJECT_INFO_V3_1; + typedef struct _ATOM_ASIC_PROFILE_VOLTAGE { UCHAR ucProfileId; @@ -4305,7 +4625,18 @@ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 USHORT usHDMISSpreadRateIn10Hz; USHORT usDVISSPercentage; USHORT usDVISSpreadRateIn10Hz; - ULONG ulReserved3[21]; + ULONG SclkDpmBoostMargin; + ULONG SclkDpmThrottleMargin; + USHORT SclkDpmTdpLimitPG; + USHORT SclkDpmTdpLimitBoost; + ULONG ulBoostEngineCLock; + UCHAR ulBoostVid_2bit; + UCHAR EnableBoost; + USHORT GnbTdpLimit; + USHORT usMaxLVDSPclkFreqInSingleLink; + UCHAR ucLvdsMisc; + UCHAR ucLVDSReserved; + ULONG ulReserved3[15]; ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO sExtDispConnInfo; }ATOM_INTEGRATED_SYSTEM_INFO_V6; @@ -4313,9 +4644,16 @@ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 #define INTEGRATED_SYSTEM_INFO_V6_GPUCAPINFO__TMDSHDMI_COHERENT_SINGLEPLL_MODE 0x01 #define INTEGRATED_SYSTEM_INFO_V6_GPUCAPINFO__DISABLE_AUX_HW_MODE_DETECTION 0x08 -// ulOtherDisplayMisc -#define INTEGRATED_SYSTEM_INFO__GET_EDID_CALLBACK_FUNC_SUPPORT 0x01 +//ucLVDSMisc: +#define SYS_INFO_LVDSMISC__888_FPDI_MODE 0x01 +#define SYS_INFO_LVDSMISC__DL_CH_SWAP 0x02 +#define SYS_INFO_LVDSMISC__888_BPC 0x04 +#define SYS_INFO_LVDSMISC__OVERRIDE_EN 0x08 +#define SYS_INFO_LVDSMISC__BLON_ACTIVE_LOW 0x10 +// not used any more +#define SYS_INFO_LVDSMISC__VSYNC_ACTIVE_LOW 0x04 +#define SYS_INFO_LVDSMISC__HSYNC_ACTIVE_LOW 0x08 /********************************************************************************************************************** ATOM_INTEGRATED_SYSTEM_INFO_V6 Description @@ -4384,7 +4722,208 @@ ucUMAChannelNumber: System memory channel numbers. ulCSR_M3_ARB_CNTL_DEFAULT[10]: Arrays with values for CSR M3 arbiter for default ulCSR_M3_ARB_CNTL_UVD[10]: Arrays with values for CSR M3 arbiter for UVD playback. ulCSR_M3_ARB_CNTL_FS3D[10]: Arrays with values for CSR M3 arbiter for Full Screen 3D applications. -sAvail_SCLK[5]: Arrays to provide available list of SLCK and corresponding voltage, order from low to high +sAvail_SCLK[5]: Arrays to provide availabe list of SLCK and corresponding voltage, order from low to high +ulGMCRestoreResetTime: GMC power restore and GMC reset time to calculate data reconnection latency. Unit in ns. +ulMinimumNClk: Minimum NCLK speed among all NB-Pstates to calcualte data reconnection latency. Unit in 10kHz. +ulIdleNClk: NCLK speed while memory runs in self-refresh state. Unit in 10kHz. +ulDDR_DLL_PowerUpTime: DDR PHY DLL power up time. Unit in ns. +ulDDR_PLL_PowerUpTime: DDR PHY PLL power up time. Unit in ns. +usPCIEClkSSPercentage: PCIE Clock Spred Spectrum Percentage in unit 0.01%; 100 mean 1%. +usPCIEClkSSType: PCIE Clock Spred Spectrum Type. 0 for Down spread(default); 1 for Center spread. +usLvdsSSPercentage: LVDS panel ( not include eDP ) Spread Spectrum Percentage in unit of 0.01%, =0, use VBIOS default setting. +usLvdsSSpreadRateIn10Hz: LVDS panel ( not include eDP ) Spread Spectrum frequency in unit of 10Hz, =0, use VBIOS default setting. +usHDMISSPercentage: HDMI Spread Spectrum Percentage in unit 0.01%; 100 mean 1%, =0, use VBIOS default setting. +usHDMISSpreadRateIn10Hz: HDMI Spread Spectrum frequency in unit of 10Hz, =0, use VBIOS default setting. +usDVISSPercentage: DVI Spread Spectrum Percentage in unit 0.01%; 100 mean 1%, =0, use VBIOS default setting. +usDVISSpreadRateIn10Hz: DVI Spread Spectrum frequency in unit of 10Hz, =0, use VBIOS default setting. +usMaxLVDSPclkFreqInSingleLink: Max pixel clock LVDS panel single link, if=0 means VBIOS use default threhold, right now it is 85Mhz +ucLVDSMisc: [bit0] LVDS 888bit panel mode =0: LVDS 888 panel in LDI mode, =1: LVDS 888 panel in FPDI mode + [bit1] LVDS panel lower and upper link mapping =0: lower link and upper link not swap, =1: lower link and upper link are swapped + [bit2] LVDS 888bit per color mode =0: 666 bit per color =1:888 bit per color + [bit3] LVDS parameter override enable =0: ucLvdsMisc parameter are not used =1: ucLvdsMisc parameter should be used + [bit4] Polarity of signal sent to digital BLON output pin. =0: not inverted(active high) =1: inverted ( active low ) +**********************************************************************************************************************/ + +// this Table is used for Liano/Ontario APU +typedef struct _ATOM_FUSION_SYSTEM_INFO_V1 +{ + ATOM_INTEGRATED_SYSTEM_INFO_V6 sIntegratedSysInfo; + ULONG ulPowerplayTable[128]; +}ATOM_FUSION_SYSTEM_INFO_V1; +/********************************************************************************************************************** + ATOM_FUSION_SYSTEM_INFO_V1 Description +sIntegratedSysInfo: refer to ATOM_INTEGRATED_SYSTEM_INFO_V6 definition. +ulPowerplayTable[128]: This 512 bytes memory is used to save ATOM_PPLIB_POWERPLAYTABLE3, starting form ulPowerplayTable[0] +**********************************************************************************************************************/ + +// this IntegrateSystemInfoTable is used for Trinity APU +typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 +{ + ATOM_COMMON_TABLE_HEADER sHeader; + ULONG ulBootUpEngineClock; + ULONG ulDentistVCOFreq; + ULONG ulBootUpUMAClock; + ATOM_CLK_VOLT_CAPABILITY sDISPCLK_Voltage[4]; + ULONG ulBootUpReqDisplayVector; + ULONG ulOtherDisplayMisc; + ULONG ulGPUCapInfo; + ULONG ulSB_MMIO_Base_Addr; + USHORT usRequestedPWMFreqInHz; + UCHAR ucHtcTmpLmt; + UCHAR ucHtcHystLmt; + ULONG ulMinEngineClock; + ULONG ulSystemConfig; + ULONG ulCPUCapInfo; + USHORT usNBP0Voltage; + USHORT usNBP1Voltage; + USHORT usBootUpNBVoltage; + USHORT usExtDispConnInfoOffset; + USHORT usPanelRefreshRateRange; + UCHAR ucMemoryType; + UCHAR ucUMAChannelNumber; + UCHAR strVBIOSMsg[40]; + ULONG ulReserved[20]; + ATOM_AVAILABLE_SCLK_LIST sAvail_SCLK[5]; + ULONG ulGMCRestoreResetTime; + ULONG ulMinimumNClk; + ULONG ulIdleNClk; + ULONG ulDDR_DLL_PowerUpTime; + ULONG ulDDR_PLL_PowerUpTime; + USHORT usPCIEClkSSPercentage; + USHORT usPCIEClkSSType; + USHORT usLvdsSSPercentage; + USHORT usLvdsSSpreadRateIn10Hz; + USHORT usHDMISSPercentage; + USHORT usHDMISSpreadRateIn10Hz; + USHORT usDVISSPercentage; + USHORT usDVISSpreadRateIn10Hz; + ULONG SclkDpmBoostMargin; + ULONG SclkDpmThrottleMargin; + USHORT SclkDpmTdpLimitPG; + USHORT SclkDpmTdpLimitBoost; + ULONG ulBoostEngineCLock; + UCHAR ulBoostVid_2bit; + UCHAR EnableBoost; + USHORT GnbTdpLimit; + USHORT usMaxLVDSPclkFreqInSingleLink; + UCHAR ucLvdsMisc; + UCHAR ucLVDSReserved; + UCHAR ucLVDSPwrOnSeqDIGONtoDE_in4Ms; + UCHAR ucLVDSPwrOnSeqDEtoVARY_BL_in4Ms; + UCHAR ucLVDSPwrOffSeqVARY_BLtoDE_in4Ms; + UCHAR ucLVDSPwrOffSeqDEtoDIGON_in4Ms; + UCHAR ucLVDSOffToOnDelay_in4Ms; + UCHAR ucLVDSPwrOnSeqVARY_BLtoBLON_in4Ms; + UCHAR ucLVDSPwrOffSeqBLONtoVARY_BL_in4Ms; + UCHAR ucLVDSReserved1; + ULONG ulLCDBitDepthControlVal; + ULONG ulNbpStateMemclkFreq[4]; + USHORT usNBP2Voltage; + USHORT usNBP3Voltage; + ULONG ulNbpStateNClkFreq[4]; + UCHAR ucNBDPMEnable; + UCHAR ucReserved[3]; + UCHAR ucDPMState0VclkFid; + UCHAR ucDPMState0DclkFid; + UCHAR ucDPMState1VclkFid; + UCHAR ucDPMState1DclkFid; + UCHAR ucDPMState2VclkFid; + UCHAR ucDPMState2DclkFid; + UCHAR ucDPMState3VclkFid; + UCHAR ucDPMState3DclkFid; + ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO sExtDispConnInfo; +}ATOM_INTEGRATED_SYSTEM_INFO_V1_7; + +// ulOtherDisplayMisc +#define INTEGRATED_SYSTEM_INFO__GET_EDID_CALLBACK_FUNC_SUPPORT 0x01 +#define INTEGRATED_SYSTEM_INFO__GET_BOOTUP_DISPLAY_CALLBACK_FUNC_SUPPORT 0x02 +#define INTEGRATED_SYSTEM_INFO__GET_EXPANSION_CALLBACK_FUNC_SUPPORT 0x04 +#define INTEGRATED_SYSTEM_INFO__FAST_BOOT_SUPPORT 0x08 + +// ulGPUCapInfo +#define SYS_INFO_GPUCAPS__TMDSHDMI_COHERENT_SINGLEPLL_MODE 0x01 +#define SYS_INFO_GPUCAPS__DP_SINGLEPLL_MODE 0x02 +#define SYS_INFO_GPUCAPS__DISABLE_AUX_MODE_DETECT 0x08 + +/********************************************************************************************************************** + ATOM_INTEGRATED_SYSTEM_INFO_V1_7 Description +ulBootUpEngineClock: VBIOS bootup Engine clock frequency, in 10kHz unit. if it is equal 0, then VBIOS use pre-defined bootup engine clock +ulDentistVCOFreq: Dentist VCO clock in 10kHz unit. +ulBootUpUMAClock: System memory boot up clock frequency in 10Khz unit. +sDISPCLK_Voltage: Report Display clock voltage requirement. + +ulBootUpReqDisplayVector: VBIOS boot up display IDs, following are supported devices in Trinity projects: + ATOM_DEVICE_CRT1_SUPPORT 0x0001 + ATOM_DEVICE_DFP1_SUPPORT 0x0008 + ATOM_DEVICE_DFP6_SUPPORT 0x0040 + ATOM_DEVICE_DFP2_SUPPORT 0x0080 + ATOM_DEVICE_DFP3_SUPPORT 0x0200 + ATOM_DEVICE_DFP4_SUPPORT 0x0400 + ATOM_DEVICE_DFP5_SUPPORT 0x0800 + ATOM_DEVICE_LCD1_SUPPORT 0x0002 +ulOtherDisplayMisc: bit[0]=0: INT15 callback function Get LCD EDID ( ax=4e08, bl=1b ) is not supported by SBIOS. + =1: INT15 callback function Get LCD EDID ( ax=4e08, bl=1b ) is supported by SBIOS. + bit[1]=0: INT15 callback function Get boot display( ax=4e08, bl=01h) is not supported by SBIOS + =1: INT15 callback function Get boot display( ax=4e08, bl=01h) is supported by SBIOS + bit[2]=0: INT15 callback function Get panel Expansion ( ax=4e08, bl=02h) is not supported by SBIOS + =1: INT15 callback function Get panel Expansion ( ax=4e08, bl=02h) is supported by SBIOS + bit[3]=0: VBIOS fast boot is disable + =1: VBIOS fast boot is enable. ( VBIOS skip display device detection in every set mode if LCD panel is connect and LID is open) +ulGPUCapInfo: bit[0]=0: TMDS/HDMI Coherent Mode use cascade PLL mode. + =1: TMDS/HDMI Coherent Mode use signel PLL mode. + bit[1]=0: DP mode use cascade PLL mode ( New for Trinity ) + =1: DP mode use single PLL mode + bit[3]=0: Enable AUX HW mode detection logic + =1: Disable AUX HW mode detection logic + +ulSB_MMIO_Base_Addr: Physical Base address to SB MMIO space. Driver needs to initialize it for SMU usage. + +usRequestedPWMFreqInHz: When it's set to 0x0 by SBIOS: the LCD BackLight is not controlled by GPU(SW). + Any attempt to change BL using VBIOS function or enable VariBri from PP table is not effective since ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU==0; + + When it's set to a non-zero frequency, the BackLight is controlled by GPU (SW) in one of two ways below: + 1. SW uses the GPU BL PWM output to control the BL, in chis case, this non-zero frequency determines what freq GPU should use; + VBIOS will set up proper PWM frequency and ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU==1,as the result, + Changing BL using VBIOS function is functional in both driver and non-driver present environment; + and enabling VariBri under the driver environment from PP table is optional. + + 2. SW uses other means to control BL (like DPCD),this non-zero frequency serves as a flag only indicating + that BL control from GPU is expected. + VBIOS will NOT set up PWM frequency but make ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU==1 + Changing BL using VBIOS function could be functional in both driver and non-driver present environment,but + it's per platform + and enabling VariBri under the driver environment from PP table is optional. + +ucHtcTmpLmt: Refer to D18F3x64 bit[22:16], HtcTmpLmt. + Threshold on value to enter HTC_active state. +ucHtcHystLmt: Refer to D18F3x64 bit[27:24], HtcHystLmt. + To calculate threshold off value to exit HTC_active state, which is Threshold on vlaue minus ucHtcHystLmt. +ulMinEngineClock: Minimum SCLK allowed in 10kHz unit. This is calculated based on WRCK Fuse settings. +ulSystemConfig: Bit[0]=0: PCIE Power Gating Disabled + =1: PCIE Power Gating Enabled + Bit[1]=0: DDR-DLL shut-down feature disabled. + 1: DDR-DLL shut-down feature enabled. + Bit[2]=0: DDR-PLL Power down feature disabled. + 1: DDR-PLL Power down feature enabled. +ulCPUCapInfo: TBD +usNBP0Voltage: VID for voltage on NB P0 State +usNBP1Voltage: VID for voltage on NB P1 State +usNBP2Voltage: VID for voltage on NB P2 State +usNBP3Voltage: VID for voltage on NB P3 State +usBootUpNBVoltage: Voltage Index of GNB voltage configured by SBIOS, which is suffcient to support VBIOS DISPCLK requirement. +usExtDispConnInfoOffset: Offset to sExtDispConnInfo inside the structure +usPanelRefreshRateRange: Bit vector for LCD supported refresh rate range. If DRR is requestd by the platform, at least two bits need to be set + to indicate a range. + SUPPORTED_LCD_REFRESHRATE_30Hz 0x0004 + SUPPORTED_LCD_REFRESHRATE_40Hz 0x0008 + SUPPORTED_LCD_REFRESHRATE_50Hz 0x0010 + SUPPORTED_LCD_REFRESHRATE_60Hz 0x0020 +ucMemoryType: [3:0]=1:DDR1;=2:DDR2;=3:DDR3.[7:4] is reserved. +ucUMAChannelNumber: System memory channel numbers. +ulCSR_M3_ARB_CNTL_DEFAULT[10]: Arrays with values for CSR M3 arbiter for default +ulCSR_M3_ARB_CNTL_UVD[10]: Arrays with values for CSR M3 arbiter for UVD playback. +ulCSR_M3_ARB_CNTL_FS3D[10]: Arrays with values for CSR M3 arbiter for Full Screen 3D applications. +sAvail_SCLK[5]: Arrays to provide availabe list of SLCK and corresponding voltage, order from low to high ulGMCRestoreResetTime: GMC power restore and GMC reset time to calculate data reconnection latency. Unit in ns. ulMinimumNClk: Minimum NCLK speed among all NB-Pstates to calcualte data reconnection latency. Unit in 10kHz. ulIdleNClk: NCLK speed while memory runs in self-refresh state. Unit in 10kHz. @@ -4398,6 +4937,41 @@ usHDMISSPercentage: HDMI Spread Spectrum Percentage in unit 0.01%; usHDMISSpreadRateIn10Hz: HDMI Spread Spectrum frequency in unit of 10Hz, =0, use VBIOS default setting. usDVISSPercentage: DVI Spread Spectrum Percentage in unit 0.01%; 100 mean 1%, =0, use VBIOS default setting. usDVISSpreadRateIn10Hz: DVI Spread Spectrum frequency in unit of 10Hz, =0, use VBIOS default setting. +usMaxLVDSPclkFreqInSingleLink: Max pixel clock LVDS panel single link, if=0 means VBIOS use default threhold, right now it is 85Mhz +ucLVDSMisc: [bit0] LVDS 888bit panel mode =0: LVDS 888 panel in LDI mode, =1: LVDS 888 panel in FPDI mode + [bit1] LVDS panel lower and upper link mapping =0: lower link and upper link not swap, =1: lower link and upper link are swapped + [bit2] LVDS 888bit per color mode =0: 666 bit per color =1:888 bit per color + [bit3] LVDS parameter override enable =0: ucLvdsMisc parameter are not used =1: ucLvdsMisc parameter should be used + [bit4] Polarity of signal sent to digital BLON output pin. =0: not inverted(active high) =1: inverted ( active low ) +ucLVDSPwrOnSeqDIGONtoDE_in4Ms: LVDS power up sequence time in unit of 4ms, time delay from DIGON signal active to data enable signal active( DE ). + =0 mean use VBIOS default which is 8 ( 32ms ). The LVDS power up sequence is as following: DIGON->DE->VARY_BL->BLON. + This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. +ucLVDSPwrOnDEtoVARY_BL_in4Ms: LVDS power up sequence time in unit of 4ms., time delay from DE( data enable ) active to Vary Brightness enable signal active( VARY_BL ). + =0 mean use VBIOS default which is 90 ( 360ms ). The LVDS power up sequence is as following: DIGON->DE->VARY_BL->BLON. + This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. + +ucLVDSPwrOffVARY_BLtoDE_in4Ms: LVDS power down sequence time in unit of 4ms, time delay from data enable ( DE ) signal off to LCDVCC (DIGON) off. + =0 mean use VBIOS default delay which is 8 ( 32ms ). The LVDS power down sequence is as following: BLON->VARY_BL->DE->DIGON + This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. + +ucLVDSPwrOffDEtoDIGON_in4Ms: LVDS power down sequence time in unit of 4ms, time delay from vary brightness enable signal( VARY_BL) off to data enable ( DE ) signal off. + =0 mean use VBIOS default which is 90 ( 360ms ). The LVDS power down sequence is as following: BLON->VARY_BL->DE->DIGON + This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. + +ucLVDSOffToOnDelay_in4Ms: LVDS power down sequence time in unit of 4ms. Time delay from DIGON signal off to DIGON signal active. + =0 means to use VBIOS default delay which is 125 ( 500ms ). + This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. + +ucLVDSPwrOnVARY_BLtoBLON_in4Ms: LVDS power up sequence time in unit of 4ms. Time delay from VARY_BL signal on to DLON signal active. + =0 means to use VBIOS default delay which is 0 ( 0ms ). + This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. + +ucLVDSPwrOffBLONtoVARY_BL_in4Ms: LVDS power down sequence time in unit of 4ms. Time delay from BLON signal off to VARY_BL signal off. + =0 means to use VBIOS default delay which is 0 ( 0ms ). + This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. + +ulNbpStateMemclkFreq[4]: system memory clock frequncey in unit of 10Khz in different NB pstate. + **********************************************************************************************************************/ /**************************************************************************/ @@ -4459,6 +5033,7 @@ typedef struct _ATOM_ASIC_SS_ASSIGNMENT #define ASIC_INTERNAL_SS_ON_DP 7 #define ASIC_INTERNAL_SS_ON_DCPLL 8 #define ASIC_EXTERNAL_SS_ON_DP_CLOCK 9 +#define ASIC_INTERNAL_VCE_SS 10 typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V2 { @@ -4520,7 +5095,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 #define ATOM_DOS_MODE_INFO_DEF 7 #define ATOM_I2C_CHANNEL_STATUS_DEF 8 #define ATOM_I2C_CHANNEL_STATUS1_DEF 9 - +#define ATOM_INTERNAL_TIMER_DEF 10 // BIOS_0_SCRATCH Definition #define ATOM_S0_CRT1_MONO 0x00000001L @@ -4648,6 +5223,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 #define ATOM_S2_DEVICE_DPMS_MASKw1 0x3FF #define ATOM_S2_FORCEDLOWPWRMODE_STATE_MASKb3 0x0C #define ATOM_S2_FORCEDLOWPWRMODE_STATE_CHANGEb3 0x10 +#define ATOM_S2_TMDS_COHERENT_MODEb3 0x10 // used by VBIOS code only, use coherent mode for TMDS/HDMI mode #define ATOM_S2_VRI_BRIGHT_ENABLEb3 0x20 #define ATOM_S2_ROTATION_STATE_MASKb3 0xC0 @@ -5038,6 +5614,23 @@ typedef struct _ENABLE_GRAPH_SURFACE_PARAMETERS_V1_3 USHORT usDeviceId; // Active Device Id for this surface. If no device, set to 0. }ENABLE_GRAPH_SURFACE_PARAMETERS_V1_3; +typedef struct _ENABLE_GRAPH_SURFACE_PARAMETERS_V1_4 +{ + USHORT usHight; // Image Hight + USHORT usWidth; // Image Width + USHORT usGraphPitch; + UCHAR ucColorDepth; + UCHAR ucPixelFormat; + UCHAR ucSurface; // Surface 1 or 2 + UCHAR ucEnable; // ATOM_ENABLE or ATOM_DISABLE + UCHAR ucModeType; + UCHAR ucReserved; +}ENABLE_GRAPH_SURFACE_PARAMETERS_V1_4; + +// ucEnable +#define ATOM_GRAPH_CONTROL_SET_PITCH 0x0f +#define ATOM_GRAPH_CONTROL_SET_DISP_START 0x10 + typedef struct _ENABLE_GRAPH_SURFACE_PS_ALLOCATION { ENABLE_GRAPH_SURFACE_PARAMETERS sSetSurface; @@ -5057,6 +5650,58 @@ typedef struct _GET_DISPLAY_SURFACE_SIZE_PARAMETERS USHORT usY_Size; }GET_DISPLAY_SURFACE_SIZE_PARAMETERS; +typedef struct _GET_DISPLAY_SURFACE_SIZE_PARAMETERS_V2 +{ + union{ + USHORT usX_Size; //When use as input parameter, usX_Size indicates which CRTC + USHORT usSurface; + }; + USHORT usY_Size; + USHORT usDispXStart; + USHORT usDispYStart; +}GET_DISPLAY_SURFACE_SIZE_PARAMETERS_V2; + + +typedef struct _PALETTE_DATA_CONTROL_PARAMETERS_V3 +{ + UCHAR ucLutId; + UCHAR ucAction; + USHORT usLutStartIndex; + USHORT usLutLength; + USHORT usLutOffsetInVram; +}PALETTE_DATA_CONTROL_PARAMETERS_V3; + +// ucAction: +#define PALETTE_DATA_AUTO_FILL 1 +#define PALETTE_DATA_READ 2 +#define PALETTE_DATA_WRITE 3 + + +typedef struct _INTERRUPT_SERVICE_PARAMETERS_V2 +{ + UCHAR ucInterruptId; + UCHAR ucServiceId; + UCHAR ucStatus; + UCHAR ucReserved; +}INTERRUPT_SERVICE_PARAMETER_V2; + +// ucInterruptId +#define HDP1_INTERRUPT_ID 1 +#define HDP2_INTERRUPT_ID 2 +#define HDP3_INTERRUPT_ID 3 +#define HDP4_INTERRUPT_ID 4 +#define HDP5_INTERRUPT_ID 5 +#define HDP6_INTERRUPT_ID 6 +#define SW_INTERRUPT_ID 11 + +// ucAction +#define INTERRUPT_SERVICE_GEN_SW_INT 1 +#define INTERRUPT_SERVICE_GET_STATUS 2 + + // ucStatus +#define INTERRUPT_STATUS__INT_TRIGGER 1 +#define INTERRUPT_STATUS__HPD_HIGH 2 + typedef struct _INDIRECT_IO_ACCESS { ATOM_COMMON_TABLE_HEADER sHeader; @@ -5189,7 +5834,7 @@ typedef struct _ATOM_INIT_REG_BLOCK{ #define END_OF_REG_INDEX_BLOCK 0x0ffff #define END_OF_REG_DATA_BLOCK 0x00000000 -#define ATOM_INIT_REG_MASK_FLAG 0x80 +#define ATOM_INIT_REG_MASK_FLAG 0x80 //Not used in BIOS #define CLOCK_RANGE_HIGHEST 0x00ffffff #define VALUE_DWORD SIZEOF ULONG @@ -5229,6 +5874,7 @@ typedef struct _ATOM_MC_INIT_PARAM_TABLE #define _128Mx8 0x51 #define _128Mx16 0x52 #define _256Mx8 0x61 +#define _256Mx16 0x62 #define SAMSUNG 0x1 #define INFINEON 0x2 @@ -5585,7 +6231,7 @@ typedef struct _ATOM_VRAM_MODULE_V7 ULONG ulChannelMapCfg; // mmMC_SHARED_CHREMAP USHORT usModuleSize; // Size of ATOM_VRAM_MODULE_V7 USHORT usPrivateReserved; // MC_ARB_RAMCFG (includes NOOFBANK,NOOFRANKS,NOOFROWS,NOOFCOLS) - USHORT usReserved; + USHORT usEnableChannels; // bit vector which indicate which channels are enabled UCHAR ucExtMemoryID; // Current memory module ID UCHAR ucMemoryType; // MEM_TYPE_DDR2/DDR3/GDDR3/GDDR5 UCHAR ucChannelNum; // Number of mem. channels supported in this module @@ -5597,7 +6243,8 @@ typedef struct _ATOM_VRAM_MODULE_V7 UCHAR ucNPL_RT; // Round trip delay (MC_SEQ_CAS_TIMING [28:24]:TCL=CL+NPL_RT-2). Always 2. UCHAR ucPreamble; // [7:4] Write Preamble, [3:0] Read Preamble UCHAR ucMemorySize; // Total memory size in unit of 16MB for CONFIG_MEMSIZE - bit[23:0] zeros - UCHAR ucReserved[3]; + USHORT usSEQSettingOffset; + UCHAR ucReserved; // Memory Module specific values USHORT usEMRS2Value; // EMRS2/MR2 Value. USHORT usEMRS3Value; // EMRS3/MR3 Value. @@ -5633,10 +6280,10 @@ typedef struct _ATOM_VRAM_INFO_V3 typedef struct _ATOM_VRAM_INFO_V4 { ATOM_COMMON_TABLE_HEADER sHeader; - USHORT usMemAdjustTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting - USHORT usMemClkPatchTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting - USHORT usRerseved; - UCHAR ucMemDQ7_0ByteRemap; // DQ line byte remap, =0: Memory Data line BYTE0, =1: BYTE1, =2: BYTE2, =3: BYTE3 + USHORT usMemAdjustTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting + USHORT usMemClkPatchTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting + USHORT usRerseved; + UCHAR ucMemDQ7_0ByteRemap; // DQ line byte remap, =0: Memory Data line BYTE0, =1: BYTE1, =2: BYTE2, =3: BYTE3 ULONG ulMemDQ7_0BitRemap; // each DQ line ( 7~0) use 3bits, like: DQ0=Bit[2:0], DQ1:[5:3], ... DQ7:[23:21] UCHAR ucReservde[4]; UCHAR ucNumOfVRAMModule; @@ -5648,9 +6295,10 @@ typedef struct _ATOM_VRAM_INFO_V4 typedef struct _ATOM_VRAM_INFO_HEADER_V2_1 { ATOM_COMMON_TABLE_HEADER sHeader; - USHORT usMemAdjustTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting - USHORT usMemClkPatchTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting - USHORT usReserved[4]; + USHORT usMemAdjustTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting + USHORT usMemClkPatchTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting + USHORT usPerBytePresetOffset; // offset of ATOM_INIT_REG_BLOCK structure for Per Byte Offset Preset Settings + USHORT usReserved[3]; UCHAR ucNumOfVRAMModule; // indicate number of VRAM module UCHAR ucMemoryClkPatchTblVer; // version of memory AC timing register list UCHAR ucVramModuleVer; // indicate ATOM_VRAM_MODUE version @@ -5935,6 +6583,52 @@ typedef struct _ATOM_DISP_OUT_INFO_V2 ASIC_ENCODER_INFO asEncoderInfo[1]; }ATOM_DISP_OUT_INFO_V2; + +typedef struct _ATOM_DISP_CLOCK_ID { + UCHAR ucPpllId; + UCHAR ucPpllAttribute; +}ATOM_DISP_CLOCK_ID; + +// ucPpllAttribute +#define CLOCK_SOURCE_SHAREABLE 0x01 +#define CLOCK_SOURCE_DP_MODE 0x02 +#define CLOCK_SOURCE_NONE_DP_MODE 0x04 + +//DispOutInfoTable +typedef struct _ASIC_TRANSMITTER_INFO_V2 +{ + USHORT usTransmitterObjId; + USHORT usDispClkIdOffset; // point to clock source id list supported by Encoder Object + UCHAR ucTransmitterCmdTblId; + UCHAR ucConfig; + UCHAR ucEncoderID; // available 1st encoder ( default ) + UCHAR ucOptionEncoderID; // available 2nd encoder ( optional ) + UCHAR uc2ndEncoderID; + UCHAR ucReserved; +}ASIC_TRANSMITTER_INFO_V2; + +typedef struct _ATOM_DISP_OUT_INFO_V3 +{ + ATOM_COMMON_TABLE_HEADER sHeader; + USHORT ptrTransmitterInfo; + USHORT ptrEncoderInfo; + USHORT ptrMainCallParserFar; // direct address of main parser call in VBIOS binary. + USHORT usReserved; + UCHAR ucDCERevision; + UCHAR ucMaxDispEngineNum; + UCHAR ucMaxActiveDispEngineNum; + UCHAR ucMaxPPLLNum; + UCHAR ucCoreRefClkSource; // value of CORE_REF_CLK_SOURCE + UCHAR ucReserved[3]; + ASIC_TRANSMITTER_INFO_V2 asTransmitterInfo[1]; // for alligment only +}ATOM_DISP_OUT_INFO_V3; + +typedef enum CORE_REF_CLK_SOURCE{ + CLOCK_SRC_XTALIN=0, + CLOCK_SRC_XO_IN=1, + CLOCK_SRC_XO_IN2=2, +}CORE_REF_CLK_SOURCE; + // DispDevicePriorityInfo typedef struct _ATOM_DISPLAY_DEVICE_PRIORITY_INFO { @@ -6070,6 +6764,39 @@ typedef struct _PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS #define HW_I2C_READ 0 #define I2C_2BYTE_ADDR 0x02 +/****************************************************************************/ +// Structures used by HW_Misc_OperationTable +/****************************************************************************/ +typedef struct _ATOM_HW_MISC_OPERATION_INPUT_PARAMETER_V1_1 +{ + UCHAR ucCmd; // Input: To tell which action to take + UCHAR ucReserved[3]; + ULONG ulReserved; +}ATOM_HW_MISC_OPERATION_INPUT_PARAMETER_V1_1; + +typedef struct _ATOM_HW_MISC_OPERATION_OUTPUT_PARAMETER_V1_1 +{ + UCHAR ucReturnCode; // Output: Return value base on action was taken + UCHAR ucReserved[3]; + ULONG ulReserved; +}ATOM_HW_MISC_OPERATION_OUTPUT_PARAMETER_V1_1; + +// Actions code +#define ATOM_GET_SDI_SUPPORT 0xF0 + +// Return code +#define ATOM_UNKNOWN_CMD 0 +#define ATOM_FEATURE_NOT_SUPPORTED 1 +#define ATOM_FEATURE_SUPPORTED 2 + +typedef struct _ATOM_HW_MISC_OPERATION_PS_ALLOCATION +{ + ATOM_HW_MISC_OPERATION_INPUT_PARAMETER_V1_1 sInput_Output; + PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS sReserved; +}ATOM_HW_MISC_OPERATION_PS_ALLOCATION; + +/****************************************************************************/ + typedef struct _SET_HWBLOCK_INSTANCE_PARAMETER_V2 { UCHAR ucHWBlkInst; // HW block instance, 0, 1, 2, ... @@ -6090,6 +6817,52 @@ typedef struct _SET_HWBLOCK_INSTANCE_PARAMETER_V2 #define SELECT_CRTC_PIXEL_RATE 7 #define SELECT_VGA_BLK 8 +// DIGTransmitterInfoTable structure used to program UNIPHY settings +typedef struct _DIG_TRANSMITTER_INFO_HEADER_V3_1{ + ATOM_COMMON_TABLE_HEADER sHeader; + USHORT usDPVsPreEmphSettingOffset; // offset of PHY_ANALOG_SETTING_INFO * with DP Voltage Swing and Pre-Emphasis for each Link clock + USHORT usPhyAnalogRegListOffset; // offset of CLOCK_CONDITION_REGESTER_INFO* with None-DP mode Analog Setting's register Info + USHORT usPhyAnalogSettingOffset; // offset of CLOCK_CONDITION_SETTING_ENTRY* with None-DP mode Analog Setting for each link clock range + USHORT usPhyPllRegListOffset; // offset of CLOCK_CONDITION_REGESTER_INFO* with Phy Pll register Info + USHORT usPhyPllSettingOffset; // offset of CLOCK_CONDITION_SETTING_ENTRY* with Phy Pll Settings +}DIG_TRANSMITTER_INFO_HEADER_V3_1; + +typedef struct _CLOCK_CONDITION_REGESTER_INFO{ + USHORT usRegisterIndex; + UCHAR ucStartBit; + UCHAR ucEndBit; +}CLOCK_CONDITION_REGESTER_INFO; + +typedef struct _CLOCK_CONDITION_SETTING_ENTRY{ + USHORT usMaxClockFreq; + UCHAR ucEncodeMode; + UCHAR ucPhySel; + ULONG ulAnalogSetting[1]; +}CLOCK_CONDITION_SETTING_ENTRY; + +typedef struct _CLOCK_CONDITION_SETTING_INFO{ + USHORT usEntrySize; + CLOCK_CONDITION_SETTING_ENTRY asClkCondSettingEntry[1]; +}CLOCK_CONDITION_SETTING_INFO; + +typedef struct _PHY_CONDITION_REG_VAL{ + ULONG ulCondition; + ULONG ulRegVal; +}PHY_CONDITION_REG_VAL; + +typedef struct _PHY_CONDITION_REG_INFO{ + USHORT usRegIndex; + USHORT usSize; + PHY_CONDITION_REG_VAL asRegVal[1]; +}PHY_CONDITION_REG_INFO; + +typedef struct _PHY_ANALOG_SETTING_INFO{ + UCHAR ucEncodeMode; + UCHAR ucPhySel; + USHORT usSize; + PHY_CONDITION_REG_INFO asAnalogSetting[1]; +}PHY_ANALOG_SETTING_INFO; + /****************************************************************************/ //Portion VI: Definitinos for vbios MC scratch registers that driver used /****************************************************************************/ @@ -6497,6 +7270,8 @@ typedef struct _ATOM_PPLIB_THERMALCONTROLLER #define ATOM_PP_THERMALCONTROLLER_EMC2103 13 /* 0x0D */ // Only fan control will be implemented, do NOT show this in PPGen. #define ATOM_PP_THERMALCONTROLLER_SUMO 14 /* 0x0E */ // Sumo type, used internally #define ATOM_PP_THERMALCONTROLLER_NISLANDS 15 +#define ATOM_PP_THERMALCONTROLLER_SISLANDS 16 +#define ATOM_PP_THERMALCONTROLLER_LM96163 17 // Thermal controller 'combo type' to use an external controller for Fan control and an internal controller for thermal. // We probably should reserve the bit 0x80 for this use. @@ -6512,6 +7287,7 @@ typedef struct _ATOM_PPLIB_STATE UCHAR ucClockStateIndices[1]; // variable-sized } ATOM_PPLIB_STATE; + typedef struct _ATOM_PPLIB_FANTABLE { UCHAR ucFanTableFormat; // Change this if the table format changes or version changes so that the other fields are not the same. @@ -6524,12 +7300,20 @@ typedef struct _ATOM_PPLIB_FANTABLE USHORT usPWMHigh; // The PWM value at THigh. } ATOM_PPLIB_FANTABLE; +typedef struct _ATOM_PPLIB_FANTABLE2 +{ + ATOM_PPLIB_FANTABLE basicTable; + USHORT usTMax; // The max temperature +} ATOM_PPLIB_FANTABLE2; + typedef struct _ATOM_PPLIB_EXTENDEDHEADER { USHORT usSize; ULONG ulMaxEngineClock; // For Overdrive. ULONG ulMaxMemoryClock; // For Overdrive. // Add extra system parameters here, always adjust size to include all fields. + USHORT usVCETableOffset; //points to ATOM_PPLIB_VCE_Table + USHORT usUVDTableOffset; //points to ATOM_PPLIB_UVD_Table } ATOM_PPLIB_EXTENDEDHEADER; //// ATOM_PPLIB_POWERPLAYTABLE::ulPlatformCaps @@ -6552,6 +7336,7 @@ typedef struct _ATOM_PPLIB_EXTENDEDHEADER #define ATOM_PP_PLATFORM_CAP_REGULATOR_HOT 0x00010000 // Enable the 'regulator hot' feature. #define ATOM_PP_PLATFORM_CAP_BACO 0x00020000 // Does the driver supports BACO state. + typedef struct _ATOM_PPLIB_POWERPLAYTABLE { ATOM_COMMON_TABLE_HEADER sHeader; @@ -6610,7 +7395,8 @@ typedef struct _ATOM_PPLIB_POWERPLAYTABLE4 USHORT usVddciDependencyOnMCLKOffset; USHORT usVddcDependencyOnMCLKOffset; USHORT usMaxClockVoltageOnDCOffset; - USHORT usReserved[2]; + USHORT usVddcPhaseShedLimitsTableOffset; // Points to ATOM_PPLIB_PhaseSheddingLimits_Table + USHORT usReserved; } ATOM_PPLIB_POWERPLAYTABLE4, *LPATOM_PPLIB_POWERPLAYTABLE4; typedef struct _ATOM_PPLIB_POWERPLAYTABLE5 @@ -6620,8 +7406,9 @@ typedef struct _ATOM_PPLIB_POWERPLAYTABLE5 ULONG ulNearTDPLimit; ULONG ulSQRampingThreshold; USHORT usCACLeakageTableOffset; // Points to ATOM_PPLIB_CAC_Leakage_Table - ULONG ulCACLeakage; // TBD, this parameter is still under discussion. Change to ulReserved if not needed. - ULONG ulReserved; + ULONG ulCACLeakage; // The iLeakage for driver calculated CAC leakage table + USHORT usTDPODLimit; + USHORT usLoadLineSlope; // in milliOhms * 100 } ATOM_PPLIB_POWERPLAYTABLE5, *LPATOM_PPLIB_POWERPLAYTABLE5; //// ATOM_PPLIB_NONCLOCK_INFO::usClassification @@ -6650,6 +7437,7 @@ typedef struct _ATOM_PPLIB_POWERPLAYTABLE5 //// ATOM_PPLIB_NONCLOCK_INFO::usClassification2 #define ATOM_PPLIB_CLASSIFICATION2_LIMITEDPOWERSOURCE_2 0x0001 #define ATOM_PPLIB_CLASSIFICATION2_ULV 0x0002 +#define ATOM_PPLIB_CLASSIFICATION2_MVC 0x0004 //Multi-View Codec (BD-3D) //// ATOM_PPLIB_NONCLOCK_INFO::ulCapsAndSettings #define ATOM_PPLIB_SINGLE_DISPLAY_ONLY 0x00000001 @@ -6673,7 +7461,9 @@ typedef struct _ATOM_PPLIB_POWERPLAYTABLE5 #define ATOM_PPLIB_SOFTWARE_DISABLE_LOADBALANCING 0x00001000 #define ATOM_PPLIB_SOFTWARE_ENABLE_SLEEP_FOR_TIMESTAMPS 0x00002000 -#define ATOM_PPLIB_DISALLOW_ON_DC 0x00004000 + +#define ATOM_PPLIB_DISALLOW_ON_DC 0x00004000 + #define ATOM_PPLIB_ENABLE_VARIBRIGHT 0x00008000 //memory related flags @@ -6735,7 +7525,7 @@ typedef struct _ATOM_PPLIB_R600_CLOCK_INFO #define ATOM_PPLIB_R600_FLAGS_UVDSAFE 2 #define ATOM_PPLIB_R600_FLAGS_BACKBIASENABLE 4 #define ATOM_PPLIB_R600_FLAGS_MEMORY_ODT_OFF 8 -#define ATOM_PPLIB_R600_FLAGS_MEMORY_DLL_OFF 16 +#define ATOM_PPLIB_R600_FLAGS_MEMORY_DLL_OFF 16 #define ATOM_PPLIB_R600_FLAGS_LOWPOWER 32 // On the RV770 use 'low power' setting (sequencer S0). typedef struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO @@ -6754,6 +7544,24 @@ typedef struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO } ATOM_PPLIB_EVERGREEN_CLOCK_INFO; +typedef struct _ATOM_PPLIB_SI_CLOCK_INFO +{ + USHORT usEngineClockLow; + UCHAR ucEngineClockHigh; + + USHORT usMemoryClockLow; + UCHAR ucMemoryClockHigh; + + USHORT usVDDC; + USHORT usVDDCI; + UCHAR ucPCIEGen; + UCHAR ucUnused1; + + ULONG ulFlags; // ATOM_PPLIB_SI_FLAGS_*, no flag is necessary for now + +} ATOM_PPLIB_SI_CLOCK_INFO; + + typedef struct _ATOM_PPLIB_RS780_CLOCK_INFO { @@ -6766,7 +7574,7 @@ typedef struct _ATOM_PPLIB_RS780_CLOCK_INFO UCHAR ucPadding; // For proper alignment and size. USHORT usVDDC; // For the 780, use: None, Low, High, Variable UCHAR ucMaxHTLinkWidth; // From SBIOS - {2, 4, 8, 16} - UCHAR ucMinHTLinkWidth; // From SBIOS - {2, 4, 8, 16}. Effective only if CDLW enabled. Minimum down stream width could be bigger as display BW requirement. + UCHAR ucMinHTLinkWidth; // From SBIOS - {2, 4, 8, 16}. Effective only if CDLW enabled. Minimum down stream width could be bigger as display BW requriement. USHORT usHTLinkFreq; // See definition ATOM_PPLIB_RS780_HTLINKFREQ_xxx or in MHz(>=200). ULONG ulFlags; } ATOM_PPLIB_RS780_CLOCK_INFO; @@ -6788,9 +7596,7 @@ typedef struct _ATOM_PPLIB_SUMO_CLOCK_INFO{ USHORT usEngineClockLow; //clockfrequency & 0xFFFF. The unit is in 10khz UCHAR ucEngineClockHigh; //clockfrequency >> 16. UCHAR vddcIndex; //2-bit vddc index; - UCHAR leakage; //please use 8-bit absolute value, not the 6-bit % value - //please initalize to 0 - UCHAR rsv; + USHORT tdpLimit; //please initalize to 0 USHORT rsv1; //please initialize to 0s @@ -6813,7 +7619,7 @@ typedef struct _ATOM_PPLIB_STATE_V2 UCHAR clockInfoIndex[1]; } ATOM_PPLIB_STATE_V2; -typedef struct StateArray{ +typedef struct _StateArray{ //how many states we have UCHAR ucNumEntries; @@ -6821,18 +7627,17 @@ typedef struct StateArray{ }StateArray; -typedef struct ClockInfoArray{ +typedef struct _ClockInfoArray{ //how many clock levels we have UCHAR ucNumEntries; - //sizeof(ATOM_PPLIB_SUMO_CLOCK_INFO) + //sizeof(ATOM_PPLIB_CLOCK_INFO) UCHAR ucEntrySize; - //this is for Sumo - ATOM_PPLIB_SUMO_CLOCK_INFO clockInfo[1]; + UCHAR clockInfo[1]; }ClockInfoArray; -typedef struct NonClockInfoArray{ +typedef struct _NonClockInfoArray{ //how many non-clock levels we have. normally should be same as number of states UCHAR ucNumEntries; @@ -6871,6 +7676,124 @@ typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Table ATOM_PPLIB_Clock_Voltage_Limit_Record entries[1]; // Dynamically allocate entries. }ATOM_PPLIB_Clock_Voltage_Limit_Table; +typedef struct _ATOM_PPLIB_CAC_Leakage_Record +{ + USHORT usVddc; // We use this field for the "fake" standardized VDDC for power calculations + ULONG ulLeakageValue; +}ATOM_PPLIB_CAC_Leakage_Record; + +typedef struct _ATOM_PPLIB_CAC_Leakage_Table +{ + UCHAR ucNumEntries; // Number of entries. + ATOM_PPLIB_CAC_Leakage_Record entries[1]; // Dynamically allocate entries. +}ATOM_PPLIB_CAC_Leakage_Table; + +typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Record +{ + USHORT usVoltage; + USHORT usSclkLow; + UCHAR ucSclkHigh; + USHORT usMclkLow; + UCHAR ucMclkHigh; +}ATOM_PPLIB_PhaseSheddingLimits_Record; + +typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Table +{ + UCHAR ucNumEntries; // Number of entries. + ATOM_PPLIB_PhaseSheddingLimits_Record entries[1]; // Dynamically allocate entries. +}ATOM_PPLIB_PhaseSheddingLimits_Table; + +typedef struct _VCEClockInfo{ + USHORT usEVClkLow; + UCHAR ucEVClkHigh; + USHORT usECClkLow; + UCHAR ucECClkHigh; +}VCEClockInfo; + +typedef struct _VCEClockInfoArray{ + UCHAR ucNumEntries; + VCEClockInfo entries[1]; +}VCEClockInfoArray; + +typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record +{ + USHORT usVoltage; + UCHAR ucVCEClockInfoIndex; +}ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record; + +typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table +{ + UCHAR numEntries; + ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record entries[1]; +}ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table; + +typedef struct _ATOM_PPLIB_VCE_State_Record +{ + UCHAR ucVCEClockInfoIndex; + UCHAR ucClockInfoIndex; //highest 2 bits indicates memory p-states, lower 6bits indicates index to ClockInfoArrary +}ATOM_PPLIB_VCE_State_Record; + +typedef struct _ATOM_PPLIB_VCE_State_Table +{ + UCHAR numEntries; + ATOM_PPLIB_VCE_State_Record entries[1]; +}ATOM_PPLIB_VCE_State_Table; + + +typedef struct _ATOM_PPLIB_VCE_Table +{ + UCHAR revid; +// VCEClockInfoArray array; +// ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table limits; +// ATOM_PPLIB_VCE_State_Table states; +}ATOM_PPLIB_VCE_Table; + + +typedef struct _UVDClockInfo{ + USHORT usVClkLow; + UCHAR ucVClkHigh; + USHORT usDClkLow; + UCHAR ucDClkHigh; +}UVDClockInfo; + +typedef struct _UVDClockInfoArray{ + UCHAR ucNumEntries; + UVDClockInfo entries[1]; +}UVDClockInfoArray; + +typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record +{ + USHORT usVoltage; + UCHAR ucUVDClockInfoIndex; +}ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record; + +typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table +{ + UCHAR numEntries; + ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record entries[1]; +}ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table; + +typedef struct _ATOM_PPLIB_UVD_State_Record +{ + UCHAR ucUVDClockInfoIndex; + UCHAR ucClockInfoIndex; //highest 2 bits indicates memory p-states, lower 6bits indicates index to ClockInfoArrary +}ATOM_PPLIB_UVD_State_Record; + +typedef struct _ATOM_PPLIB_UVD_State_Table +{ + UCHAR numEntries; + ATOM_PPLIB_UVD_State_Record entries[1]; +}ATOM_PPLIB_UVD_State_Table; + + +typedef struct _ATOM_PPLIB_UVD_Table +{ + UCHAR revid; +// UVDClockInfoArray array; +// ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table limits; +// ATOM_PPLIB_UVD_State_Table states; +}ATOM_PPLIB_UVD_Table; + /**************************************************************************/ @@ -7020,4 +7943,68 @@ typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Table #pragma pack() // BIOS data must use byte aligment +// +// AMD ACPI Table +// +#pragma pack(1) + +typedef struct { + ULONG Signature; + ULONG TableLength; //Length + UCHAR Revision; + UCHAR Checksum; + UCHAR OemId[6]; + UCHAR OemTableId[8]; //UINT64 OemTableId; + ULONG OemRevision; + ULONG CreatorId; + ULONG CreatorRevision; +} AMD_ACPI_DESCRIPTION_HEADER; +/* +//EFI_ACPI_DESCRIPTION_HEADER from AcpiCommon.h +typedef struct { + UINT32 Signature; //0x0 + UINT32 Length; //0x4 + UINT8 Revision; //0x8 + UINT8 Checksum; //0x9 + UINT8 OemId[6]; //0xA + UINT64 OemTableId; //0x10 + UINT32 OemRevision; //0x18 + UINT32 CreatorId; //0x1C + UINT32 CreatorRevision; //0x20 +}EFI_ACPI_DESCRIPTION_HEADER; +*/ +typedef struct { + AMD_ACPI_DESCRIPTION_HEADER SHeader; + UCHAR TableUUID[16]; //0x24 + ULONG VBIOSImageOffset; //0x34. Offset to the first GOP_VBIOS_CONTENT block from the beginning of the stucture. + ULONG Lib1ImageOffset; //0x38. Offset to the first GOP_LIB1_CONTENT block from the beginning of the stucture. + ULONG Reserved[4]; //0x3C +}UEFI_ACPI_VFCT; + +typedef struct { + ULONG PCIBus; //0x4C + ULONG PCIDevice; //0x50 + ULONG PCIFunction; //0x54 + USHORT VendorID; //0x58 + USHORT DeviceID; //0x5A + USHORT SSVID; //0x5C + USHORT SSID; //0x5E + ULONG Revision; //0x60 + ULONG ImageLength; //0x64 +}VFCT_IMAGE_HEADER; + + +typedef struct { + VFCT_IMAGE_HEADER VbiosHeader; + UCHAR VbiosContent[1]; +}GOP_VBIOS_CONTENT; + +typedef struct { + VFCT_IMAGE_HEADER Lib1Header; + UCHAR Lib1Content[1]; +}GOP_LIB1_CONTENT; + +#pragma pack() + + #endif /* _ATOMBIOS_H */ diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 72672ea3f6d3..083b3eada001 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -231,6 +231,22 @@ static void atombios_blank_crtc(struct drm_crtc *crtc, int state) atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); } +static void atombios_powergate_crtc(struct drm_crtc *crtc, int state) +{ + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating); + ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args; + + memset(&args, 0, sizeof(args)); + + args.ucDispPipeId = radeon_crtc->crtc_id; + args.ucEnable = state; + + atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); +} + void atombios_crtc_dpms(struct drm_crtc *crtc, int mode) { struct drm_device *dev = crtc->dev; @@ -242,8 +258,11 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode) radeon_crtc->enabled = true; /* adjust pm to dpms changes BEFORE enabling crtcs */ radeon_pm_compute_clocks(rdev); + /* disable crtc pair power gating before programming */ + if (ASIC_IS_DCE6(rdev)) + atombios_powergate_crtc(crtc, ATOM_DISABLE); atombios_enable_crtc(crtc, ATOM_ENABLE); - if (ASIC_IS_DCE3(rdev)) + if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev)) atombios_enable_crtc_memreq(crtc, ATOM_ENABLE); atombios_blank_crtc(crtc, ATOM_DISABLE); drm_vblank_post_modeset(dev, radeon_crtc->crtc_id); @@ -255,10 +274,29 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode) drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id); if (radeon_crtc->enabled) atombios_blank_crtc(crtc, ATOM_ENABLE); - if (ASIC_IS_DCE3(rdev)) + if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev)) atombios_enable_crtc_memreq(crtc, ATOM_DISABLE); atombios_enable_crtc(crtc, ATOM_DISABLE); radeon_crtc->enabled = false; + /* power gating is per-pair */ + if (ASIC_IS_DCE6(rdev)) { + struct drm_crtc *other_crtc; + struct radeon_crtc *other_radeon_crtc; + list_for_each_entry(other_crtc, &rdev->ddev->mode_config.crtc_list, head) { + other_radeon_crtc = to_radeon_crtc(other_crtc); + if (((radeon_crtc->crtc_id == 0) && (other_radeon_crtc->crtc_id == 1)) || + ((radeon_crtc->crtc_id == 1) && (other_radeon_crtc->crtc_id == 0)) || + ((radeon_crtc->crtc_id == 2) && (other_radeon_crtc->crtc_id == 3)) || + ((radeon_crtc->crtc_id == 3) && (other_radeon_crtc->crtc_id == 2)) || + ((radeon_crtc->crtc_id == 4) && (other_radeon_crtc->crtc_id == 5)) || + ((radeon_crtc->crtc_id == 5) && (other_radeon_crtc->crtc_id == 4))) { + /* if both crtcs in the pair are off, enable power gating */ + if (other_radeon_crtc->enabled == false) + atombios_powergate_crtc(crtc, ATOM_ENABLE); + break; + } + } + } /* adjust pm to dpms changes AFTER disabling crtcs */ radeon_pm_compute_clocks(rdev); break; @@ -436,7 +474,7 @@ static void atombios_crtc_program_ss(struct radeon_device *rdev, return; } args.v3.ucEnable = enable; - if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK)) + if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK) || ASIC_IS_DCE61(rdev)) args.v3.ucEnable = ATOM_DISABLE; } else if (ASIC_IS_DCE4(rdev)) { args.v2.usSpreadSpectrumPercentage = cpu_to_le16(ss->percentage); @@ -699,7 +737,7 @@ union set_pixel_clock { /* on DCE5, make sure the voltage is high enough to support the * required disp clk. */ -static void atombios_crtc_set_dcpll(struct radeon_device *rdev, +static void atombios_crtc_set_disp_eng_pll(struct radeon_device *rdev, u32 dispclk) { u8 frev, crev; @@ -729,7 +767,12 @@ static void atombios_crtc_set_dcpll(struct radeon_device *rdev, * SetPixelClock provides the dividers */ args.v6.ulDispEngClkFreq = cpu_to_le32(dispclk); - args.v6.ucPpll = ATOM_DCPLL; + if (ASIC_IS_DCE61(rdev)) + args.v6.ucPpll = ATOM_EXT_PLL1; + else if (ASIC_IS_DCE6(rdev)) + args.v6.ucPpll = ATOM_PPLL0; + else + args.v6.ucPpll = ATOM_DCPLL; break; default: DRM_ERROR("Unknown table version %d %d\n", frev, crev); @@ -1444,7 +1487,36 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) struct drm_crtc *test_crtc; uint32_t pll_in_use = 0; - if (ASIC_IS_DCE4(rdev)) { + if (ASIC_IS_DCE61(rdev)) { + list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) { + if (test_encoder->crtc && (test_encoder->crtc == crtc)) { + struct radeon_encoder *test_radeon_encoder = + to_radeon_encoder(test_encoder); + struct radeon_encoder_atom_dig *dig = + test_radeon_encoder->enc_priv; + + if ((test_radeon_encoder->encoder_id == + ENCODER_OBJECT_ID_INTERNAL_UNIPHY) && + (dig->linkb == false)) /* UNIPHY A uses PPLL2 */ + return ATOM_PPLL2; + } + } + /* UNIPHY B/C/D/E/F */ + list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_test_crtc; + + if (crtc == test_crtc) + continue; + + radeon_test_crtc = to_radeon_crtc(test_crtc); + if ((radeon_test_crtc->pll_id == ATOM_PPLL0) || + (radeon_test_crtc->pll_id == ATOM_PPLL1)) + pll_in_use |= (1 << radeon_test_crtc->pll_id); + } + if (!(pll_in_use & 4)) + return ATOM_PPLL0; + return ATOM_PPLL1; + } else if (ASIC_IS_DCE4(rdev)) { list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) { if (test_encoder->crtc && (test_encoder->crtc == crtc)) { /* in DP mode, the DP ref clock can come from PPLL, DCPLL, or ext clock, @@ -1483,10 +1555,12 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) } -void radeon_atom_dcpll_init(struct radeon_device *rdev) +void radeon_atom_disp_eng_pll_init(struct radeon_device *rdev) { /* always set DCPLL */ - if (ASIC_IS_DCE4(rdev)) { + if (ASIC_IS_DCE6(rdev)) + atombios_crtc_set_disp_eng_pll(rdev, rdev->clock.default_dispclk); + else if (ASIC_IS_DCE4(rdev)) { struct radeon_atom_ss ss; bool ss_enabled = radeon_atombios_get_asic_ss_info(rdev, &ss, ASIC_INTERNAL_SS_ON_DCPLL, @@ -1494,7 +1568,7 @@ void radeon_atom_dcpll_init(struct radeon_device *rdev) if (ss_enabled) atombios_crtc_program_ss(rdev, ATOM_DISABLE, ATOM_DCPLL, &ss); /* XXX: DCE5, make sure voltage, dispclk is high enough */ - atombios_crtc_set_dcpll(rdev, rdev->clock.default_dispclk); + atombios_crtc_set_disp_eng_pll(rdev, rdev->clock.default_dispclk); if (ss_enabled) atombios_crtc_program_ss(rdev, ATOM_ENABLE, ATOM_DCPLL, &ss); } @@ -1572,6 +1646,8 @@ static void atombios_crtc_commit(struct drm_crtc *crtc) static void atombios_crtc_disable(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; struct radeon_atom_ss ss; atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); @@ -1583,6 +1659,12 @@ static void atombios_crtc_disable(struct drm_crtc *crtc) atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id, 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss); break; + case ATOM_PPLL0: + /* disable the ppll */ + if (ASIC_IS_DCE61(rdev)) + atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id, + 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss); + break; default: break; } diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 191218ad92e7..6c62be226804 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -63,12 +63,12 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan, memset(&args, 0, sizeof(args)); - base = (unsigned char *)rdev->mode_info.atom_context->scratch; + base = (unsigned char *)(rdev->mode_info.atom_context->scratch + 1); memcpy(base, send, send_bytes); - args.v1.lpAuxRequest = 0; - args.v1.lpDataOut = 16; + args.v1.lpAuxRequest = 0 + 4; + args.v1.lpDataOut = 16 + 4; args.v1.ucDataOutLen = 0; args.v1.ucChannelID = chan->rec.i2c_id; args.v1.ucDelay = delay / 10; diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index b88c4608731b..468b874336f9 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -479,7 +479,7 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) * - 2 DIG encoder blocks. * DIG1/2 can drive UNIPHY0/1/2 link A or link B * - * DCE 4.0/5.0 + * DCE 4.0/5.0/6.0 * - 3 DIG transmitter blocks UNIPHY0/1/2 (links A and B). * Supports up to 6 digital outputs * - 6 DIG encoder blocks. @@ -495,7 +495,11 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) * - 3 DIG transmitter blocks UNIPHY0/1/2 (links A and B). * Supports up to 6 digital outputs * - 2 DIG encoder blocks. + * llano * DIG1/2 can drive UNIPHY0/1/2 link A or link B + * ontario + * DIG1 drives UNIPHY0/1/2 link A + * DIG2 drives UNIPHY0/1/2 link B * * Routing * crtc -> dig encoder -> UNIPHY/LVTMA (1 or 2 links) @@ -703,6 +707,7 @@ union dig_transmitter_control { DIG_TRANSMITTER_CONTROL_PARAMETERS_V2 v2; DIG_TRANSMITTER_CONTROL_PARAMETERS_V3 v3; DIG_TRANSMITTER_CONTROL_PARAMETERS_V4 v4; + DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 v5; }; void @@ -723,6 +728,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t int connector_object_id = 0; int igp_lane_info = 0; int dig_encoder = dig->dig_encoder; + int hpd_id = RADEON_HPD_NONE; if (action == ATOM_TRANSMITTER_ACTION_INIT) { connector = radeon_get_connector_for_encoder_init(encoder); @@ -738,6 +744,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv; + hpd_id = radeon_connector->hpd.hpd; dp_clock = dig_connector->dp_clock; dp_lane_count = dig_connector->dp_lane_count; connector_object_id = @@ -1003,6 +1010,60 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t args.v4.acConfig.fDualLinkConnector = 1; } break; + case 5: + args.v5.ucAction = action; + if (is_dp) + args.v5.usSymClock = cpu_to_le16(dp_clock / 10); + else + args.v5.usSymClock = cpu_to_le16(radeon_encoder->pixel_clock / 10); + + switch (radeon_encoder->encoder_id) { + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: + if (dig->linkb) + args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYB; + else + args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYA; + break; + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: + if (dig->linkb) + args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYD; + else + args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYC; + break; + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: + if (dig->linkb) + args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYF; + else + args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYE; + break; + } + if (is_dp) + args.v5.ucLaneNum = dp_lane_count; + else if (radeon_encoder->pixel_clock > 165000) + args.v5.ucLaneNum = 8; + else + args.v5.ucLaneNum = 4; + args.v5.ucConnObjId = connector_object_id; + args.v5.ucDigMode = atombios_get_encoder_mode(encoder); + + if (is_dp && rdev->clock.dp_extclk) + args.v5.asConfig.ucPhyClkSrcId = ENCODER_REFCLK_SRC_EXTCLK; + else + args.v5.asConfig.ucPhyClkSrcId = pll_id; + + if (is_dp) + args.v5.asConfig.ucCoherentMode = 1; /* DP requires coherent */ + else if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) { + if (dig->coherent_mode) + args.v5.asConfig.ucCoherentMode = 1; + } + if (hpd_id == RADEON_HPD_NONE) + args.v5.asConfig.ucHPDSel = 0; + else + args.v5.asConfig.ucHPDSel = hpd_id + 1; + args.v5.ucDigEncoderSel = 1 << dig_encoder; + args.v5.ucDPLaneSet = lane_set; + break; default: DRM_ERROR("Unknown table version %d, %d\n", frev, crev); break; @@ -1377,7 +1438,7 @@ radeon_atom_encoder_dpms_ext(struct drm_encoder *encoder, switch (mode) { case DRM_MODE_DPMS_ON: default: - if (ASIC_IS_DCE41(rdev)) { + if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev)) { atombios_external_encoder_setup(encoder, ext_encoder, EXTERNAL_ENCODER_ACTION_V3_ENABLE_OUTPUT); atombios_external_encoder_setup(encoder, ext_encoder, @@ -1388,7 +1449,7 @@ radeon_atom_encoder_dpms_ext(struct drm_encoder *encoder, case DRM_MODE_DPMS_STANDBY: case DRM_MODE_DPMS_SUSPEND: case DRM_MODE_DPMS_OFF: - if (ASIC_IS_DCE41(rdev)) { + if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev)) { atombios_external_encoder_setup(encoder, ext_encoder, EXTERNAL_ENCODER_ACTION_V3_ENCODER_BLANKING); atombios_external_encoder_setup(encoder, ext_encoder, @@ -1761,7 +1822,7 @@ radeon_atom_encoder_init(struct radeon_device *rdev) break; } - if (ext_encoder && ASIC_IS_DCE41(rdev)) + if (ext_encoder && (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev))) atombios_external_encoder_setup(encoder, ext_encoder, EXTERNAL_ENCODER_ACTION_V3_ENCODER_INIT); } @@ -1850,7 +1911,7 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder, } if (ext_encoder) { - if (ASIC_IS_DCE41(rdev)) + if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev)) atombios_external_encoder_setup(encoder, ext_encoder, EXTERNAL_ENCODER_ACTION_V3_ENCODER_SETUP); else diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 466db4115cd5..cfa372cb1cb3 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -581,7 +581,7 @@ static u32 evergreen_line_buffer_adjust(struct radeon_device *rdev, return 0; } -static u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev) +u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev) { u32 tmp = RREG32(MC_SHARED_CHMAP); @@ -1328,7 +1328,10 @@ void evergreen_mc_program(struct radeon_device *rdev) rdev->mc.vram_end >> 12); } WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, rdev->vram_scratch.gpu_addr >> 12); - if (rdev->flags & RADEON_IS_IGP) { + /* llano/ontario only */ + if ((rdev->family == CHIP_PALM) || + (rdev->family == CHIP_SUMO) || + (rdev->family == CHIP_SUMO2)) { tmp = RREG32(MC_FUS_VM_FB_OFFSET) & 0x000FFFFF; tmp |= ((rdev->mc.vram_end >> 20) & 0xF) << 24; tmp |= ((rdev->mc.vram_start >> 20) & 0xF) << 20; @@ -1972,7 +1975,9 @@ static void evergreen_gpu_init(struct radeon_device *rdev) mc_shared_chmap = RREG32(MC_SHARED_CHMAP); - if (rdev->flags & RADEON_IS_IGP) + if ((rdev->family == CHIP_PALM) || + (rdev->family == CHIP_SUMO) || + (rdev->family == CHIP_SUMO2)) mc_arb_ramcfg = RREG32(FUS_MC_ARB_RAMCFG); else mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); @@ -2362,7 +2367,9 @@ int evergreen_mc_init(struct radeon_device *rdev) /* Get VRAM informations */ rdev->mc.vram_is_ddr = true; - if (rdev->flags & RADEON_IS_IGP) + if ((rdev->family == CHIP_PALM) || + (rdev->family == CHIP_SUMO) || + (rdev->family == CHIP_SUMO2)) tmp = RREG32(FUS_MC_ARB_RAMCFG); else tmp = RREG32(MC_ARB_RAMCFG); @@ -2394,12 +2401,14 @@ int evergreen_mc_init(struct radeon_device *rdev) rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); /* Setup GPU memory space */ - if (rdev->flags & RADEON_IS_IGP) { + if ((rdev->family == CHIP_PALM) || + (rdev->family == CHIP_SUMO) || + (rdev->family == CHIP_SUMO2)) { /* size in bytes on fusion */ rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); } else { - /* size in MB on evergreen */ + /* size in MB on evergreen/cayman/tn */ rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; } @@ -2557,7 +2566,9 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev) WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); } - WREG32(DACA_AUTODETECT_INT_CONTROL, 0); + /* only one DAC on DCE6 */ + if (!ASIC_IS_DCE6(rdev)) + WREG32(DACA_AUTODETECT_INT_CONTROL, 0); WREG32(DACB_AUTODETECT_INT_CONTROL, 0); tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY; diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 4e83fdcf4bc5..222acd2d33df 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -226,7 +226,7 @@ set_scissors(struct radeon_device *rdev, int x1, int y1, x1 = 1; if (y2 == 0) y1 = 1; - if (rdev->family == CHIP_CAYMAN) { + if (rdev->family >= CHIP_CAYMAN) { if ((x2 == 1) && (y2 == 1)) x2 = 2; } diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 160799c14b91..a48ca53fcd6a 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -42,6 +42,8 @@ extern void evergreen_irq_suspend(struct radeon_device *rdev); extern int evergreen_mc_init(struct radeon_device *rdev); extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev); extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev); +extern void si_rlc_fini(struct radeon_device *rdev); +extern int si_rlc_init(struct radeon_device *rdev); #define EVERGREEN_PFP_UCODE_SIZE 1120 #define EVERGREEN_PM4_UCODE_SIZE 1376 @@ -53,6 +55,8 @@ extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev); #define CAYMAN_RLC_UCODE_SIZE 1024 #define CAYMAN_MC_UCODE_SIZE 6037 +#define ARUBA_RLC_UCODE_SIZE 1536 + /* Firmware Names */ MODULE_FIRMWARE("radeon/BARTS_pfp.bin"); MODULE_FIRMWARE("radeon/BARTS_me.bin"); @@ -68,6 +72,9 @@ MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin"); MODULE_FIRMWARE("radeon/CAYMAN_me.bin"); MODULE_FIRMWARE("radeon/CAYMAN_mc.bin"); MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin"); +MODULE_FIRMWARE("radeon/ARUBA_pfp.bin"); +MODULE_FIRMWARE("radeon/ARUBA_me.bin"); +MODULE_FIRMWARE("radeon/ARUBA_rlc.bin"); #define BTC_IO_MC_REGS_SIZE 29 @@ -326,6 +333,15 @@ int ni_init_microcode(struct radeon_device *rdev) rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4; mc_req_size = CAYMAN_MC_UCODE_SIZE * 4; break; + case CHIP_ARUBA: + chip_name = "ARUBA"; + rlc_chip_name = "ARUBA"; + /* pfp/me same size as CAYMAN */ + pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4; + me_req_size = CAYMAN_PM4_UCODE_SIZE * 4; + rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4; + mc_req_size = 0; + break; default: BUG(); } @@ -365,15 +381,18 @@ int ni_init_microcode(struct radeon_device *rdev) err = -EINVAL; } - snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); - err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev); - if (err) - goto out; - if (rdev->mc_fw->size != mc_req_size) { - printk(KERN_ERR - "ni_mc: Bogus length %zu in firmware \"%s\"\n", - rdev->mc_fw->size, fw_name); - err = -EINVAL; + /* no MC ucode on TN */ + if (!(rdev->flags & RADEON_IS_IGP)) { + snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); + err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->mc_fw->size != mc_req_size) { + printk(KERN_ERR + "ni_mc: Bogus length %zu in firmware \"%s\"\n", + rdev->mc_fw->size, fw_name); + err = -EINVAL; + } } out: platform_device_unregister(pdev); @@ -478,6 +497,7 @@ static u32 cayman_get_tile_pipe_to_backend_map(struct radeon_device *rdev, memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * CAYMAN_MAX_PIPES); switch (rdev->family) { case CHIP_CAYMAN: + case CHIP_ARUBA: force_no_swizzle = true; break; default: @@ -610,7 +630,6 @@ static void cayman_gpu_init(struct radeon_device *rdev) switch (rdev->family) { case CHIP_CAYMAN: - default: rdev->config.cayman.max_shader_engines = 2; rdev->config.cayman.max_pipes_per_simd = 4; rdev->config.cayman.max_tile_pipes = 8; @@ -632,6 +651,43 @@ static void cayman_gpu_init(struct radeon_device *rdev) rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30; rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130; break; + case CHIP_ARUBA: + default: + rdev->config.cayman.max_shader_engines = 1; + rdev->config.cayman.max_pipes_per_simd = 4; + rdev->config.cayman.max_tile_pipes = 2; + if ((rdev->pdev->device == 0x9900) || + (rdev->pdev->device == 0x9901)) { + rdev->config.cayman.max_simds_per_se = 6; + rdev->config.cayman.max_backends_per_se = 2; + } else if ((rdev->pdev->device == 0x9903) || + (rdev->pdev->device == 0x9904)) { + rdev->config.cayman.max_simds_per_se = 4; + rdev->config.cayman.max_backends_per_se = 2; + } else if ((rdev->pdev->device == 0x9990) || + (rdev->pdev->device == 0x9991)) { + rdev->config.cayman.max_simds_per_se = 3; + rdev->config.cayman.max_backends_per_se = 1; + } else { + rdev->config.cayman.max_simds_per_se = 2; + rdev->config.cayman.max_backends_per_se = 1; + } + rdev->config.cayman.max_texture_channel_caches = 2; + rdev->config.cayman.max_gprs = 256; + rdev->config.cayman.max_threads = 256; + rdev->config.cayman.max_gs_threads = 32; + rdev->config.cayman.max_stack_entries = 512; + rdev->config.cayman.sx_num_of_sets = 8; + rdev->config.cayman.sx_max_export_size = 256; + rdev->config.cayman.sx_max_export_pos_size = 64; + rdev->config.cayman.sx_max_export_smx_size = 192; + rdev->config.cayman.max_hw_contexts = 8; + rdev->config.cayman.sq_num_cf_insts = 2; + + rdev->config.cayman.sc_prim_fifo_size = 0x40; + rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30; + rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130; + break; } /* Initialize HDP */ @@ -652,7 +708,9 @@ static void cayman_gpu_init(struct radeon_device *rdev) cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE); cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG); - cgts_tcc_disable = 0xff000000; + cgts_tcc_disable = 0xffff0000; + for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++) + cgts_tcc_disable &= ~(1 << (16 + i)); gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE); gc_user_shader_pipe_config = RREG32(GC_USER_SHADER_PIPE_CONFIG); cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE); @@ -804,8 +862,13 @@ static void cayman_gpu_init(struct radeon_device *rdev) rdev->config.cayman.tile_config |= (3 << 0); break; } - rdev->config.cayman.tile_config |= - ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; + + /* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */ + if (rdev->flags & RADEON_IS_IGP) + rdev->config.evergreen.tile_config |= 1 << 4; + else + rdev->config.cayman.tile_config |= + ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; rdev->config.cayman.tile_config |= ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; rdev->config.cayman.tile_config |= @@ -1440,18 +1503,29 @@ static int cayman_startup(struct radeon_device *rdev) /* enable pcie gen2 link */ evergreen_pcie_gen2_enable(rdev); - if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) { - r = ni_init_microcode(rdev); + if (rdev->flags & RADEON_IS_IGP) { + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { + r = ni_init_microcode(rdev); + if (r) { + DRM_ERROR("Failed to load firmware!\n"); + return r; + } + } + } else { + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) { + r = ni_init_microcode(rdev); + if (r) { + DRM_ERROR("Failed to load firmware!\n"); + return r; + } + } + + r = ni_mc_load_microcode(rdev); if (r) { - DRM_ERROR("Failed to load firmware!\n"); + DRM_ERROR("Failed to load MC firmware!\n"); return r; } } - r = ni_mc_load_microcode(rdev); - if (r) { - DRM_ERROR("Failed to load MC firmware!\n"); - return r; - } r = r600_vram_scratch_init(rdev); if (r) @@ -1470,6 +1544,15 @@ static int cayman_startup(struct radeon_device *rdev) dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); } + /* allocate rlc buffers */ + if (rdev->flags & RADEON_IS_IGP) { + r = si_rlc_init(rdev); + if (r) { + DRM_ERROR("Failed to init rlc BOs!\n"); + return r; + } + } + /* allocate wb buffer */ r = radeon_wb_init(rdev); if (r) @@ -1654,6 +1737,8 @@ int cayman_init(struct radeon_device *rdev) dev_err(rdev->dev, "disabling GPU acceleration\n"); cayman_cp_fini(rdev); r600_irq_fini(rdev); + if (rdev->flags & RADEON_IS_IGP) + si_rlc_fini(rdev); radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_vm_manager_fini(rdev); @@ -1665,8 +1750,11 @@ int cayman_init(struct radeon_device *rdev) /* Don't start up if the MC ucode is missing. * The default clocks and voltages before the MC ucode * is loaded are not suffient for advanced operations. + * + * We can skip this check for TN, because there is no MC + * ucode. */ - if (!rdev->mc_fw) { + if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) { DRM_ERROR("radeon: MC ucode required for NI+.\n"); return -EINVAL; } @@ -1679,6 +1767,8 @@ void cayman_fini(struct radeon_device *rdev) r600_blit_fini(rdev); cayman_cp_fini(rdev); r600_irq_fini(rdev); + if (rdev->flags & RADEON_IS_IGP) + si_rlc_fini(rdev); radeon_wb_fini(rdev); radeon_vm_manager_fini(rdev); r100_ib_fini(rdev); @@ -1702,7 +1792,12 @@ int cayman_vm_init(struct radeon_device *rdev) /* number of VMs */ rdev->vm_manager.nvm = 8; /* base offset of vram pages */ - rdev->vm_manager.vram_base_offset = 0; + if (rdev->flags & RADEON_IS_IGP) { + u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET); + tmp <<= 22; + rdev->vm_manager.vram_base_offset = tmp; + } else + rdev->vm_manager.vram_base_offset = 0; return 0; } diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 9a7f3b6e02de..2aa7046ada56 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -106,6 +106,7 @@ #define SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 3) #define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 5) #define ENABLE_ADVANCED_DRIVER_MODEL (1 << 6) +#define FUS_MC_VM_FB_OFFSET 0x2068 #define MC_SHARED_BLACKOUT_CNTL 0x20ac #define MC_ARB_RAMCFG 0x2760 diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 5eb23829353f..391bd2636a80 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -49,6 +49,7 @@ #define EVERGREEN_PM4_UCODE_SIZE 1376 #define EVERGREEN_RLC_UCODE_SIZE 768 #define CAYMAN_RLC_UCODE_SIZE 1024 +#define ARUBA_RLC_UCODE_SIZE 1536 /* Firmware Names */ MODULE_FIRMWARE("radeon/R600_pfp.bin"); @@ -2778,7 +2779,7 @@ void r600_ih_ring_init(struct radeon_device *rdev, unsigned ring_size) rdev->ih.rptr = 0; } -static int r600_ih_ring_alloc(struct radeon_device *rdev) +int r600_ih_ring_alloc(struct radeon_device *rdev) { int r; @@ -2814,7 +2815,7 @@ static int r600_ih_ring_alloc(struct radeon_device *rdev) return 0; } -static void r600_ih_ring_fini(struct radeon_device *rdev) +void r600_ih_ring_fini(struct radeon_device *rdev) { int r; if (rdev->ih.ring_obj) { @@ -2861,10 +2862,17 @@ static int r600_rlc_init(struct radeon_device *rdev) r600_rlc_stop(rdev); - WREG32(RLC_HB_BASE, 0); WREG32(RLC_HB_CNTL, 0); - WREG32(RLC_HB_RPTR, 0); - WREG32(RLC_HB_WPTR, 0); + + if (rdev->family == CHIP_ARUBA) { + WREG32(TN_RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); + WREG32(TN_RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8); + } + if (rdev->family <= CHIP_CAYMAN) { + WREG32(RLC_HB_BASE, 0); + WREG32(RLC_HB_RPTR, 0); + WREG32(RLC_HB_WPTR, 0); + } if (rdev->family <= CHIP_CAICOS) { WREG32(RLC_HB_WPTR_LSB_ADDR, 0); WREG32(RLC_HB_WPTR_MSB_ADDR, 0); @@ -2873,7 +2881,12 @@ static int r600_rlc_init(struct radeon_device *rdev) WREG32(RLC_UCODE_CNTL, 0); fw_data = (const __be32 *)rdev->rlc_fw->data; - if (rdev->family >= CHIP_CAYMAN) { + if (rdev->family >= CHIP_ARUBA) { + for (i = 0; i < ARUBA_RLC_UCODE_SIZE; i++) { + WREG32(RLC_UCODE_ADDR, i); + WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++)); + } + } else if (rdev->family >= CHIP_CAYMAN) { for (i = 0; i < CAYMAN_RLC_UCODE_SIZE; i++) { WREG32(RLC_UCODE_ADDR, i); WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++)); diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 8ae328ff5fdd..3568a2e345fa 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -593,6 +593,10 @@ #define RLC_UCODE_ADDR 0x3f2c #define RLC_UCODE_DATA 0x3f30 +/* new for TN */ +#define TN_RLC_SAVE_AND_RESTORE_BASE 0x3f10 +#define TN_RLC_CLEAR_STATE_RESTORE_BASE 0x3f20 + #define SRBM_SOFT_RESET 0xe60 # define SOFT_RESET_RLC (1 << 13) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index d2870a014ec0..138b95216d8d 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -236,12 +236,12 @@ void radeon_pm_resume(struct radeon_device *rdev); void radeon_combios_get_power_modes(struct radeon_device *rdev); void radeon_atombios_get_power_modes(struct radeon_device *rdev); void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type); -int radeon_atom_get_max_vddc(struct radeon_device *rdev, u16 *voltage); void rs690_pm_info(struct radeon_device *rdev); extern int rv6xx_get_temp(struct radeon_device *rdev); extern int rv770_get_temp(struct radeon_device *rdev); extern int evergreen_get_temp(struct radeon_device *rdev); extern int sumo_get_temp(struct radeon_device *rdev); +extern int si_get_temp(struct radeon_device *rdev); extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw, unsigned *bankh, unsigned *mtaspect, unsigned *tile_split); @@ -632,6 +632,7 @@ struct radeon_ib { uint32_t *ptr; struct radeon_fence *fence; unsigned vm_id; + bool is_const_ib; }; /* @@ -771,6 +772,18 @@ struct r600_blit { void r600_blit_suspend(struct radeon_device *rdev); +/* + * SI RLC stuff + */ +struct si_rlc { + /* for power gating */ + struct radeon_bo *save_restore_obj; + uint64_t save_restore_gpu_addr; + /* for clear state */ + struct radeon_bo *clear_state_obj; + uint64_t clear_state_gpu_addr; +}; + int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib **ib, unsigned size); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib); @@ -836,7 +849,9 @@ struct radeon_cs_parser { int chunk_ib_idx; int chunk_relocs_idx; int chunk_flags_idx; + int chunk_const_ib_idx; struct radeon_ib *ib; + struct radeon_ib *const_ib; void *track; unsigned family; int parser_error; @@ -978,6 +993,7 @@ enum radeon_int_thermal_type { THERMAL_TYPE_EVERGREEN, THERMAL_TYPE_SUMO, THERMAL_TYPE_NI, + THERMAL_TYPE_SI, }; struct radeon_voltage { @@ -1369,6 +1385,37 @@ struct cayman_asic { struct r100_gpu_lockup lockup; }; +struct si_asic { + unsigned max_shader_engines; + unsigned max_pipes_per_simd; + unsigned max_tile_pipes; + unsigned max_simds_per_se; + unsigned max_backends_per_se; + unsigned max_texture_channel_caches; + unsigned max_gprs; + unsigned max_gs_threads; + unsigned max_hw_contexts; + unsigned sc_prim_fifo_size_frontend; + unsigned sc_prim_fifo_size_backend; + unsigned sc_hiz_tile_fifo_size; + unsigned sc_earlyz_tile_fifo_size; + + unsigned num_shader_engines; + unsigned num_tile_pipes; + unsigned num_backends_per_se; + unsigned backend_disable_mask_per_asic; + unsigned backend_map; + unsigned num_texture_channel_caches; + unsigned mem_max_burst_length_bytes; + unsigned mem_row_size_in_kb; + unsigned shader_engine_tile_size; + unsigned num_gpus; + unsigned multi_gpu_tile_size; + + unsigned tile_config; + struct r100_gpu_lockup lockup; +}; + union radeon_asic_config { struct r300_asic r300; struct r100_asic r100; @@ -1376,6 +1423,7 @@ union radeon_asic_config { struct rv770_asic rv770; struct evergreen_asic evergreen; struct cayman_asic cayman; + struct si_asic si; }; /* @@ -1491,10 +1539,12 @@ struct radeon_device { const struct firmware *pfp_fw; /* r6/700 PFP firmware */ const struct firmware *rlc_fw; /* r6/700 RLC firmware */ const struct firmware *mc_fw; /* NI MC firmware */ + const struct firmware *ce_fw; /* SI CE firmware */ struct r600_blit r600_blit; struct r600_vram_scratch vram_scratch; int msi_enabled; /* msi enabled */ struct r600_ih ih; /* r6/700 interrupt ring */ + struct si_rlc rlc; struct work_struct hotplug_work; int num_crtc; /* number of crtcs */ struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */ @@ -1638,6 +1688,9 @@ void r100_pll_errata_after_index(struct radeon_device *rdev); #define ASIC_IS_DCE41(rdev) ((rdev->family >= CHIP_PALM) && \ (rdev->flags & RADEON_IS_IGP)) #define ASIC_IS_DCE5(rdev) ((rdev->family >= CHIP_BARTS)) +#define ASIC_IS_DCE6(rdev) ((rdev->family >= CHIP_ARUBA)) +#define ASIC_IS_DCE61(rdev) ((rdev->family >= CHIP_ARUBA) && \ + (rdev->flags & RADEON_IS_IGP)) /* * BIOS helpers. diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 479c89e0af17..be4dc2ff0e40 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1408,6 +1408,200 @@ static struct radeon_asic cayman_asic = { }, }; +static struct radeon_asic trinity_asic = { + .init = &cayman_init, + .fini = &cayman_fini, + .suspend = &cayman_suspend, + .resume = &cayman_resume, + .gpu_is_lockup = &cayman_gpu_is_lockup, + .asic_reset = &cayman_asic_reset, + .vga_set_state = &r600_vga_set_state, + .ioctl_wait_idle = r600_ioctl_wait_idle, + .gui_idle = &r600_gui_idle, + .mc_wait_for_idle = &evergreen_mc_wait_for_idle, + .gart = { + .tlb_flush = &cayman_pcie_gart_tlb_flush, + .set_page = &rs600_gart_set_page, + }, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &cayman_ring_ib_execute, + .ib_parse = &evergreen_ib_parse, + .emit_fence = &cayman_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + .cs_parse = &evergreen_cs_parse, + .ring_test = &r600_ring_test, + .ib_test = &r600_ib_test, + }, + [CAYMAN_RING_TYPE_CP1_INDEX] = { + .ib_execute = &cayman_ring_ib_execute, + .ib_parse = &evergreen_ib_parse, + .emit_fence = &cayman_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + .cs_parse = &evergreen_cs_parse, + .ring_test = &r600_ring_test, + .ib_test = &r600_ib_test, + }, + [CAYMAN_RING_TYPE_CP2_INDEX] = { + .ib_execute = &cayman_ring_ib_execute, + .ib_parse = &evergreen_ib_parse, + .emit_fence = &cayman_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + .cs_parse = &evergreen_cs_parse, + .ring_test = &r600_ring_test, + .ib_test = &r600_ib_test, + } + }, + .irq = { + .set = &evergreen_irq_set, + .process = &evergreen_irq_process, + }, + .display = { + .bandwidth_update = &dce6_bandwidth_update, + .get_vblank_counter = &evergreen_get_vblank_counter, + .wait_for_vblank = &dce4_wait_for_vblank, + }, + .copy = { + .blit = &r600_copy_blit, + .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = NULL, + .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .copy = &r600_copy_blit, + .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + }, + .surface = { + .set_reg = r600_set_surface_reg, + .clear_reg = r600_clear_surface_reg, + }, + .hpd = { + .init = &evergreen_hpd_init, + .fini = &evergreen_hpd_fini, + .sense = &evergreen_hpd_sense, + .set_polarity = &evergreen_hpd_set_polarity, + }, + .pm = { + .misc = &evergreen_pm_misc, + .prepare = &evergreen_pm_prepare, + .finish = &evergreen_pm_finish, + .init_profile = &sumo_pm_init_profile, + .get_dynpm_state = &r600_pm_get_dynpm_state, + .get_engine_clock = &radeon_atom_get_engine_clock, + .set_engine_clock = &radeon_atom_set_engine_clock, + .get_memory_clock = NULL, + .set_memory_clock = NULL, + .get_pcie_lanes = NULL, + .set_pcie_lanes = NULL, + .set_clock_gating = NULL, + }, + .pflip = { + .pre_page_flip = &evergreen_pre_page_flip, + .page_flip = &evergreen_page_flip, + .post_page_flip = &evergreen_post_page_flip, + }, +}; + +static const struct radeon_vm_funcs si_vm_funcs = { + .init = &si_vm_init, + .fini = &si_vm_fini, + .bind = &si_vm_bind, + .unbind = &si_vm_unbind, + .tlb_flush = &si_vm_tlb_flush, + .page_flags = &cayman_vm_page_flags, + .set_page = &cayman_vm_set_page, +}; + +static struct radeon_asic si_asic = { + .init = &si_init, + .fini = &si_fini, + .suspend = &si_suspend, + .resume = &si_resume, + .gpu_is_lockup = &si_gpu_is_lockup, + .asic_reset = &si_asic_reset, + .vga_set_state = &r600_vga_set_state, + .ioctl_wait_idle = r600_ioctl_wait_idle, + .gui_idle = &r600_gui_idle, + .mc_wait_for_idle = &evergreen_mc_wait_for_idle, + .gart = { + .tlb_flush = &si_pcie_gart_tlb_flush, + .set_page = &rs600_gart_set_page, + }, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &si_ring_ib_execute, + .ib_parse = &si_ib_parse, + .emit_fence = &si_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_ring_test, + .ib_test = &r600_ib_test, + }, + [CAYMAN_RING_TYPE_CP1_INDEX] = { + .ib_execute = &si_ring_ib_execute, + .ib_parse = &si_ib_parse, + .emit_fence = &si_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_ring_test, + .ib_test = &r600_ib_test, + }, + [CAYMAN_RING_TYPE_CP2_INDEX] = { + .ib_execute = &si_ring_ib_execute, + .ib_parse = &si_ib_parse, + .emit_fence = &si_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_ring_test, + .ib_test = &r600_ib_test, + } + }, + .irq = { + .set = &si_irq_set, + .process = &si_irq_process, + }, + .display = { + .bandwidth_update = &dce6_bandwidth_update, + .get_vblank_counter = &evergreen_get_vblank_counter, + .wait_for_vblank = &dce4_wait_for_vblank, + }, + .copy = { + .blit = NULL, + .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = NULL, + .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .copy = NULL, + .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + }, + .surface = { + .set_reg = r600_set_surface_reg, + .clear_reg = r600_clear_surface_reg, + }, + .hpd = { + .init = &evergreen_hpd_init, + .fini = &evergreen_hpd_fini, + .sense = &evergreen_hpd_sense, + .set_polarity = &evergreen_hpd_set_polarity, + }, + .pm = { + .misc = &evergreen_pm_misc, + .prepare = &evergreen_pm_prepare, + .finish = &evergreen_pm_finish, + .init_profile = &sumo_pm_init_profile, + .get_dynpm_state = &r600_pm_get_dynpm_state, + .get_engine_clock = &radeon_atom_get_engine_clock, + .set_engine_clock = &radeon_atom_set_engine_clock, + .get_memory_clock = &radeon_atom_get_memory_clock, + .set_memory_clock = &radeon_atom_set_memory_clock, + .get_pcie_lanes = NULL, + .set_pcie_lanes = NULL, + .set_clock_gating = NULL, + }, + .pflip = { + .pre_page_flip = &evergreen_pre_page_flip, + .page_flip = &evergreen_page_flip, + .post_page_flip = &evergreen_post_page_flip, + }, +}; + int radeon_asic_init(struct radeon_device *rdev) { radeon_register_accessor_init(rdev); @@ -1525,6 +1719,20 @@ int radeon_asic_init(struct radeon_device *rdev) rdev->num_crtc = 6; rdev->vm_manager.funcs = &cayman_vm_funcs; break; + case CHIP_ARUBA: + rdev->asic = &trinity_asic; + /* set num crtcs */ + rdev->num_crtc = 4; + rdev->vm_manager.funcs = &cayman_vm_funcs; + break; + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + rdev->asic = &si_asic; + /* set num crtcs */ + rdev->num_crtc = 6; + rdev->vm_manager.funcs = &si_vm_funcs; + break; default: /* FIXME: not supported yet */ return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index b8f0a16bf65f..3d9f9f1d8f90 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -461,4 +461,29 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm, unsigned pfn, uint64_t addr, uint32_t flags); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); +/* DCE6 - SI */ +void dce6_bandwidth_update(struct radeon_device *rdev); + +/* + * si + */ +void si_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +void si_pcie_gart_tlb_flush(struct radeon_device *rdev); +int si_init(struct radeon_device *rdev); +void si_fini(struct radeon_device *rdev); +int si_suspend(struct radeon_device *rdev); +int si_resume(struct radeon_device *rdev); +bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); +int si_asic_reset(struct radeon_device *rdev); +void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +int si_irq_set(struct radeon_device *rdev); +int si_irq_process(struct radeon_device *rdev); +int si_vm_init(struct radeon_device *rdev); +void si_vm_fini(struct radeon_device *rdev); +int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id); +void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); +void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm); +int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); + #endif diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 73541373bf56..f6e69b8c06c6 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -56,6 +56,10 @@ extern void radeon_add_legacy_encoder(struct drm_device *dev, uint32_t encoder_enum, uint32_t supported_device); +/* local */ +static int radeon_atom_get_max_vddc(struct radeon_device *rdev, u8 voltage_type, + u16 voltage_id, u16 *voltage); + union atom_supported_devices { struct _ATOM_SUPPORTED_DEVICES_INFO info; struct _ATOM_SUPPORTED_DEVICES_INFO_2 info_2; @@ -253,7 +257,9 @@ static struct radeon_hpd radeon_atom_get_hpd_info_from_gpio(struct radeon_device memset(&hpd, 0, sizeof(struct radeon_hpd)); - if (ASIC_IS_DCE4(rdev)) + if (ASIC_IS_DCE6(rdev)) + reg = SI_DC_GPIO_HPD_A; + else if (ASIC_IS_DCE4(rdev)) reg = EVERGREEN_DC_GPIO_HPD_A; else reg = AVIVO_DC_GPIO_HPD_A; @@ -1888,6 +1894,8 @@ static const char *pp_lib_thermal_controller_names[] = { "emc2103", "Sumo", "Northern Islands", + "Southern Islands", + "lm96163", }; union power_info { @@ -1904,6 +1912,7 @@ union pplib_clock_info { struct _ATOM_PPLIB_RS780_CLOCK_INFO rs780; struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO evergreen; struct _ATOM_PPLIB_SUMO_CLOCK_INFO sumo; + struct _ATOM_PPLIB_SI_CLOCK_INFO si; }; union pplib_power_state { @@ -2161,6 +2170,11 @@ static void radeon_atombios_add_pplib_thermal_controller(struct radeon_device *r (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); rdev->pm.int_thermal_type = THERMAL_TYPE_NI; + } else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_SISLANDS) { + DRM_INFO("Internal thermal controller %s fan control\n", + (controller->ucFanParameters & + ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); + rdev->pm.int_thermal_type = THERMAL_TYPE_SI; } else if ((controller->ucType == ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO) || (controller->ucType == @@ -2281,6 +2295,7 @@ static bool radeon_atombios_parse_pplib_clock_info(struct radeon_device *rdev, union pplib_clock_info *clock_info) { u32 sclk, mclk; + u16 vddc; if (rdev->flags & RADEON_IS_IGP) { if (rdev->family >= CHIP_PALM) { @@ -2292,6 +2307,19 @@ static bool radeon_atombios_parse_pplib_clock_info(struct radeon_device *rdev, sclk |= clock_info->rs780.ucLowEngineClockHigh << 16; rdev->pm.power_state[state_index].clock_info[mode_index].sclk = sclk; } + } else if (ASIC_IS_DCE6(rdev)) { + sclk = le16_to_cpu(clock_info->si.usEngineClockLow); + sclk |= clock_info->si.ucEngineClockHigh << 16; + mclk = le16_to_cpu(clock_info->si.usMemoryClockLow); + mclk |= clock_info->si.ucMemoryClockHigh << 16; + rdev->pm.power_state[state_index].clock_info[mode_index].mclk = mclk; + rdev->pm.power_state[state_index].clock_info[mode_index].sclk = sclk; + rdev->pm.power_state[state_index].clock_info[mode_index].voltage.type = + VOLTAGE_SW; + rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage = + le16_to_cpu(clock_info->si.usVDDC); + rdev->pm.power_state[state_index].clock_info[mode_index].voltage.vddci = + le16_to_cpu(clock_info->si.usVDDCI); } else if (ASIC_IS_DCE4(rdev)) { sclk = le16_to_cpu(clock_info->evergreen.usEngineClockLow); sclk |= clock_info->evergreen.ucEngineClockHigh << 16; @@ -2319,11 +2347,18 @@ static bool radeon_atombios_parse_pplib_clock_info(struct radeon_device *rdev, } /* patch up vddc if necessary */ - if (rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage == 0xff01) { - u16 vddc; - - if (radeon_atom_get_max_vddc(rdev, &vddc) == 0) + switch (rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage) { + case ATOM_VIRTUAL_VOLTAGE_ID0: + case ATOM_VIRTUAL_VOLTAGE_ID1: + case ATOM_VIRTUAL_VOLTAGE_ID2: + case ATOM_VIRTUAL_VOLTAGE_ID3: + if (radeon_atom_get_max_vddc(rdev, VOLTAGE_TYPE_VDDC, + rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage, + &vddc) == 0) rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage = vddc; + break; + default: + break; } if (rdev->flags & RADEON_IS_IGP) { @@ -2433,9 +2468,9 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev) int i, j, non_clock_array_index, clock_array_index; int state_index = 0, mode_index = 0; union pplib_clock_info *clock_info; - struct StateArray *state_array; - struct ClockInfoArray *clock_info_array; - struct NonClockInfoArray *non_clock_info_array; + struct _StateArray *state_array; + struct _ClockInfoArray *clock_info_array; + struct _NonClockInfoArray *non_clock_info_array; bool valid; union power_info *power_info; int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo); @@ -2448,13 +2483,13 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev) power_info = (union power_info *)(mode_info->atom_context->bios + data_offset); radeon_atombios_add_pplib_thermal_controller(rdev, &power_info->pplib.sThermalController); - state_array = (struct StateArray *) + state_array = (struct _StateArray *) (mode_info->atom_context->bios + data_offset + le16_to_cpu(power_info->pplib.usStateArrayOffset)); - clock_info_array = (struct ClockInfoArray *) + clock_info_array = (struct _ClockInfoArray *) (mode_info->atom_context->bios + data_offset + le16_to_cpu(power_info->pplib.usClockInfoArrayOffset)); - non_clock_info_array = (struct NonClockInfoArray *) + non_clock_info_array = (struct _NonClockInfoArray *) (mode_info->atom_context->bios + data_offset + le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset)); rdev->pm.power_state = kzalloc(sizeof(struct radeon_power_state) * @@ -2481,7 +2516,7 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev) if (clock_array_index >= clock_info_array->ucNumEntries) continue; clock_info = (union pplib_clock_info *) - &clock_info_array->clockInfo[clock_array_index]; + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; valid = radeon_atombios_parse_pplib_clock_info(rdev, state_index, mode_index, clock_info); @@ -2638,6 +2673,7 @@ union set_voltage { struct _SET_VOLTAGE_PS_ALLOCATION alloc; struct _SET_VOLTAGE_PARAMETERS v1; struct _SET_VOLTAGE_PARAMETERS_V2 v2; + struct _SET_VOLTAGE_PARAMETERS_V1_3 v3; }; void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type) @@ -2664,6 +2700,11 @@ void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 v args.v2.ucVoltageMode = SET_ASIC_VOLTAGE_MODE_SET_VOLTAGE; args.v2.usVoltageLevel = cpu_to_le16(voltage_level); break; + case 3: + args.v3.ucVoltageType = voltage_type; + args.v3.ucVoltageMode = ATOM_SET_VOLTAGE; + args.v3.usVoltageLevel = cpu_to_le16(voltage_level); + break; default: DRM_ERROR("Unknown table version %d, %d\n", frev, crev); return; @@ -2672,8 +2713,8 @@ void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 v atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); } -int radeon_atom_get_max_vddc(struct radeon_device *rdev, - u16 *voltage) +static int radeon_atom_get_max_vddc(struct radeon_device *rdev, u8 voltage_type, + u16 voltage_id, u16 *voltage) { union set_voltage args; int index = GetIndexIntoMasterTable(COMMAND, SetVoltage); @@ -2694,6 +2735,15 @@ int radeon_atom_get_max_vddc(struct radeon_device *rdev, *voltage = le16_to_cpu(args.v2.usVoltageLevel); break; + case 3: + args.v3.ucVoltageType = voltage_type; + args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL; + args.v3.usVoltageLevel = cpu_to_le16(voltage_id); + + atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); + + *voltage = le16_to_cpu(args.v3.usVoltageLevel); + break; default: DRM_ERROR("Unknown table version %d, %d\n", frev, crev); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 64774ac94449..bd05156edbdb 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1085,7 +1085,7 @@ static int radeon_dvi_mode_valid(struct drm_connector *connector, (radeon_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_B)) return MODE_OK; else if (radeon_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_A) { - if (0) { + if (ASIC_IS_DCE6(rdev)) { /* HDMI 1.3+ supports max clock of 340 Mhz */ if (mode->clock > 340000) return MODE_CLOCK_HIGH; diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index d9d9f5a59c42..5cac83278338 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -103,8 +103,13 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority p->ring = RADEON_RING_TYPE_GFX_INDEX; break; case RADEON_CS_RING_COMPUTE: - /* for now */ - p->ring = RADEON_RING_TYPE_GFX_INDEX; + if (p->rdev->family >= CHIP_TAHITI) { + if (p->priority > 0) + p->ring = CAYMAN_RING_TYPE_CP1_INDEX; + else + p->ring = CAYMAN_RING_TYPE_CP2_INDEX; + } else + p->ring = RADEON_RING_TYPE_GFX_INDEX; break; } return 0; @@ -170,6 +175,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) p->chunk_ib_idx = -1; p->chunk_relocs_idx = -1; p->chunk_flags_idx = -1; + p->chunk_const_ib_idx = -1; p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL); if (p->chunks_array == NULL) { return -ENOMEM; @@ -208,6 +214,12 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) if (p->chunks[i].length_dw == 0) return -EINVAL; } + if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) { + p->chunk_const_ib_idx = i; + /* zero length CONST IB isn't useful */ + if (p->chunks[i].length_dw == 0) + return -EINVAL; + } if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) { p->chunk_flags_idx = i; /* zero length flags aren't useful */ @@ -246,6 +258,13 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return -EINVAL; } + /* we only support VM on SI+ */ + if ((p->rdev->family >= CHIP_TAHITI) && + ((p->cs_flags & RADEON_CS_USE_VM) == 0)) { + DRM_ERROR("VM required on SI+!\n"); + return -EINVAL; + } + if (radeon_cs_get_ring(p, ring, priority)) return -EINVAL; @@ -389,6 +408,32 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) return 0; + if ((rdev->family >= CHIP_TAHITI) && + (parser->chunk_const_ib_idx != -1)) { + ib_chunk = &parser->chunks[parser->chunk_const_ib_idx]; + if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) { + DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw); + return -EINVAL; + } + r = radeon_ib_get(rdev, parser->ring, &parser->const_ib, + ib_chunk->length_dw * 4); + if (r) { + DRM_ERROR("Failed to get const ib !\n"); + return r; + } + parser->const_ib->is_const_ib = true; + parser->const_ib->length_dw = ib_chunk->length_dw; + /* Copy the packet into the IB */ + if (DRM_COPY_FROM_USER(parser->const_ib->ptr, ib_chunk->user_ptr, + ib_chunk->length_dw * 4)) { + return -EFAULT; + } + r = radeon_ring_ib_parse(rdev, parser->ring, parser->const_ib); + if (r) { + return r; + } + } + ib_chunk = &parser->chunks[parser->chunk_ib_idx]; if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) { DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw); @@ -424,11 +469,25 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (r) { DRM_ERROR("Failed to synchronize rings !\n"); } + + if ((rdev->family >= CHIP_TAHITI) && + (parser->chunk_const_ib_idx != -1)) { + parser->const_ib->vm_id = vm->id; + /* ib pool is bind at 0 in virtual address space to gpu_addr is the + * offset inside the pool bo + */ + parser->const_ib->gpu_addr = parser->const_ib->sa_bo.offset; + r = radeon_ib_schedule(rdev, parser->const_ib); + if (r) + goto out; + } + parser->ib->vm_id = vm->id; /* ib pool is bind at 0 in virtual address space to gpu_addr is the * offset inside the pool bo */ parser->ib->gpu_addr = parser->ib->sa_bo.offset; + parser->ib->is_const_ib = false; r = radeon_ib_schedule(rdev, parser->ib); out: if (!r) { diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 49f7cb7e226b..ea7df16e2f84 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -89,6 +89,10 @@ static const char radeon_family_name[][16] = { "TURKS", "CAICOS", "CAYMAN", + "ARUBA", + "TAHITI", + "PITCAIRN", + "VERDE", "LAST", }; @@ -964,7 +968,7 @@ int radeon_resume_kms(struct drm_device *dev) /* init dig PHYs, disp eng pll */ if (rdev->is_atom_bios) { radeon_atom_encoder_init(rdev); - radeon_atom_dcpll_init(rdev); + radeon_atom_disp_eng_pll_init(rdev); } /* reset hpd state */ radeon_hpd_init(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 1ebcef25b915..8086c96e0b06 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1296,7 +1296,7 @@ int radeon_modeset_init(struct radeon_device *rdev) /* init dig PHYs, disp eng pll */ if (rdev->is_atom_bios) { radeon_atom_encoder_init(rdev); - radeon_atom_dcpll_init(rdev); + radeon_atom_disp_eng_pll_init(rdev); } /* initialize hpd */ diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 498d21d50ba3..ef7bb3f6ecae 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -56,9 +56,10 @@ * 2.12.0 - RADEON_CS_KEEP_TILING_FLAGS * 2.13.0 - virtual memory support, streamout * 2.14.0 - add evergreen tiling informations + * 2.15.0 - add max_pipes query */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 14 +#define KMS_DRIVER_MINOR 15 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index 26e92708d114..74670696277d 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -307,6 +307,8 @@ void radeon_panel_mode_fixup(struct drm_encoder *encoder, bool radeon_dig_monitor_is_duallink(struct drm_encoder *encoder, u32 pixel_clock) { + struct drm_device *dev = encoder->dev; + struct radeon_device *rdev = dev->dev_private; struct drm_connector *connector; struct radeon_connector *radeon_connector; struct radeon_connector_atom_dig *dig_connector; @@ -324,7 +326,7 @@ bool radeon_dig_monitor_is_duallink(struct drm_encoder *encoder, case DRM_MODE_CONNECTOR_HDMIB: if (radeon_connector->use_digital) { /* HDMI 1.3 supports up to 340 Mhz over single link */ - if (0 && drm_detect_hdmi_monitor(radeon_connector->edid)) { + if (ASIC_IS_DCE6(rdev) && drm_detect_hdmi_monitor(radeon_connector->edid)) { if (pixel_clock > 340000) return true; else @@ -346,7 +348,7 @@ bool radeon_dig_monitor_is_duallink(struct drm_encoder *encoder, return false; else { /* HDMI 1.3 supports up to 340 Mhz over single link */ - if (0 && drm_detect_hdmi_monitor(radeon_connector->edid)) { + if (ASIC_IS_DCE6(rdev) && drm_detect_hdmi_monitor(radeon_connector->edid)) { if (pixel_clock > 340000) return true; else diff --git a/drivers/gpu/drm/radeon/radeon_family.h b/drivers/gpu/drm/radeon/radeon_family.h index ec2f1ea84f81..d1fafeabea09 100644 --- a/drivers/gpu/drm/radeon/radeon_family.h +++ b/drivers/gpu/drm/radeon/radeon_family.h @@ -87,6 +87,10 @@ enum radeon_family { CHIP_TURKS, CHIP_CAICOS, CHIP_CAYMAN, + CHIP_ARUBA, + CHIP_TAHITI, + CHIP_PITCAIRN, + CHIP_VERDE, CHIP_LAST, }; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 1986ebae1ef2..3c2628b14d56 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -171,7 +171,9 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) value = rdev->accel_working; break; case RADEON_INFO_TILING_CONFIG: - if (rdev->family >= CHIP_CAYMAN) + if (rdev->family >= CHIP_TAHITI) + value = rdev->config.si.tile_config; + else if (rdev->family >= CHIP_CAYMAN) value = rdev->config.cayman.tile_config; else if (rdev->family >= CHIP_CEDAR) value = rdev->config.evergreen.tile_config; @@ -210,7 +212,10 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) value = rdev->clock.spll.reference_freq * 10; break; case RADEON_INFO_NUM_BACKENDS: - if (rdev->family >= CHIP_CAYMAN) + if (rdev->family >= CHIP_TAHITI) + value = rdev->config.si.max_backends_per_se * + rdev->config.si.max_shader_engines; + else if (rdev->family >= CHIP_CAYMAN) value = rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines; else if (rdev->family >= CHIP_CEDAR) @@ -224,7 +229,9 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } break; case RADEON_INFO_NUM_TILE_PIPES: - if (rdev->family >= CHIP_CAYMAN) + if (rdev->family >= CHIP_TAHITI) + value = rdev->config.si.max_tile_pipes; + else if (rdev->family >= CHIP_CAYMAN) value = rdev->config.cayman.max_tile_pipes; else if (rdev->family >= CHIP_CEDAR) value = rdev->config.evergreen.max_tile_pipes; @@ -240,7 +247,9 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) value = 1; break; case RADEON_INFO_BACKEND_MAP: - if (rdev->family >= CHIP_CAYMAN) + if (rdev->family >= CHIP_TAHITI) + value = rdev->config.si.backend_map; + else if (rdev->family >= CHIP_CAYMAN) value = rdev->config.cayman.backend_map; else if (rdev->family >= CHIP_CEDAR) value = rdev->config.evergreen.backend_map; @@ -264,6 +273,21 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return -EINVAL; value = RADEON_IB_VM_MAX_SIZE; break; + case RADEON_INFO_MAX_PIPES: + if (rdev->family >= CHIP_TAHITI) + value = rdev->config.si.max_pipes_per_simd; + else if (rdev->family >= CHIP_CAYMAN) + value = rdev->config.cayman.max_pipes_per_simd; + else if (rdev->family >= CHIP_CEDAR) + value = rdev->config.evergreen.max_pipes; + else if (rdev->family >= CHIP_RV770) + value = rdev->config.rv770.max_pipes; + else if (rdev->family >= CHIP_R600) + value = rdev->config.r600.max_pipes; + else { + return -EINVAL; + } + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 8a85598fb242..f7eb5d8b9fd3 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -491,7 +491,7 @@ extern int radeon_dp_get_panel_mode(struct drm_encoder *encoder, struct drm_connector *connector); extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action, int panel_mode); extern void radeon_atom_encoder_init(struct radeon_device *rdev); -extern void radeon_atom_dcpll_init(struct radeon_device *rdev); +extern void radeon_atom_disp_eng_pll_init(struct radeon_device *rdev); extern void atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t lane_num, uint8_t lane_set); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 3575129c1940..caa55d68f319 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -474,6 +474,9 @@ static ssize_t radeon_hwmon_show_temp(struct device *dev, case THERMAL_TYPE_SUMO: temp = sumo_get_temp(rdev); break; + case THERMAL_TYPE_SI: + temp = si_get_temp(rdev); + break; default: temp = 0; break; @@ -514,6 +517,10 @@ static int radeon_hwmon_init(struct radeon_device *rdev) case THERMAL_TYPE_EVERGREEN: case THERMAL_TYPE_NI: case THERMAL_TYPE_SUMO: + case THERMAL_TYPE_SI: + /* No support for TN yet */ + if (rdev->family == CHIP_ARUBA) + return err; rdev->pm.int_hwmon_dev = hwmon_device_register(rdev->dev); if (IS_ERR(rdev->pm.int_hwmon_dev)) { err = PTR_ERR(rdev->pm.int_hwmon_dev); diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h index 509863411285..5d8f735d6aaf 100644 --- a/drivers/gpu/drm/radeon/radeon_reg.h +++ b/drivers/gpu/drm/radeon/radeon_reg.h @@ -56,6 +56,7 @@ #include "r600_reg.h" #include "evergreen_reg.h" #include "ni_reg.h" +#include "si_reg.h" #define RADEON_MC_AGP_LOCATION 0x014c #define RADEON_MC_AGP_START_MASK 0x0000FFFF diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 30566201dffb..cc33b3d7c33b 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -133,6 +133,7 @@ retry: (*ib)->gpu_addr += (*ib)->sa_bo.offset; (*ib)->fence = fence; (*ib)->vm_id = 0; + (*ib)->is_const_ib = false; /* ib are most likely to be allocated in a ring fashion * thus rdev->ib_pool.head_id should be the id of the * oldest ib diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c new file mode 100644 index 000000000000..ac7a199ffece --- /dev/null +++ b/drivers/gpu/drm/radeon/si.c @@ -0,0 +1,4128 @@ +/* + * Copyright 2011 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include <linux/firmware.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/module.h> +#include "drmP.h" +#include "radeon.h" +#include "radeon_asic.h" +#include "radeon_drm.h" +#include "sid.h" +#include "atom.h" +#include "si_blit_shaders.h" + +#define SI_PFP_UCODE_SIZE 2144 +#define SI_PM4_UCODE_SIZE 2144 +#define SI_CE_UCODE_SIZE 2144 +#define SI_RLC_UCODE_SIZE 2048 +#define SI_MC_UCODE_SIZE 7769 + +MODULE_FIRMWARE("radeon/TAHITI_pfp.bin"); +MODULE_FIRMWARE("radeon/TAHITI_me.bin"); +MODULE_FIRMWARE("radeon/TAHITI_ce.bin"); +MODULE_FIRMWARE("radeon/TAHITI_mc.bin"); +MODULE_FIRMWARE("radeon/TAHITI_rlc.bin"); +MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin"); +MODULE_FIRMWARE("radeon/PITCAIRN_me.bin"); +MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin"); +MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin"); +MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin"); +MODULE_FIRMWARE("radeon/VERDE_pfp.bin"); +MODULE_FIRMWARE("radeon/VERDE_me.bin"); +MODULE_FIRMWARE("radeon/VERDE_ce.bin"); +MODULE_FIRMWARE("radeon/VERDE_mc.bin"); +MODULE_FIRMWARE("radeon/VERDE_rlc.bin"); + +extern int r600_ih_ring_alloc(struct radeon_device *rdev); +extern void r600_ih_ring_fini(struct radeon_device *rdev); +extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev); +extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save); +extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save); +extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev); + +/* get temperature in millidegrees */ +int si_get_temp(struct radeon_device *rdev) +{ + u32 temp; + int actual_temp = 0; + + temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >> + CTF_TEMP_SHIFT; + + if (temp & 0x200) + actual_temp = 255; + else + actual_temp = temp & 0x1ff; + + actual_temp = (actual_temp * 1000); + + return actual_temp; +} + +#define TAHITI_IO_MC_REGS_SIZE 36 + +static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = { + {0x0000006f, 0x03044000}, + {0x00000070, 0x0480c018}, + {0x00000071, 0x00000040}, + {0x00000072, 0x01000000}, + {0x00000074, 0x000000ff}, + {0x00000075, 0x00143400}, + {0x00000076, 0x08ec0800}, + {0x00000077, 0x040000cc}, + {0x00000079, 0x00000000}, + {0x0000007a, 0x21000409}, + {0x0000007c, 0x00000000}, + {0x0000007d, 0xe8000000}, + {0x0000007e, 0x044408a8}, + {0x0000007f, 0x00000003}, + {0x00000080, 0x00000000}, + {0x00000081, 0x01000000}, + {0x00000082, 0x02000000}, + {0x00000083, 0x00000000}, + {0x00000084, 0xe3f3e4f4}, + {0x00000085, 0x00052024}, + {0x00000087, 0x00000000}, + {0x00000088, 0x66036603}, + {0x00000089, 0x01000000}, + {0x0000008b, 0x1c0a0000}, + {0x0000008c, 0xff010000}, + {0x0000008e, 0xffffefff}, + {0x0000008f, 0xfff3efff}, + {0x00000090, 0xfff3efbf}, + {0x00000094, 0x00101101}, + {0x00000095, 0x00000fff}, + {0x00000096, 0x00116fff}, + {0x00000097, 0x60010000}, + {0x00000098, 0x10010000}, + {0x00000099, 0x00006000}, + {0x0000009a, 0x00001000}, + {0x0000009f, 0x00a77400} +}; + +static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = { + {0x0000006f, 0x03044000}, + {0x00000070, 0x0480c018}, + {0x00000071, 0x00000040}, + {0x00000072, 0x01000000}, + {0x00000074, 0x000000ff}, + {0x00000075, 0x00143400}, + {0x00000076, 0x08ec0800}, + {0x00000077, 0x040000cc}, + {0x00000079, 0x00000000}, + {0x0000007a, 0x21000409}, + {0x0000007c, 0x00000000}, + {0x0000007d, 0xe8000000}, + {0x0000007e, 0x044408a8}, + {0x0000007f, 0x00000003}, + {0x00000080, 0x00000000}, + {0x00000081, 0x01000000}, + {0x00000082, 0x02000000}, + {0x00000083, 0x00000000}, + {0x00000084, 0xe3f3e4f4}, + {0x00000085, 0x00052024}, + {0x00000087, 0x00000000}, + {0x00000088, 0x66036603}, + {0x00000089, 0x01000000}, + {0x0000008b, 0x1c0a0000}, + {0x0000008c, 0xff010000}, + {0x0000008e, 0xffffefff}, + {0x0000008f, 0xfff3efff}, + {0x00000090, 0xfff3efbf}, + {0x00000094, 0x00101101}, + {0x00000095, 0x00000fff}, + {0x00000096, 0x00116fff}, + {0x00000097, 0x60010000}, + {0x00000098, 0x10010000}, + {0x00000099, 0x00006000}, + {0x0000009a, 0x00001000}, + {0x0000009f, 0x00a47400} +}; + +static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = { + {0x0000006f, 0x03044000}, + {0x00000070, 0x0480c018}, + {0x00000071, 0x00000040}, + {0x00000072, 0x01000000}, + {0x00000074, 0x000000ff}, + {0x00000075, 0x00143400}, + {0x00000076, 0x08ec0800}, + {0x00000077, 0x040000cc}, + {0x00000079, 0x00000000}, + {0x0000007a, 0x21000409}, + {0x0000007c, 0x00000000}, + {0x0000007d, 0xe8000000}, + {0x0000007e, 0x044408a8}, + {0x0000007f, 0x00000003}, + {0x00000080, 0x00000000}, + {0x00000081, 0x01000000}, + {0x00000082, 0x02000000}, + {0x00000083, 0x00000000}, + {0x00000084, 0xe3f3e4f4}, + {0x00000085, 0x00052024}, + {0x00000087, 0x00000000}, + {0x00000088, 0x66036603}, + {0x00000089, 0x01000000}, + {0x0000008b, 0x1c0a0000}, + {0x0000008c, 0xff010000}, + {0x0000008e, 0xffffefff}, + {0x0000008f, 0xfff3efff}, + {0x00000090, 0xfff3efbf}, + {0x00000094, 0x00101101}, + {0x00000095, 0x00000fff}, + {0x00000096, 0x00116fff}, + {0x00000097, 0x60010000}, + {0x00000098, 0x10010000}, + {0x00000099, 0x00006000}, + {0x0000009a, 0x00001000}, + {0x0000009f, 0x00a37400} +}; + +/* ucode loading */ +static int si_mc_load_microcode(struct radeon_device *rdev) +{ + const __be32 *fw_data; + u32 running, blackout = 0; + u32 *io_mc_regs; + int i, ucode_size, regs_size; + + if (!rdev->mc_fw) + return -EINVAL; + + switch (rdev->family) { + case CHIP_TAHITI: + io_mc_regs = (u32 *)&tahiti_io_mc_regs; + ucode_size = SI_MC_UCODE_SIZE; + regs_size = TAHITI_IO_MC_REGS_SIZE; + break; + case CHIP_PITCAIRN: + io_mc_regs = (u32 *)&pitcairn_io_mc_regs; + ucode_size = SI_MC_UCODE_SIZE; + regs_size = TAHITI_IO_MC_REGS_SIZE; + break; + case CHIP_VERDE: + default: + io_mc_regs = (u32 *)&verde_io_mc_regs; + ucode_size = SI_MC_UCODE_SIZE; + regs_size = TAHITI_IO_MC_REGS_SIZE; + break; + } + + running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK; + + if (running == 0) { + if (running) { + blackout = RREG32(MC_SHARED_BLACKOUT_CNTL); + WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1); + } + + /* reset the engine and set to writable */ + WREG32(MC_SEQ_SUP_CNTL, 0x00000008); + WREG32(MC_SEQ_SUP_CNTL, 0x00000010); + + /* load mc io regs */ + for (i = 0; i < regs_size; i++) { + WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]); + WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]); + } + /* load the MC ucode */ + fw_data = (const __be32 *)rdev->mc_fw->data; + for (i = 0; i < ucode_size; i++) + WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++)); + + /* put the engine back into the active state */ + WREG32(MC_SEQ_SUP_CNTL, 0x00000008); + WREG32(MC_SEQ_SUP_CNTL, 0x00000004); + WREG32(MC_SEQ_SUP_CNTL, 0x00000001); + + /* wait for training to complete */ + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0) + break; + udelay(1); + } + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1) + break; + udelay(1); + } + + if (running) + WREG32(MC_SHARED_BLACKOUT_CNTL, blackout); + } + + return 0; +} + +static int si_init_microcode(struct radeon_device *rdev) +{ + struct platform_device *pdev; + const char *chip_name; + const char *rlc_chip_name; + size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size; + char fw_name[30]; + int err; + + DRM_DEBUG("\n"); + + pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0); + err = IS_ERR(pdev); + if (err) { + printk(KERN_ERR "radeon_cp: Failed to register firmware\n"); + return -EINVAL; + } + + switch (rdev->family) { + case CHIP_TAHITI: + chip_name = "TAHITI"; + rlc_chip_name = "TAHITI"; + pfp_req_size = SI_PFP_UCODE_SIZE * 4; + me_req_size = SI_PM4_UCODE_SIZE * 4; + ce_req_size = SI_CE_UCODE_SIZE * 4; + rlc_req_size = SI_RLC_UCODE_SIZE * 4; + mc_req_size = SI_MC_UCODE_SIZE * 4; + break; + case CHIP_PITCAIRN: + chip_name = "PITCAIRN"; + rlc_chip_name = "PITCAIRN"; + pfp_req_size = SI_PFP_UCODE_SIZE * 4; + me_req_size = SI_PM4_UCODE_SIZE * 4; + ce_req_size = SI_CE_UCODE_SIZE * 4; + rlc_req_size = SI_RLC_UCODE_SIZE * 4; + mc_req_size = SI_MC_UCODE_SIZE * 4; + break; + case CHIP_VERDE: + chip_name = "VERDE"; + rlc_chip_name = "VERDE"; + pfp_req_size = SI_PFP_UCODE_SIZE * 4; + me_req_size = SI_PM4_UCODE_SIZE * 4; + ce_req_size = SI_CE_UCODE_SIZE * 4; + rlc_req_size = SI_RLC_UCODE_SIZE * 4; + mc_req_size = SI_MC_UCODE_SIZE * 4; + break; + default: BUG(); + } + + DRM_INFO("Loading %s Microcode\n", chip_name); + + snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name); + err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->pfp_fw->size != pfp_req_size) { + printk(KERN_ERR + "si_cp: Bogus length %zu in firmware \"%s\"\n", + rdev->pfp_fw->size, fw_name); + err = -EINVAL; + goto out; + } + + snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name); + err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->me_fw->size != me_req_size) { + printk(KERN_ERR + "si_cp: Bogus length %zu in firmware \"%s\"\n", + rdev->me_fw->size, fw_name); + err = -EINVAL; + } + + snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name); + err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->ce_fw->size != ce_req_size) { + printk(KERN_ERR + "si_cp: Bogus length %zu in firmware \"%s\"\n", + rdev->ce_fw->size, fw_name); + err = -EINVAL; + } + + snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name); + err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->rlc_fw->size != rlc_req_size) { + printk(KERN_ERR + "si_rlc: Bogus length %zu in firmware \"%s\"\n", + rdev->rlc_fw->size, fw_name); + err = -EINVAL; + } + + snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); + err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->mc_fw->size != mc_req_size) { + printk(KERN_ERR + "si_mc: Bogus length %zu in firmware \"%s\"\n", + rdev->mc_fw->size, fw_name); + err = -EINVAL; + } + +out: + platform_device_unregister(pdev); + + if (err) { + if (err != -EINVAL) + printk(KERN_ERR + "si_cp: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(rdev->pfp_fw); + rdev->pfp_fw = NULL; + release_firmware(rdev->me_fw); + rdev->me_fw = NULL; + release_firmware(rdev->ce_fw); + rdev->ce_fw = NULL; + release_firmware(rdev->rlc_fw); + rdev->rlc_fw = NULL; + release_firmware(rdev->mc_fw); + rdev->mc_fw = NULL; + } + return err; +} + +/* watermark setup */ +static u32 dce6_line_buffer_adjust(struct radeon_device *rdev, + struct radeon_crtc *radeon_crtc, + struct drm_display_mode *mode, + struct drm_display_mode *other_mode) +{ + u32 tmp; + /* + * Line Buffer Setup + * There are 3 line buffers, each one shared by 2 display controllers. + * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between + * the display controllers. The paritioning is done via one of four + * preset allocations specified in bits 21:20: + * 0 - half lb + * 2 - whole lb, other crtc must be disabled + */ + /* this can get tricky if we have two large displays on a paired group + * of crtcs. Ideally for multiple large displays we'd assign them to + * non-linked crtcs for maximum line buffer allocation. + */ + if (radeon_crtc->base.enabled && mode) { + if (other_mode) + tmp = 0; /* 1/2 */ + else + tmp = 2; /* whole */ + } else + tmp = 0; + + WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset, + DC_LB_MEMORY_CONFIG(tmp)); + + if (radeon_crtc->base.enabled && mode) { + switch (tmp) { + case 0: + default: + return 4096 * 2; + case 2: + return 8192 * 2; + } + } + + /* controller not enabled, so no lb used */ + return 0; +} + +static u32 si_get_number_of_dram_channels(struct radeon_device *rdev) +{ + u32 tmp = RREG32(MC_SHARED_CHMAP); + + switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { + case 0: + default: + return 1; + case 1: + return 2; + case 2: + return 4; + case 3: + return 8; + case 4: + return 3; + case 5: + return 6; + case 6: + return 10; + case 7: + return 12; + case 8: + return 16; + } +} + +struct dce6_wm_params { + u32 dram_channels; /* number of dram channels */ + u32 yclk; /* bandwidth per dram data pin in kHz */ + u32 sclk; /* engine clock in kHz */ + u32 disp_clk; /* display clock in kHz */ + u32 src_width; /* viewport width */ + u32 active_time; /* active display time in ns */ + u32 blank_time; /* blank time in ns */ + bool interlaced; /* mode is interlaced */ + fixed20_12 vsc; /* vertical scale ratio */ + u32 num_heads; /* number of active crtcs */ + u32 bytes_per_pixel; /* bytes per pixel display + overlay */ + u32 lb_size; /* line buffer allocated to pipe */ + u32 vtaps; /* vertical scaler taps */ +}; + +static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm) +{ + /* Calculate raw DRAM Bandwidth */ + fixed20_12 dram_efficiency; /* 0.7 */ + fixed20_12 yclk, dram_channels, bandwidth; + fixed20_12 a; + + a.full = dfixed_const(1000); + yclk.full = dfixed_const(wm->yclk); + yclk.full = dfixed_div(yclk, a); + dram_channels.full = dfixed_const(wm->dram_channels * 4); + a.full = dfixed_const(10); + dram_efficiency.full = dfixed_const(7); + dram_efficiency.full = dfixed_div(dram_efficiency, a); + bandwidth.full = dfixed_mul(dram_channels, yclk); + bandwidth.full = dfixed_mul(bandwidth, dram_efficiency); + + return dfixed_trunc(bandwidth); +} + +static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm) +{ + /* Calculate DRAM Bandwidth and the part allocated to display. */ + fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */ + fixed20_12 yclk, dram_channels, bandwidth; + fixed20_12 a; + + a.full = dfixed_const(1000); + yclk.full = dfixed_const(wm->yclk); + yclk.full = dfixed_div(yclk, a); + dram_channels.full = dfixed_const(wm->dram_channels * 4); + a.full = dfixed_const(10); + disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */ + disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a); + bandwidth.full = dfixed_mul(dram_channels, yclk); + bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation); + + return dfixed_trunc(bandwidth); +} + +static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm) +{ + /* Calculate the display Data return Bandwidth */ + fixed20_12 return_efficiency; /* 0.8 */ + fixed20_12 sclk, bandwidth; + fixed20_12 a; + + a.full = dfixed_const(1000); + sclk.full = dfixed_const(wm->sclk); + sclk.full = dfixed_div(sclk, a); + a.full = dfixed_const(10); + return_efficiency.full = dfixed_const(8); + return_efficiency.full = dfixed_div(return_efficiency, a); + a.full = dfixed_const(32); + bandwidth.full = dfixed_mul(a, sclk); + bandwidth.full = dfixed_mul(bandwidth, return_efficiency); + + return dfixed_trunc(bandwidth); +} + +static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm) +{ + return 32; +} + +static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm) +{ + /* Calculate the DMIF Request Bandwidth */ + fixed20_12 disp_clk_request_efficiency; /* 0.8 */ + fixed20_12 disp_clk, sclk, bandwidth; + fixed20_12 a, b1, b2; + u32 min_bandwidth; + + a.full = dfixed_const(1000); + disp_clk.full = dfixed_const(wm->disp_clk); + disp_clk.full = dfixed_div(disp_clk, a); + a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2); + b1.full = dfixed_mul(a, disp_clk); + + a.full = dfixed_const(1000); + sclk.full = dfixed_const(wm->sclk); + sclk.full = dfixed_div(sclk, a); + a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm)); + b2.full = dfixed_mul(a, sclk); + + a.full = dfixed_const(10); + disp_clk_request_efficiency.full = dfixed_const(8); + disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a); + + min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2)); + + a.full = dfixed_const(min_bandwidth); + bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency); + + return dfixed_trunc(bandwidth); +} + +static u32 dce6_available_bandwidth(struct dce6_wm_params *wm) +{ + /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */ + u32 dram_bandwidth = dce6_dram_bandwidth(wm); + u32 data_return_bandwidth = dce6_data_return_bandwidth(wm); + u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm); + + return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth)); +} + +static u32 dce6_average_bandwidth(struct dce6_wm_params *wm) +{ + /* Calculate the display mode Average Bandwidth + * DisplayMode should contain the source and destination dimensions, + * timing, etc. + */ + fixed20_12 bpp; + fixed20_12 line_time; + fixed20_12 src_width; + fixed20_12 bandwidth; + fixed20_12 a; + + a.full = dfixed_const(1000); + line_time.full = dfixed_const(wm->active_time + wm->blank_time); + line_time.full = dfixed_div(line_time, a); + bpp.full = dfixed_const(wm->bytes_per_pixel); + src_width.full = dfixed_const(wm->src_width); + bandwidth.full = dfixed_mul(src_width, bpp); + bandwidth.full = dfixed_mul(bandwidth, wm->vsc); + bandwidth.full = dfixed_div(bandwidth, line_time); + + return dfixed_trunc(bandwidth); +} + +static u32 dce6_latency_watermark(struct dce6_wm_params *wm) +{ + /* First calcualte the latency in ns */ + u32 mc_latency = 2000; /* 2000 ns. */ + u32 available_bandwidth = dce6_available_bandwidth(wm); + u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth; + u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth; + u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */ + u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) + + (wm->num_heads * cursor_line_pair_return_time); + u32 latency = mc_latency + other_heads_data_return_time + dc_latency; + u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time; + u32 tmp, dmif_size = 12288; + fixed20_12 a, b, c; + + if (wm->num_heads == 0) + return 0; + + a.full = dfixed_const(2); + b.full = dfixed_const(1); + if ((wm->vsc.full > a.full) || + ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) || + (wm->vtaps >= 5) || + ((wm->vsc.full >= a.full) && wm->interlaced)) + max_src_lines_per_dst_line = 4; + else + max_src_lines_per_dst_line = 2; + + a.full = dfixed_const(available_bandwidth); + b.full = dfixed_const(wm->num_heads); + a.full = dfixed_div(a, b); + + b.full = dfixed_const(mc_latency + 512); + c.full = dfixed_const(wm->disp_clk); + b.full = dfixed_div(b, c); + + c.full = dfixed_const(dmif_size); + b.full = dfixed_div(c, b); + + tmp = min(dfixed_trunc(a), dfixed_trunc(b)); + + b.full = dfixed_const(1000); + c.full = dfixed_const(wm->disp_clk); + b.full = dfixed_div(c, b); + c.full = dfixed_const(wm->bytes_per_pixel); + b.full = dfixed_mul(b, c); + + lb_fill_bw = min(tmp, dfixed_trunc(b)); + + a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); + b.full = dfixed_const(1000); + c.full = dfixed_const(lb_fill_bw); + b.full = dfixed_div(c, b); + a.full = dfixed_div(a, b); + line_fill_time = dfixed_trunc(a); + + if (line_fill_time < wm->active_time) + return latency; + else + return latency + (line_fill_time - wm->active_time); + +} + +static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm) +{ + if (dce6_average_bandwidth(wm) <= + (dce6_dram_bandwidth_for_display(wm) / wm->num_heads)) + return true; + else + return false; +}; + +static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm) +{ + if (dce6_average_bandwidth(wm) <= + (dce6_available_bandwidth(wm) / wm->num_heads)) + return true; + else + return false; +}; + +static bool dce6_check_latency_hiding(struct dce6_wm_params *wm) +{ + u32 lb_partitions = wm->lb_size / wm->src_width; + u32 line_time = wm->active_time + wm->blank_time; + u32 latency_tolerant_lines; + u32 latency_hiding; + fixed20_12 a; + + a.full = dfixed_const(1); + if (wm->vsc.full > a.full) + latency_tolerant_lines = 1; + else { + if (lb_partitions <= (wm->vtaps + 1)) + latency_tolerant_lines = 1; + else + latency_tolerant_lines = 2; + } + + latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time); + + if (dce6_latency_watermark(wm) <= latency_hiding) + return true; + else + return false; +} + +static void dce6_program_watermarks(struct radeon_device *rdev, + struct radeon_crtc *radeon_crtc, + u32 lb_size, u32 num_heads) +{ + struct drm_display_mode *mode = &radeon_crtc->base.mode; + struct dce6_wm_params wm; + u32 pixel_period; + u32 line_time = 0; + u32 latency_watermark_a = 0, latency_watermark_b = 0; + u32 priority_a_mark = 0, priority_b_mark = 0; + u32 priority_a_cnt = PRIORITY_OFF; + u32 priority_b_cnt = PRIORITY_OFF; + u32 tmp, arb_control3; + fixed20_12 a, b, c; + + if (radeon_crtc->base.enabled && num_heads && mode) { + pixel_period = 1000000 / (u32)mode->clock; + line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); + priority_a_cnt = 0; + priority_b_cnt = 0; + + wm.yclk = rdev->pm.current_mclk * 10; + wm.sclk = rdev->pm.current_sclk * 10; + wm.disp_clk = mode->clock; + wm.src_width = mode->crtc_hdisplay; + wm.active_time = mode->crtc_hdisplay * pixel_period; + wm.blank_time = line_time - wm.active_time; + wm.interlaced = false; + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + wm.interlaced = true; + wm.vsc = radeon_crtc->vsc; + wm.vtaps = 1; + if (radeon_crtc->rmx_type != RMX_OFF) + wm.vtaps = 2; + wm.bytes_per_pixel = 4; /* XXX: get this from fb config */ + wm.lb_size = lb_size; + if (rdev->family == CHIP_ARUBA) + wm.dram_channels = evergreen_get_number_of_dram_channels(rdev); + else + wm.dram_channels = si_get_number_of_dram_channels(rdev); + wm.num_heads = num_heads; + + /* set for high clocks */ + latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535); + /* set for low clocks */ + /* wm.yclk = low clk; wm.sclk = low clk */ + latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535); + + /* possibly force display priority to high */ + /* should really do this at mode validation time... */ + if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) || + !dce6_average_bandwidth_vs_available_bandwidth(&wm) || + !dce6_check_latency_hiding(&wm) || + (rdev->disp_priority == 2)) { + DRM_DEBUG_KMS("force priority to high\n"); + priority_a_cnt |= PRIORITY_ALWAYS_ON; + priority_b_cnt |= PRIORITY_ALWAYS_ON; + } + + a.full = dfixed_const(1000); + b.full = dfixed_const(mode->clock); + b.full = dfixed_div(b, a); + c.full = dfixed_const(latency_watermark_a); + c.full = dfixed_mul(c, b); + c.full = dfixed_mul(c, radeon_crtc->hsc); + c.full = dfixed_div(c, a); + a.full = dfixed_const(16); + c.full = dfixed_div(c, a); + priority_a_mark = dfixed_trunc(c); + priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK; + + a.full = dfixed_const(1000); + b.full = dfixed_const(mode->clock); + b.full = dfixed_div(b, a); + c.full = dfixed_const(latency_watermark_b); + c.full = dfixed_mul(c, b); + c.full = dfixed_mul(c, radeon_crtc->hsc); + c.full = dfixed_div(c, a); + a.full = dfixed_const(16); + c.full = dfixed_div(c, a); + priority_b_mark = dfixed_trunc(c); + priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK; + } + + /* select wm A */ + arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset); + tmp = arb_control3; + tmp &= ~LATENCY_WATERMARK_MASK(3); + tmp |= LATENCY_WATERMARK_MASK(1); + WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp); + WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset, + (LATENCY_LOW_WATERMARK(latency_watermark_a) | + LATENCY_HIGH_WATERMARK(line_time))); + /* select wm B */ + tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset); + tmp &= ~LATENCY_WATERMARK_MASK(3); + tmp |= LATENCY_WATERMARK_MASK(2); + WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp); + WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset, + (LATENCY_LOW_WATERMARK(latency_watermark_b) | + LATENCY_HIGH_WATERMARK(line_time))); + /* restore original selection */ + WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3); + + /* write the priority marks */ + WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt); + WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt); + +} + +void dce6_bandwidth_update(struct radeon_device *rdev) +{ + struct drm_display_mode *mode0 = NULL; + struct drm_display_mode *mode1 = NULL; + u32 num_heads = 0, lb_size; + int i; + + radeon_update_display_priority(rdev); + + for (i = 0; i < rdev->num_crtc; i++) { + if (rdev->mode_info.crtcs[i]->base.enabled) + num_heads++; + } + for (i = 0; i < rdev->num_crtc; i += 2) { + mode0 = &rdev->mode_info.crtcs[i]->base.mode; + mode1 = &rdev->mode_info.crtcs[i+1]->base.mode; + lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1); + dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads); + lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0); + dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads); + } +} + +/* + * Core functions + */ +static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev, + u32 num_tile_pipes, + u32 num_backends_per_asic, + u32 *backend_disable_mask_per_asic, + u32 num_shader_engines) +{ + u32 backend_map = 0; + u32 enabled_backends_mask = 0; + u32 enabled_backends_count = 0; + u32 num_backends_per_se; + u32 cur_pipe; + u32 swizzle_pipe[SI_MAX_PIPES]; + u32 cur_backend = 0; + u32 i; + bool force_no_swizzle; + + /* force legal values */ + if (num_tile_pipes < 1) + num_tile_pipes = 1; + if (num_tile_pipes > rdev->config.si.max_tile_pipes) + num_tile_pipes = rdev->config.si.max_tile_pipes; + if (num_shader_engines < 1) + num_shader_engines = 1; + if (num_shader_engines > rdev->config.si.max_shader_engines) + num_shader_engines = rdev->config.si.max_shader_engines; + if (num_backends_per_asic < num_shader_engines) + num_backends_per_asic = num_shader_engines; + if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines)) + num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines; + + /* make sure we have the same number of backends per se */ + num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines); + /* set up the number of backends per se */ + num_backends_per_se = num_backends_per_asic / num_shader_engines; + if (num_backends_per_se > rdev->config.si.max_backends_per_se) { + num_backends_per_se = rdev->config.si.max_backends_per_se; + num_backends_per_asic = num_backends_per_se * num_shader_engines; + } + + /* create enable mask and count for enabled backends */ + for (i = 0; i < SI_MAX_BACKENDS; ++i) { + if (((*backend_disable_mask_per_asic >> i) & 1) == 0) { + enabled_backends_mask |= (1 << i); + ++enabled_backends_count; + } + if (enabled_backends_count == num_backends_per_asic) + break; + } + + /* force the backends mask to match the current number of backends */ + if (enabled_backends_count != num_backends_per_asic) { + u32 this_backend_enabled; + u32 shader_engine; + u32 backend_per_se; + + enabled_backends_mask = 0; + enabled_backends_count = 0; + *backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK; + for (i = 0; i < SI_MAX_BACKENDS; ++i) { + /* calc the current se */ + shader_engine = i / rdev->config.si.max_backends_per_se; + /* calc the backend per se */ + backend_per_se = i % rdev->config.si.max_backends_per_se; + /* default to not enabled */ + this_backend_enabled = 0; + if ((shader_engine < num_shader_engines) && + (backend_per_se < num_backends_per_se)) + this_backend_enabled = 1; + if (this_backend_enabled) { + enabled_backends_mask |= (1 << i); + *backend_disable_mask_per_asic &= ~(1 << i); + ++enabled_backends_count; + } + } + } + + + memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES); + switch (rdev->family) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + force_no_swizzle = true; + break; + default: + force_no_swizzle = false; + break; + } + if (force_no_swizzle) { + bool last_backend_enabled = false; + + force_no_swizzle = false; + for (i = 0; i < SI_MAX_BACKENDS; ++i) { + if (((enabled_backends_mask >> i) & 1) == 1) { + if (last_backend_enabled) + force_no_swizzle = true; + last_backend_enabled = true; + } else + last_backend_enabled = false; + } + } + + switch (num_tile_pipes) { + case 1: + case 3: + case 5: + case 7: + DRM_ERROR("odd number of pipes!\n"); + break; + case 2: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + break; + case 4: + if (force_no_swizzle) { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + } else { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 1; + swizzle_pipe[3] = 3; + } + break; + case 6: + if (force_no_swizzle) { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + swizzle_pipe[4] = 4; + swizzle_pipe[5] = 5; + } else { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 1; + swizzle_pipe[4] = 3; + swizzle_pipe[5] = 5; + } + break; + case 8: + if (force_no_swizzle) { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + swizzle_pipe[4] = 4; + swizzle_pipe[5] = 5; + swizzle_pipe[6] = 6; + swizzle_pipe[7] = 7; + } else { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 1; + swizzle_pipe[5] = 3; + swizzle_pipe[6] = 5; + swizzle_pipe[7] = 7; + } + break; + } + + for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { + while (((1 << cur_backend) & enabled_backends_mask) == 0) + cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS; + + backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4))); + + cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS; + } + + return backend_map; +} + +static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev, + u32 disable_mask_per_se, + u32 max_disable_mask_per_se, + u32 num_shader_engines) +{ + u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se); + u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se; + + if (num_shader_engines == 1) + return disable_mask_per_asic; + else if (num_shader_engines == 2) + return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se); + else + return 0xffffffff; +} + +static void si_tiling_mode_table_init(struct radeon_device *rdev) +{ + const u32 num_tile_mode_states = 32; + u32 reg_offset, gb_tile_moden, split_equal_to_row_size; + + switch (rdev->config.si.mem_row_size_in_kb) { + case 1: + split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB; + break; + case 2: + default: + split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB; + break; + case 4: + split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB; + break; + } + + if ((rdev->family == CHIP_TAHITI) || + (rdev->family == CHIP_PITCAIRN)) { + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { + switch (reg_offset) { + case 0: /* non-AA compressed depth or any compressed stencil */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 1: /* 2xAA/4xAA compressed depth only */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 2: /* 8xAA compressed depth only */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 3: /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 4: /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */ + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 5: /* Uncompressed 16bpp depth - and stencil buffer allocated with it */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 6: /* Uncompressed 32bpp depth - and stencil buffer allocated with it */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + case 7: /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 8: /* 1D and 1D Array Surfaces */ + gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 9: /* Displayable maps. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 10: /* Display 8bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 11: /* Display 16bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 12: /* Display 32bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + case 13: /* Thin. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 14: /* Thin 8 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + case 15: /* Thin 16 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + case 16: /* Thin 32 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + case 17: /* Thin 64 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + case 21: /* 8 bpp PRT. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 22: /* 16 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 23: /* 32 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 24: /* 64 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 25: /* 128 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + NUM_BANKS(ADDR_SURF_8_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + default: + gb_tile_moden = 0; + break; + } + WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); + } + } else if (rdev->family == CHIP_VERDE) { + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { + switch (reg_offset) { + case 0: /* non-AA compressed depth or any compressed stencil */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 1: /* 2xAA/4xAA compressed depth only */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 2: /* 8xAA compressed depth only */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 3: /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 4: /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */ + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 5: /* Uncompressed 16bpp depth - and stencil buffer allocated with it */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 6: /* Uncompressed 32bpp depth - and stencil buffer allocated with it */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 7: /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 8: /* 1D and 1D Array Surfaces */ + gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 9: /* Displayable maps. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 10: /* Display 8bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 11: /* Display 16bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 12: /* Display 32bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 13: /* Thin. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 14: /* Thin 8 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 15: /* Thin 16 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 16: /* Thin 32 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 17: /* Thin 64 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 21: /* 8 bpp PRT. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 22: /* 16 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 23: /* 32 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 24: /* 64 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 25: /* 128 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + NUM_BANKS(ADDR_SURF_8_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + default: + gb_tile_moden = 0; + break; + } + WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); + } + } else + DRM_ERROR("unknown asic: 0x%x\n", rdev->family); +} + +static void si_gpu_init(struct radeon_device *rdev) +{ + u32 cc_rb_backend_disable = 0; + u32 cc_gc_shader_array_config; + u32 gb_addr_config = 0; + u32 mc_shared_chmap, mc_arb_ramcfg; + u32 gb_backend_map; + u32 cgts_tcc_disable; + u32 sx_debug_1; + u32 gc_user_shader_array_config; + u32 gc_user_rb_backend_disable; + u32 cgts_user_tcc_disable; + u32 hdp_host_path_cntl; + u32 tmp; + int i, j; + + switch (rdev->family) { + case CHIP_TAHITI: + rdev->config.si.max_shader_engines = 2; + rdev->config.si.max_pipes_per_simd = 4; + rdev->config.si.max_tile_pipes = 12; + rdev->config.si.max_simds_per_se = 8; + rdev->config.si.max_backends_per_se = 4; + rdev->config.si.max_texture_channel_caches = 12; + rdev->config.si.max_gprs = 256; + rdev->config.si.max_gs_threads = 32; + rdev->config.si.max_hw_contexts = 8; + + rdev->config.si.sc_prim_fifo_size_frontend = 0x20; + rdev->config.si.sc_prim_fifo_size_backend = 0x100; + rdev->config.si.sc_hiz_tile_fifo_size = 0x30; + rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; + break; + case CHIP_PITCAIRN: + rdev->config.si.max_shader_engines = 2; + rdev->config.si.max_pipes_per_simd = 4; + rdev->config.si.max_tile_pipes = 8; + rdev->config.si.max_simds_per_se = 5; + rdev->config.si.max_backends_per_se = 4; + rdev->config.si.max_texture_channel_caches = 8; + rdev->config.si.max_gprs = 256; + rdev->config.si.max_gs_threads = 32; + rdev->config.si.max_hw_contexts = 8; + + rdev->config.si.sc_prim_fifo_size_frontend = 0x20; + rdev->config.si.sc_prim_fifo_size_backend = 0x100; + rdev->config.si.sc_hiz_tile_fifo_size = 0x30; + rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; + break; + case CHIP_VERDE: + default: + rdev->config.si.max_shader_engines = 1; + rdev->config.si.max_pipes_per_simd = 4; + rdev->config.si.max_tile_pipes = 4; + rdev->config.si.max_simds_per_se = 2; + rdev->config.si.max_backends_per_se = 4; + rdev->config.si.max_texture_channel_caches = 4; + rdev->config.si.max_gprs = 256; + rdev->config.si.max_gs_threads = 32; + rdev->config.si.max_hw_contexts = 8; + + rdev->config.si.sc_prim_fifo_size_frontend = 0x20; + rdev->config.si.sc_prim_fifo_size_backend = 0x40; + rdev->config.si.sc_hiz_tile_fifo_size = 0x30; + rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; + break; + } + + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + + WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); + + evergreen_fix_pci_max_read_req_size(rdev); + + WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN); + + mc_shared_chmap = RREG32(MC_SHARED_CHMAP); + mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); + + cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE); + cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG); + cgts_tcc_disable = 0xffff0000; + for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++) + cgts_tcc_disable &= ~(1 << (16 + i)); + gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE); + gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG); + cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE); + + rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines; + rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes; + tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT; + rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp); + tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT; + rdev->config.si.backend_disable_mask_per_asic = + si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK, + rdev->config.si.num_shader_engines); + rdev->config.si.backend_map = + si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes, + rdev->config.si.num_backends_per_se * + rdev->config.si.num_shader_engines, + &rdev->config.si.backend_disable_mask_per_asic, + rdev->config.si.num_shader_engines); + tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT; + rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp); + rdev->config.si.mem_max_burst_length_bytes = 256; + tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; + rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; + if (rdev->config.si.mem_row_size_in_kb > 4) + rdev->config.si.mem_row_size_in_kb = 4; + /* XXX use MC settings? */ + rdev->config.si.shader_engine_tile_size = 32; + rdev->config.si.num_gpus = 1; + rdev->config.si.multi_gpu_tile_size = 64; + + gb_addr_config = 0; + switch (rdev->config.si.num_tile_pipes) { + case 1: + gb_addr_config |= NUM_PIPES(0); + break; + case 2: + gb_addr_config |= NUM_PIPES(1); + break; + case 4: + gb_addr_config |= NUM_PIPES(2); + break; + case 8: + default: + gb_addr_config |= NUM_PIPES(3); + break; + } + + tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1; + gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp); + gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1); + tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1; + gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp); + switch (rdev->config.si.num_gpus) { + case 1: + default: + gb_addr_config |= NUM_GPUS(0); + break; + case 2: + gb_addr_config |= NUM_GPUS(1); + break; + case 4: + gb_addr_config |= NUM_GPUS(2); + break; + } + switch (rdev->config.si.multi_gpu_tile_size) { + case 16: + gb_addr_config |= MULTI_GPU_TILE_SIZE(0); + break; + case 32: + default: + gb_addr_config |= MULTI_GPU_TILE_SIZE(1); + break; + case 64: + gb_addr_config |= MULTI_GPU_TILE_SIZE(2); + break; + case 128: + gb_addr_config |= MULTI_GPU_TILE_SIZE(3); + break; + } + switch (rdev->config.si.mem_row_size_in_kb) { + case 1: + default: + gb_addr_config |= ROW_SIZE(0); + break; + case 2: + gb_addr_config |= ROW_SIZE(1); + break; + case 4: + gb_addr_config |= ROW_SIZE(2); + break; + } + + tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT; + rdev->config.si.num_tile_pipes = (1 << tmp); + tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT; + rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256; + tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT; + rdev->config.si.num_shader_engines = tmp + 1; + tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT; + rdev->config.si.num_gpus = tmp + 1; + tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT; + rdev->config.si.multi_gpu_tile_size = 1 << tmp; + tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT; + rdev->config.si.mem_row_size_in_kb = 1 << tmp; + + gb_backend_map = + si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes, + rdev->config.si.num_backends_per_se * + rdev->config.si.num_shader_engines, + &rdev->config.si.backend_disable_mask_per_asic, + rdev->config.si.num_shader_engines); + + /* setup tiling info dword. gb_addr_config is not adequate since it does + * not have bank info, so create a custom tiling dword. + * bits 3:0 num_pipes + * bits 7:4 num_banks + * bits 11:8 group_size + * bits 15:12 row_size + */ + rdev->config.si.tile_config = 0; + switch (rdev->config.si.num_tile_pipes) { + case 1: + rdev->config.si.tile_config |= (0 << 0); + break; + case 2: + rdev->config.si.tile_config |= (1 << 0); + break; + case 4: + rdev->config.si.tile_config |= (2 << 0); + break; + case 8: + default: + /* XXX what about 12? */ + rdev->config.si.tile_config |= (3 << 0); + break; + } + rdev->config.si.tile_config |= + ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; + rdev->config.si.tile_config |= + ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; + rdev->config.si.tile_config |= + ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12; + + rdev->config.si.backend_map = gb_backend_map; + WREG32(GB_ADDR_CONFIG, gb_addr_config); + WREG32(DMIF_ADDR_CONFIG, gb_addr_config); + WREG32(HDP_ADDR_CONFIG, gb_addr_config); + + /* primary versions */ + WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config); + + WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable); + + /* user versions */ + WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config); + + WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable); + + si_tiling_mode_table_init(rdev); + + /* set HW defaults for 3D engine */ + WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | + ROQ_IB2_START(0x2b))); + WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60)); + + sx_debug_1 = RREG32(SX_DEBUG_1); + WREG32(SX_DEBUG_1, sx_debug_1); + + WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); + + WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) | + SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) | + SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) | + SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size))); + + WREG32(VGT_NUM_INSTANCES, 1); + + WREG32(CP_PERFMON_CNTL, 0); + + WREG32(SQ_CONFIG, 0); + + WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | + FORCE_EOV_MAX_REZ_CNT(255))); + + WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) | + AUTO_INVLD_EN(ES_AND_GS_AUTO)); + + WREG32(VGT_GS_VERTEX_REUSE, 16); + WREG32(PA_SC_LINE_STIPPLE_STATE, 0); + + WREG32(CB_PERFCOUNTER0_SELECT0, 0); + WREG32(CB_PERFCOUNTER0_SELECT1, 0); + WREG32(CB_PERFCOUNTER1_SELECT0, 0); + WREG32(CB_PERFCOUNTER1_SELECT1, 0); + WREG32(CB_PERFCOUNTER2_SELECT0, 0); + WREG32(CB_PERFCOUNTER2_SELECT1, 0); + WREG32(CB_PERFCOUNTER3_SELECT0, 0); + WREG32(CB_PERFCOUNTER3_SELECT1, 0); + + tmp = RREG32(HDP_MISC_CNTL); + tmp |= HDP_FLUSH_INVALIDATE_CACHE; + WREG32(HDP_MISC_CNTL, tmp); + + hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); + WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); + + WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3)); + + udelay(50); +} + +/* + * GPU scratch registers helpers function. + */ +static void si_scratch_init(struct radeon_device *rdev) +{ + int i; + + rdev->scratch.num_reg = 7; + rdev->scratch.reg_base = SCRATCH_REG0; + for (i = 0; i < rdev->scratch.num_reg; i++) { + rdev->scratch.free[i] = true; + rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4); + } +} + +void si_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + + /* flush read cache over gart */ + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA); + radeon_ring_write(ring, 0xFFFFFFFF); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 10); /* poll interval */ + /* EVENT_WRITE_EOP - flush caches, send int */ + radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); + radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, 0); +} + +/* + * IB stuff + */ +void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + u32 header; + + if (ib->is_const_ib) + header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); + else + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + + radeon_ring_write(ring, header); + radeon_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + (ib->gpu_addr & 0xFFFFFFFC)); + radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); + radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24)); + + /* flush read cache over gart for this vmid */ + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(ring, ib->vm_id); + radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA); + radeon_ring_write(ring, 0xFFFFFFFF); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 10); /* poll interval */ +} + +/* + * CP. + */ +static void si_cp_enable(struct radeon_device *rdev, bool enable) +{ + if (enable) + WREG32(CP_ME_CNTL, 0); + else { + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); + WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); + WREG32(SCRATCH_UMSK, 0); + } + udelay(50); +} + +static int si_cp_load_microcode(struct radeon_device *rdev) +{ + const __be32 *fw_data; + int i; + + if (!rdev->me_fw || !rdev->pfp_fw) + return -EINVAL; + + si_cp_enable(rdev, false); + + /* PFP */ + fw_data = (const __be32 *)rdev->pfp_fw->data; + WREG32(CP_PFP_UCODE_ADDR, 0); + for (i = 0; i < SI_PFP_UCODE_SIZE; i++) + WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); + WREG32(CP_PFP_UCODE_ADDR, 0); + + /* CE */ + fw_data = (const __be32 *)rdev->ce_fw->data; + WREG32(CP_CE_UCODE_ADDR, 0); + for (i = 0; i < SI_CE_UCODE_SIZE; i++) + WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++)); + WREG32(CP_CE_UCODE_ADDR, 0); + + /* ME */ + fw_data = (const __be32 *)rdev->me_fw->data; + WREG32(CP_ME_RAM_WADDR, 0); + for (i = 0; i < SI_PM4_UCODE_SIZE; i++) + WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); + WREG32(CP_ME_RAM_WADDR, 0); + + WREG32(CP_PFP_UCODE_ADDR, 0); + WREG32(CP_CE_UCODE_ADDR, 0); + WREG32(CP_ME_RAM_WADDR, 0); + WREG32(CP_ME_RAM_RADDR, 0); + return 0; +} + +static int si_cp_start(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + int r, i; + + r = radeon_ring_lock(rdev, ring, 7 + 4); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } + /* init the CP */ + radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5)); + radeon_ring_write(ring, 0x1); + radeon_ring_write(ring, 0x0); + radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1); + radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + + /* init the CE partitions */ + radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); + radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); + radeon_ring_write(ring, 0xc000); + radeon_ring_write(ring, 0xe000); + radeon_ring_unlock_commit(rdev, ring); + + si_cp_enable(rdev, true); + + r = radeon_ring_lock(rdev, ring, si_default_size + 10); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } + + /* setup clear context state */ + radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + for (i = 0; i < si_default_size; i++) + radeon_ring_write(ring, si_default_state[i]); + + radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); + + /* set clear context state */ + radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(ring, 0x00000316); + radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ + + radeon_ring_unlock_commit(rdev, ring); + + for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) { + ring = &rdev->ring[i]; + r = radeon_ring_lock(rdev, ring, 2); + + /* clear the compute context state */ + radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0)); + radeon_ring_write(ring, 0); + + radeon_ring_unlock_commit(rdev, ring); + } + + return 0; +} + +static void si_cp_fini(struct radeon_device *rdev) +{ + si_cp_enable(rdev, false); + radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); + radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); + radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); +} + +static int si_cp_resume(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + u32 tmp; + u32 rb_bufsz; + int r; + + /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */ + WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP | + SOFT_RESET_PA | + SOFT_RESET_VGT | + SOFT_RESET_SPI | + SOFT_RESET_SX)); + RREG32(GRBM_SOFT_RESET); + mdelay(15); + WREG32(GRBM_SOFT_RESET, 0); + RREG32(GRBM_SOFT_RESET); + + WREG32(CP_SEM_WAIT_TIMER, 0x0); + WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0); + + /* Set the write pointer delay */ + WREG32(CP_RB_WPTR_DELAY, 0); + + WREG32(CP_DEBUG, 0); + WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF); + + /* ring 0 - compute and gfx */ + /* Set ring buffer size */ + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + rb_bufsz = drm_order(ring->ring_size / 8); + tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; +#ifdef __BIG_ENDIAN + tmp |= BUF_SWAP_32BIT; +#endif + WREG32(CP_RB0_CNTL, tmp); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA); + ring->wptr = 0; + WREG32(CP_RB0_WPTR, ring->wptr); + + /* set the wb address wether it's enabled or not */ + WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC); + WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF); + + if (rdev->wb.enabled) + WREG32(SCRATCH_UMSK, 0xff); + else { + tmp |= RB_NO_UPDATE; + WREG32(SCRATCH_UMSK, 0); + } + + mdelay(1); + WREG32(CP_RB0_CNTL, tmp); + + WREG32(CP_RB0_BASE, ring->gpu_addr >> 8); + + ring->rptr = RREG32(CP_RB0_RPTR); + + /* ring1 - compute only */ + /* Set ring buffer size */ + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; + rb_bufsz = drm_order(ring->ring_size / 8); + tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; +#ifdef __BIG_ENDIAN + tmp |= BUF_SWAP_32BIT; +#endif + WREG32(CP_RB1_CNTL, tmp); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA); + ring->wptr = 0; + WREG32(CP_RB1_WPTR, ring->wptr); + + /* set the wb address wether it's enabled or not */ + WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC); + WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF); + + mdelay(1); + WREG32(CP_RB1_CNTL, tmp); + + WREG32(CP_RB1_BASE, ring->gpu_addr >> 8); + + ring->rptr = RREG32(CP_RB1_RPTR); + + /* ring2 - compute only */ + /* Set ring buffer size */ + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; + rb_bufsz = drm_order(ring->ring_size / 8); + tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; +#ifdef __BIG_ENDIAN + tmp |= BUF_SWAP_32BIT; +#endif + WREG32(CP_RB2_CNTL, tmp); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA); + ring->wptr = 0; + WREG32(CP_RB2_WPTR, ring->wptr); + + /* set the wb address wether it's enabled or not */ + WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC); + WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF); + + mdelay(1); + WREG32(CP_RB2_CNTL, tmp); + + WREG32(CP_RB2_BASE, ring->gpu_addr >> 8); + + ring->rptr = RREG32(CP_RB2_RPTR); + + /* start the rings */ + si_cp_start(rdev); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true; + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true; + r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); + if (r) { + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; + return r; + } + r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); + if (r) { + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; + } + r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); + if (r) { + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; + } + + return 0; +} + +bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 srbm_status; + u32 grbm_status, grbm_status2; + u32 grbm_status_se0, grbm_status_se1; + struct r100_gpu_lockup *lockup = &rdev->config.si.lockup; + int r; + + srbm_status = RREG32(SRBM_STATUS); + grbm_status = RREG32(GRBM_STATUS); + grbm_status2 = RREG32(GRBM_STATUS2); + grbm_status_se0 = RREG32(GRBM_STATUS_SE0); + grbm_status_se1 = RREG32(GRBM_STATUS_SE1); + if (!(grbm_status & GUI_ACTIVE)) { + r100_gpu_lockup_update(lockup, ring); + return false; + } + /* force CP activities */ + r = radeon_ring_lock(rdev, ring, 2); + if (!r) { + /* PACKET2 NOP */ + radeon_ring_write(ring, 0x80000000); + radeon_ring_write(ring, 0x80000000); + radeon_ring_unlock_commit(rdev, ring); + } + /* XXX deal with CP0,1,2 */ + ring->rptr = RREG32(ring->rptr_reg); + return r100_gpu_cp_is_lockup(rdev, lockup, ring); +} + +static int si_gpu_soft_reset(struct radeon_device *rdev) +{ + struct evergreen_mc_save save; + u32 grbm_reset = 0; + + if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE)) + return 0; + + dev_info(rdev->dev, "GPU softreset \n"); + dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", + RREG32(GRBM_STATUS)); + dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n", + RREG32(GRBM_STATUS2)); + dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", + RREG32(GRBM_STATUS_SE0)); + dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n", + RREG32(GRBM_STATUS_SE1)); + dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", + RREG32(SRBM_STATUS)); + evergreen_mc_stop(rdev, &save); + if (radeon_mc_wait_for_idle(rdev)) { + dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); + } + /* Disable CP parsing/prefetching */ + WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT); + + /* reset all the gfx blocks */ + grbm_reset = (SOFT_RESET_CP | + SOFT_RESET_CB | + SOFT_RESET_DB | + SOFT_RESET_GDS | + SOFT_RESET_PA | + SOFT_RESET_SC | + SOFT_RESET_SPI | + SOFT_RESET_SX | + SOFT_RESET_TC | + SOFT_RESET_TA | + SOFT_RESET_VGT | + SOFT_RESET_IA); + + dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset); + WREG32(GRBM_SOFT_RESET, grbm_reset); + (void)RREG32(GRBM_SOFT_RESET); + udelay(50); + WREG32(GRBM_SOFT_RESET, 0); + (void)RREG32(GRBM_SOFT_RESET); + /* Wait a little for things to settle down */ + udelay(50); + dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", + RREG32(GRBM_STATUS)); + dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n", + RREG32(GRBM_STATUS2)); + dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", + RREG32(GRBM_STATUS_SE0)); + dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n", + RREG32(GRBM_STATUS_SE1)); + dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", + RREG32(SRBM_STATUS)); + evergreen_mc_resume(rdev, &save); + return 0; +} + +int si_asic_reset(struct radeon_device *rdev) +{ + return si_gpu_soft_reset(rdev); +} + +/* MC */ +static void si_mc_program(struct radeon_device *rdev) +{ + struct evergreen_mc_save save; + u32 tmp; + int i, j; + + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); + + evergreen_mc_stop(rdev, &save); + if (radeon_mc_wait_for_idle(rdev)) { + dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); + } + /* Lockout access through VGA aperture*/ + WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); + /* Update configuration */ + WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, + rdev->mc.vram_start >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, + rdev->mc.vram_end >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, + rdev->vram_scratch.gpu_addr >> 12); + tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16; + tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); + WREG32(MC_VM_FB_LOCATION, tmp); + /* XXX double check these! */ + WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); + WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); + WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF); + WREG32(MC_VM_AGP_BASE, 0); + WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); + WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); + if (radeon_mc_wait_for_idle(rdev)) { + dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); + } + evergreen_mc_resume(rdev, &save); + /* we need to own VRAM, so turn off the VGA renderer here + * to stop it overwriting our objects */ + rv515_vga_render_disable(rdev); +} + +/* SI MC address space is 40 bits */ +static void si_vram_location(struct radeon_device *rdev, + struct radeon_mc *mc, u64 base) +{ + mc->vram_start = base; + if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) { + dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n"); + mc->real_vram_size = mc->aper_size; + mc->mc_vram_size = mc->aper_size; + } + mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; + dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n", + mc->mc_vram_size >> 20, mc->vram_start, + mc->vram_end, mc->real_vram_size >> 20); +} + +static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc) +{ + u64 size_af, size_bf; + + size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align; + size_bf = mc->vram_start & ~mc->gtt_base_align; + if (size_bf > size_af) { + if (mc->gtt_size > size_bf) { + dev_warn(rdev->dev, "limiting GTT\n"); + mc->gtt_size = size_bf; + } + mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size; + } else { + if (mc->gtt_size > size_af) { + dev_warn(rdev->dev, "limiting GTT\n"); + mc->gtt_size = size_af; + } + mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align; + } + mc->gtt_end = mc->gtt_start + mc->gtt_size - 1; + dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n", + mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end); +} + +static void si_vram_gtt_location(struct radeon_device *rdev, + struct radeon_mc *mc) +{ + if (mc->mc_vram_size > 0xFFC0000000ULL) { + /* leave room for at least 1024M GTT */ + dev_warn(rdev->dev, "limiting VRAM\n"); + mc->real_vram_size = 0xFFC0000000ULL; + mc->mc_vram_size = 0xFFC0000000ULL; + } + si_vram_location(rdev, &rdev->mc, 0); + rdev->mc.gtt_base_align = 0; + si_gtt_location(rdev, mc); +} + +static int si_mc_init(struct radeon_device *rdev) +{ + u32 tmp; + int chansize, numchan; + + /* Get VRAM informations */ + rdev->mc.vram_is_ddr = true; + tmp = RREG32(MC_ARB_RAMCFG); + if (tmp & CHANSIZE_OVERRIDE) { + chansize = 16; + } else if (tmp & CHANSIZE_MASK) { + chansize = 64; + } else { + chansize = 32; + } + tmp = RREG32(MC_SHARED_CHMAP); + switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { + case 0: + default: + numchan = 1; + break; + case 1: + numchan = 2; + break; + case 2: + numchan = 4; + break; + case 3: + numchan = 8; + break; + case 4: + numchan = 3; + break; + case 5: + numchan = 6; + break; + case 6: + numchan = 10; + break; + case 7: + numchan = 12; + break; + case 8: + numchan = 16; + break; + } + rdev->mc.vram_width = numchan * chansize; + /* Could aper size report 0 ? */ + rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); + rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); + /* size in MB on si */ + rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; + rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; + rdev->mc.visible_vram_size = rdev->mc.aper_size; + si_vram_gtt_location(rdev, &rdev->mc); + radeon_update_bandwidth_info(rdev); + + return 0; +} + +/* + * GART + */ +void si_pcie_gart_tlb_flush(struct radeon_device *rdev) +{ + /* flush hdp cache */ + WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); + + /* bits 0-15 are the VM contexts0-15 */ + WREG32(VM_INVALIDATE_REQUEST, 1); +} + +int si_pcie_gart_enable(struct radeon_device *rdev) +{ + int r, i; + + if (rdev->gart.robj == NULL) { + dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); + return -EINVAL; + } + r = radeon_gart_table_vram_pin(rdev); + if (r) + return r; + radeon_gart_restore(rdev); + /* Setup TLB control */ + WREG32(MC_VM_MX_L1_TLB_CNTL, + (0xA << 7) | + ENABLE_L1_TLB | + SYSTEM_ACCESS_MODE_NOT_IN_SYS | + ENABLE_ADVANCED_DRIVER_MODEL | + SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7) | + CONTEXT1_IDENTITY_ACCESS_MODE(1)); + WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); + WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | + L2_CACHE_BIGK_FRAGMENT_SIZE(0)); + /* setup context0 */ + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); + WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, + (u32)(rdev->dummy_page.addr >> 12)); + WREG32(VM_CONTEXT0_CNTL2, 0); + WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT)); + + WREG32(0x15D4, 0); + WREG32(0x15D8, 0); + WREG32(0x15DC, 0); + + /* empty context1-15 */ + /* FIXME start with 1G, once using 2 level pt switch to full + * vm size space + */ + /* set vm size, must be a multiple of 4 */ + WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); + WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, (1 << 30) / RADEON_GPU_PAGE_SIZE); + for (i = 1; i < 16; i++) { + if (i < 8) + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), + rdev->gart.table_addr >> 12); + else + WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2), + rdev->gart.table_addr >> 12); + } + + /* enable context1-15 */ + WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, + (u32)(rdev->dummy_page.addr >> 12)); + WREG32(VM_CONTEXT1_CNTL2, 0); + WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); + + si_pcie_gart_tlb_flush(rdev); + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(rdev->mc.gtt_size >> 20), + (unsigned long long)rdev->gart.table_addr); + rdev->gart.ready = true; + return 0; +} + +void si_pcie_gart_disable(struct radeon_device *rdev) +{ + /* Disable all tables */ + WREG32(VM_CONTEXT0_CNTL, 0); + WREG32(VM_CONTEXT1_CNTL, 0); + /* Setup TLB control */ + WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS | + SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7) | + CONTEXT1_IDENTITY_ACCESS_MODE(1)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | + L2_CACHE_BIGK_FRAGMENT_SIZE(0)); + radeon_gart_table_vram_unpin(rdev); +} + +void si_pcie_gart_fini(struct radeon_device *rdev) +{ + si_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); +} + +/* vm parser */ +static bool si_vm_reg_valid(u32 reg) +{ + /* context regs are fine */ + if (reg >= 0x28000) + return true; + + /* check config regs */ + switch (reg) { + case GRBM_GFX_INDEX: + case VGT_VTX_VECT_EJECT_REG: + case VGT_CACHE_INVALIDATION: + case VGT_ESGS_RING_SIZE: + case VGT_GSVS_RING_SIZE: + case VGT_GS_VERTEX_REUSE: + case VGT_PRIMITIVE_TYPE: + case VGT_INDEX_TYPE: + case VGT_NUM_INDICES: + case VGT_NUM_INSTANCES: + case VGT_TF_RING_SIZE: + case VGT_HS_OFFCHIP_PARAM: + case VGT_TF_MEMORY_BASE: + case PA_CL_ENHANCE: + case PA_SU_LINE_STIPPLE_VALUE: + case PA_SC_LINE_STIPPLE_STATE: + case PA_SC_ENHANCE: + case SQC_CACHES: + case SPI_STATIC_THREAD_MGMT_1: + case SPI_STATIC_THREAD_MGMT_2: + case SPI_STATIC_THREAD_MGMT_3: + case SPI_PS_MAX_WAVE_ID: + case SPI_CONFIG_CNTL: + case SPI_CONFIG_CNTL_1: + case TA_CNTL_AUX: + return true; + default: + DRM_ERROR("Invalid register 0x%x in CS\n", reg); + return false; + } +} + +static int si_vm_packet3_ce_check(struct radeon_device *rdev, + u32 *ib, struct radeon_cs_packet *pkt) +{ + switch (pkt->opcode) { + case PACKET3_NOP: + case PACKET3_SET_BASE: + case PACKET3_SET_CE_DE_COUNTERS: + case PACKET3_LOAD_CONST_RAM: + case PACKET3_WRITE_CONST_RAM: + case PACKET3_WRITE_CONST_RAM_OFFSET: + case PACKET3_DUMP_CONST_RAM: + case PACKET3_INCREMENT_CE_COUNTER: + case PACKET3_WAIT_ON_DE_COUNTER: + case PACKET3_CE_WRITE: + break; + default: + DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode); + return -EINVAL; + } + return 0; +} + +static int si_vm_packet3_gfx_check(struct radeon_device *rdev, + u32 *ib, struct radeon_cs_packet *pkt) +{ + u32 idx = pkt->idx + 1; + u32 idx_value = ib[idx]; + u32 start_reg, end_reg, reg, i; + + switch (pkt->opcode) { + case PACKET3_NOP: + case PACKET3_SET_BASE: + case PACKET3_CLEAR_STATE: + case PACKET3_INDEX_BUFFER_SIZE: + case PACKET3_DISPATCH_DIRECT: + case PACKET3_DISPATCH_INDIRECT: + case PACKET3_ALLOC_GDS: + case PACKET3_WRITE_GDS_RAM: + case PACKET3_ATOMIC_GDS: + case PACKET3_ATOMIC: + case PACKET3_OCCLUSION_QUERY: + case PACKET3_SET_PREDICATION: + case PACKET3_COND_EXEC: + case PACKET3_PRED_EXEC: + case PACKET3_DRAW_INDIRECT: + case PACKET3_DRAW_INDEX_INDIRECT: + case PACKET3_INDEX_BASE: + case PACKET3_DRAW_INDEX_2: + case PACKET3_CONTEXT_CONTROL: + case PACKET3_INDEX_TYPE: + case PACKET3_DRAW_INDIRECT_MULTI: + case PACKET3_DRAW_INDEX_AUTO: + case PACKET3_DRAW_INDEX_IMMD: + case PACKET3_NUM_INSTANCES: + case PACKET3_DRAW_INDEX_MULTI_AUTO: + case PACKET3_STRMOUT_BUFFER_UPDATE: + case PACKET3_DRAW_INDEX_OFFSET_2: + case PACKET3_DRAW_INDEX_MULTI_ELEMENT: + case PACKET3_DRAW_INDEX_INDIRECT_MULTI: + case PACKET3_MPEG_INDEX: + case PACKET3_WAIT_REG_MEM: + case PACKET3_MEM_WRITE: + case PACKET3_PFP_SYNC_ME: + case PACKET3_SURFACE_SYNC: + case PACKET3_EVENT_WRITE: + case PACKET3_EVENT_WRITE_EOP: + case PACKET3_EVENT_WRITE_EOS: + case PACKET3_SET_CONTEXT_REG: + case PACKET3_SET_CONTEXT_REG_INDIRECT: + case PACKET3_SET_SH_REG: + case PACKET3_SET_SH_REG_OFFSET: + case PACKET3_INCREMENT_DE_COUNTER: + case PACKET3_WAIT_ON_CE_COUNTER: + case PACKET3_WAIT_ON_AVAIL_BUFFER: + case PACKET3_ME_WRITE: + break; + case PACKET3_COPY_DATA: + if ((idx_value & 0xf00) == 0) { + reg = ib[idx + 3] * 4; + if (!si_vm_reg_valid(reg)) + return -EINVAL; + } + break; + case PACKET3_WRITE_DATA: + if ((idx_value & 0xf00) == 0) { + start_reg = ib[idx + 1] * 4; + if (idx_value & 0x10000) { + if (!si_vm_reg_valid(start_reg)) + return -EINVAL; + } else { + for (i = 0; i < (pkt->count - 2); i++) { + reg = start_reg + (4 * i); + if (!si_vm_reg_valid(reg)) + return -EINVAL; + } + } + } + break; + case PACKET3_COND_WRITE: + if (idx_value & 0x100) { + reg = ib[idx + 5] * 4; + if (!si_vm_reg_valid(reg)) + return -EINVAL; + } + break; + case PACKET3_COPY_DW: + if (idx_value & 0x2) { + reg = ib[idx + 3] * 4; + if (!si_vm_reg_valid(reg)) + return -EINVAL; + } + break; + case PACKET3_SET_CONFIG_REG: + start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_CONFIG_REG_START) || + (start_reg >= PACKET3_SET_CONFIG_REG_END) || + (end_reg >= PACKET3_SET_CONFIG_REG_END)) { + DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + return -EINVAL; + } + for (i = 0; i < pkt->count; i++) { + reg = start_reg + (4 * i); + if (!si_vm_reg_valid(reg)) + return -EINVAL; + } + break; + default: + DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode); + return -EINVAL; + } + return 0; +} + +static int si_vm_packet3_compute_check(struct radeon_device *rdev, + u32 *ib, struct radeon_cs_packet *pkt) +{ + u32 idx = pkt->idx + 1; + u32 idx_value = ib[idx]; + u32 start_reg, reg, i; + + switch (pkt->opcode) { + case PACKET3_NOP: + case PACKET3_SET_BASE: + case PACKET3_CLEAR_STATE: + case PACKET3_DISPATCH_DIRECT: + case PACKET3_DISPATCH_INDIRECT: + case PACKET3_ALLOC_GDS: + case PACKET3_WRITE_GDS_RAM: + case PACKET3_ATOMIC_GDS: + case PACKET3_ATOMIC: + case PACKET3_OCCLUSION_QUERY: + case PACKET3_SET_PREDICATION: + case PACKET3_COND_EXEC: + case PACKET3_PRED_EXEC: + case PACKET3_CONTEXT_CONTROL: + case PACKET3_STRMOUT_BUFFER_UPDATE: + case PACKET3_WAIT_REG_MEM: + case PACKET3_MEM_WRITE: + case PACKET3_PFP_SYNC_ME: + case PACKET3_SURFACE_SYNC: + case PACKET3_EVENT_WRITE: + case PACKET3_EVENT_WRITE_EOP: + case PACKET3_EVENT_WRITE_EOS: + case PACKET3_SET_CONTEXT_REG: + case PACKET3_SET_CONTEXT_REG_INDIRECT: + case PACKET3_SET_SH_REG: + case PACKET3_SET_SH_REG_OFFSET: + case PACKET3_INCREMENT_DE_COUNTER: + case PACKET3_WAIT_ON_CE_COUNTER: + case PACKET3_WAIT_ON_AVAIL_BUFFER: + case PACKET3_ME_WRITE: + break; + case PACKET3_COPY_DATA: + if ((idx_value & 0xf00) == 0) { + reg = ib[idx + 3] * 4; + if (!si_vm_reg_valid(reg)) + return -EINVAL; + } + break; + case PACKET3_WRITE_DATA: + if ((idx_value & 0xf00) == 0) { + start_reg = ib[idx + 1] * 4; + if (idx_value & 0x10000) { + if (!si_vm_reg_valid(start_reg)) + return -EINVAL; + } else { + for (i = 0; i < (pkt->count - 2); i++) { + reg = start_reg + (4 * i); + if (!si_vm_reg_valid(reg)) + return -EINVAL; + } + } + } + break; + case PACKET3_COND_WRITE: + if (idx_value & 0x100) { + reg = ib[idx + 5] * 4; + if (!si_vm_reg_valid(reg)) + return -EINVAL; + } + break; + case PACKET3_COPY_DW: + if (idx_value & 0x2) { + reg = ib[idx + 3] * 4; + if (!si_vm_reg_valid(reg)) + return -EINVAL; + } + break; + default: + DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode); + return -EINVAL; + } + return 0; +} + +int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) +{ + int ret = 0; + u32 idx = 0; + struct radeon_cs_packet pkt; + + do { + pkt.idx = idx; + pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]); + pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]); + pkt.one_reg_wr = 0; + switch (pkt.type) { + case PACKET_TYPE0: + dev_err(rdev->dev, "Packet0 not allowed!\n"); + ret = -EINVAL; + break; + case PACKET_TYPE2: + idx += 1; + break; + case PACKET_TYPE3: + pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]); + if (ib->is_const_ib) + ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt); + else { + switch (ib->fence->ring) { + case RADEON_RING_TYPE_GFX_INDEX: + ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt); + break; + case CAYMAN_RING_TYPE_CP1_INDEX: + case CAYMAN_RING_TYPE_CP2_INDEX: + ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt); + break; + default: + dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->fence->ring); + ret = -EINVAL; + break; + } + } + idx += pkt.count + 2; + break; + default: + dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type); + ret = -EINVAL; + break; + } + if (ret) + break; + } while (idx < ib->length_dw); + + return ret; +} + +/* + * vm + */ +int si_vm_init(struct radeon_device *rdev) +{ + /* number of VMs */ + rdev->vm_manager.nvm = 16; + /* base offset of vram pages */ + rdev->vm_manager.vram_base_offset = 0; + + return 0; +} + +void si_vm_fini(struct radeon_device *rdev) +{ +} + +int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id) +{ + if (id < 8) + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12); + else + WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2), + vm->pt_gpu_addr >> 12); + /* flush hdp cache */ + WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); + /* bits 0-15 are the VM contexts0-15 */ + WREG32(VM_INVALIDATE_REQUEST, 1 << id); + return 0; +} + +void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) +{ + if (vm->id < 8) + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0); + else + WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2), 0); + /* flush hdp cache */ + WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); + /* bits 0-15 are the VM contexts0-15 */ + WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id); +} + +void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm) +{ + if (vm->id == -1) + return; + + /* flush hdp cache */ + WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); + /* bits 0-15 are the VM contexts0-15 */ + WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id); +} + +/* + * RLC + */ +void si_rlc_fini(struct radeon_device *rdev) +{ + int r; + + /* save restore block */ + if (rdev->rlc.save_restore_obj) { + r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false); + if (unlikely(r != 0)) + dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r); + radeon_bo_unpin(rdev->rlc.save_restore_obj); + radeon_bo_unreserve(rdev->rlc.save_restore_obj); + + radeon_bo_unref(&rdev->rlc.save_restore_obj); + rdev->rlc.save_restore_obj = NULL; + } + + /* clear state block */ + if (rdev->rlc.clear_state_obj) { + r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false); + if (unlikely(r != 0)) + dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r); + radeon_bo_unpin(rdev->rlc.clear_state_obj); + radeon_bo_unreserve(rdev->rlc.clear_state_obj); + + radeon_bo_unref(&rdev->rlc.clear_state_obj); + rdev->rlc.clear_state_obj = NULL; + } +} + +int si_rlc_init(struct radeon_device *rdev) +{ + int r; + + /* save restore block */ + if (rdev->rlc.save_restore_obj == NULL) { + r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, &rdev->rlc.save_restore_obj); + if (r) { + dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r); + return r; + } + } + + r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false); + if (unlikely(r != 0)) { + si_rlc_fini(rdev); + return r; + } + r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->rlc.save_restore_gpu_addr); + if (r) { + radeon_bo_unreserve(rdev->rlc.save_restore_obj); + dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r); + si_rlc_fini(rdev); + return r; + } + + /* clear state block */ + if (rdev->rlc.clear_state_obj == NULL) { + r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, &rdev->rlc.clear_state_obj); + if (r) { + dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r); + si_rlc_fini(rdev); + return r; + } + } + r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false); + if (unlikely(r != 0)) { + si_rlc_fini(rdev); + return r; + } + r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->rlc.clear_state_gpu_addr); + if (r) { + + radeon_bo_unreserve(rdev->rlc.clear_state_obj); + dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r); + si_rlc_fini(rdev); + return r; + } + + return 0; +} + +static void si_rlc_stop(struct radeon_device *rdev) +{ + WREG32(RLC_CNTL, 0); +} + +static void si_rlc_start(struct radeon_device *rdev) +{ + WREG32(RLC_CNTL, RLC_ENABLE); +} + +static int si_rlc_resume(struct radeon_device *rdev) +{ + u32 i; + const __be32 *fw_data; + + if (!rdev->rlc_fw) + return -EINVAL; + + si_rlc_stop(rdev); + + WREG32(RLC_RL_BASE, 0); + WREG32(RLC_RL_SIZE, 0); + WREG32(RLC_LB_CNTL, 0); + WREG32(RLC_LB_CNTR_MAX, 0xffffffff); + WREG32(RLC_LB_CNTR_INIT, 0); + + WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); + WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8); + + WREG32(RLC_MC_CNTL, 0); + WREG32(RLC_UCODE_CNTL, 0); + + fw_data = (const __be32 *)rdev->rlc_fw->data; + for (i = 0; i < SI_RLC_UCODE_SIZE; i++) { + WREG32(RLC_UCODE_ADDR, i); + WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++)); + } + WREG32(RLC_UCODE_ADDR, 0); + + si_rlc_start(rdev); + + return 0; +} + +static void si_enable_interrupts(struct radeon_device *rdev) +{ + u32 ih_cntl = RREG32(IH_CNTL); + u32 ih_rb_cntl = RREG32(IH_RB_CNTL); + + ih_cntl |= ENABLE_INTR; + ih_rb_cntl |= IH_RB_ENABLE; + WREG32(IH_CNTL, ih_cntl); + WREG32(IH_RB_CNTL, ih_rb_cntl); + rdev->ih.enabled = true; +} + +static void si_disable_interrupts(struct radeon_device *rdev) +{ + u32 ih_rb_cntl = RREG32(IH_RB_CNTL); + u32 ih_cntl = RREG32(IH_CNTL); + + ih_rb_cntl &= ~IH_RB_ENABLE; + ih_cntl &= ~ENABLE_INTR; + WREG32(IH_RB_CNTL, ih_rb_cntl); + WREG32(IH_CNTL, ih_cntl); + /* set rptr, wptr to 0 */ + WREG32(IH_RB_RPTR, 0); + WREG32(IH_RB_WPTR, 0); + rdev->ih.enabled = false; + rdev->ih.wptr = 0; + rdev->ih.rptr = 0; +} + +static void si_disable_interrupt_state(struct radeon_device *rdev) +{ + u32 tmp; + + WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + WREG32(CP_INT_CNTL_RING1, 0); + WREG32(CP_INT_CNTL_RING2, 0); + WREG32(GRBM_INT_CNTL, 0); + WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); + WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); + if (rdev->num_crtc >= 4) { + WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); + WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); + } + if (rdev->num_crtc >= 6) { + WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); + WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); + } + + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); + if (rdev->num_crtc >= 4) { + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); + } + if (rdev->num_crtc >= 6) { + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); + } + + WREG32(DACA_AUTODETECT_INT_CONTROL, 0); + + tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY; + WREG32(DC_HPD1_INT_CONTROL, tmp); + tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY; + WREG32(DC_HPD2_INT_CONTROL, tmp); + tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY; + WREG32(DC_HPD3_INT_CONTROL, tmp); + tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY; + WREG32(DC_HPD4_INT_CONTROL, tmp); + tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY; + WREG32(DC_HPD5_INT_CONTROL, tmp); + tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY; + WREG32(DC_HPD6_INT_CONTROL, tmp); + +} + +static int si_irq_init(struct radeon_device *rdev) +{ + int ret = 0; + int rb_bufsz; + u32 interrupt_cntl, ih_cntl, ih_rb_cntl; + + /* allocate ring */ + ret = r600_ih_ring_alloc(rdev); + if (ret) + return ret; + + /* disable irqs */ + si_disable_interrupts(rdev); + + /* init rlc */ + ret = si_rlc_resume(rdev); + if (ret) { + r600_ih_ring_fini(rdev); + return ret; + } + + /* setup interrupt control */ + /* set dummy read address to ring address */ + WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + interrupt_cntl = RREG32(INTERRUPT_CNTL); + /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi + * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; + /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; + WREG32(INTERRUPT_CNTL, interrupt_cntl); + + WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8); + rb_bufsz = drm_order(rdev->ih.ring_size / 4); + + ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE | + IH_WPTR_OVERFLOW_CLEAR | + (rb_bufsz << 1)); + + if (rdev->wb.enabled) + ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE; + + /* set the writeback address whether it's enabled or not */ + WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC); + WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF); + + WREG32(IH_RB_CNTL, ih_rb_cntl); + + /* set rptr, wptr to 0 */ + WREG32(IH_RB_RPTR, 0); + WREG32(IH_RB_WPTR, 0); + + /* Default settings for IH_CNTL (disabled at first) */ + ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0); + /* RPTR_REARM only works if msi's are enabled */ + if (rdev->msi_enabled) + ih_cntl |= RPTR_REARM; + WREG32(IH_CNTL, ih_cntl); + + /* force the active interrupt state to all disabled */ + si_disable_interrupt_state(rdev); + + /* enable irqs */ + si_enable_interrupts(rdev); + + return ret; +} + +int si_irq_set(struct radeon_device *rdev) +{ + u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE; + u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0; + u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; + u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; + u32 grbm_int_cntl = 0; + u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; + + if (!rdev->irq.installed) { + WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); + return -EINVAL; + } + /* don't enable anything if the ih is disabled */ + if (!rdev->ih.enabled) { + si_disable_interrupts(rdev); + /* force the active interrupt state to all disabled */ + si_disable_interrupt_state(rdev); + return 0; + } + + hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN; + hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN; + hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN; + hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN; + hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN; + hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN; + + /* enable CP interrupts on all rings */ + if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + DRM_DEBUG("si_irq_set: sw int gfx\n"); + cp_int_cntl |= TIME_STAMP_INT_ENABLE; + } + if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) { + DRM_DEBUG("si_irq_set: sw int cp1\n"); + cp_int_cntl1 |= TIME_STAMP_INT_ENABLE; + } + if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) { + DRM_DEBUG("si_irq_set: sw int cp2\n"); + cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; + } + if (rdev->irq.crtc_vblank_int[0] || + rdev->irq.pflip[0]) { + DRM_DEBUG("si_irq_set: vblank 0\n"); + crtc1 |= VBLANK_INT_MASK; + } + if (rdev->irq.crtc_vblank_int[1] || + rdev->irq.pflip[1]) { + DRM_DEBUG("si_irq_set: vblank 1\n"); + crtc2 |= VBLANK_INT_MASK; + } + if (rdev->irq.crtc_vblank_int[2] || + rdev->irq.pflip[2]) { + DRM_DEBUG("si_irq_set: vblank 2\n"); + crtc3 |= VBLANK_INT_MASK; + } + if (rdev->irq.crtc_vblank_int[3] || + rdev->irq.pflip[3]) { + DRM_DEBUG("si_irq_set: vblank 3\n"); + crtc4 |= VBLANK_INT_MASK; + } + if (rdev->irq.crtc_vblank_int[4] || + rdev->irq.pflip[4]) { + DRM_DEBUG("si_irq_set: vblank 4\n"); + crtc5 |= VBLANK_INT_MASK; + } + if (rdev->irq.crtc_vblank_int[5] || + rdev->irq.pflip[5]) { + DRM_DEBUG("si_irq_set: vblank 5\n"); + crtc6 |= VBLANK_INT_MASK; + } + if (rdev->irq.hpd[0]) { + DRM_DEBUG("si_irq_set: hpd 1\n"); + hpd1 |= DC_HPDx_INT_EN; + } + if (rdev->irq.hpd[1]) { + DRM_DEBUG("si_irq_set: hpd 2\n"); + hpd2 |= DC_HPDx_INT_EN; + } + if (rdev->irq.hpd[2]) { + DRM_DEBUG("si_irq_set: hpd 3\n"); + hpd3 |= DC_HPDx_INT_EN; + } + if (rdev->irq.hpd[3]) { + DRM_DEBUG("si_irq_set: hpd 4\n"); + hpd4 |= DC_HPDx_INT_EN; + } + if (rdev->irq.hpd[4]) { + DRM_DEBUG("si_irq_set: hpd 5\n"); + hpd5 |= DC_HPDx_INT_EN; + } + if (rdev->irq.hpd[5]) { + DRM_DEBUG("si_irq_set: hpd 6\n"); + hpd6 |= DC_HPDx_INT_EN; + } + if (rdev->irq.gui_idle) { + DRM_DEBUG("gui idle\n"); + grbm_int_cntl |= GUI_IDLE_INT_ENABLE; + } + + WREG32(CP_INT_CNTL_RING0, cp_int_cntl); + WREG32(CP_INT_CNTL_RING1, cp_int_cntl1); + WREG32(CP_INT_CNTL_RING2, cp_int_cntl2); + + WREG32(GRBM_INT_CNTL, grbm_int_cntl); + + WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); + WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2); + if (rdev->num_crtc >= 4) { + WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3); + WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4); + } + if (rdev->num_crtc >= 6) { + WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5); + WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6); + } + + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2); + if (rdev->num_crtc >= 4) { + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4); + } + if (rdev->num_crtc >= 6) { + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6); + } + + WREG32(DC_HPD1_INT_CONTROL, hpd1); + WREG32(DC_HPD2_INT_CONTROL, hpd2); + WREG32(DC_HPD3_INT_CONTROL, hpd3); + WREG32(DC_HPD4_INT_CONTROL, hpd4); + WREG32(DC_HPD5_INT_CONTROL, hpd5); + WREG32(DC_HPD6_INT_CONTROL, hpd6); + + return 0; +} + +static inline void si_irq_ack(struct radeon_device *rdev) +{ + u32 tmp; + + rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS); + rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE); + rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2); + rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3); + rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4); + rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5); + rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET); + rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET); + if (rdev->num_crtc >= 4) { + rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET); + rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET); + } + if (rdev->num_crtc >= 6) { + rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET); + rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET); + } + + if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) + WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK); + if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) + WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK); + if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) + WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK); + if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) + WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK); + + if (rdev->num_crtc >= 4) { + if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) + WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK); + if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) + WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK); + if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) + WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK); + if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) + WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK); + } + + if (rdev->num_crtc >= 6) { + if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) + WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK); + if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) + WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK); + if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) + WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK); + if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) + WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK); + } + + if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { + tmp = RREG32(DC_HPD1_INT_CONTROL); + tmp |= DC_HPDx_INT_ACK; + WREG32(DC_HPD1_INT_CONTROL, tmp); + } + if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { + tmp = RREG32(DC_HPD2_INT_CONTROL); + tmp |= DC_HPDx_INT_ACK; + WREG32(DC_HPD2_INT_CONTROL, tmp); + } + if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { + tmp = RREG32(DC_HPD3_INT_CONTROL); + tmp |= DC_HPDx_INT_ACK; + WREG32(DC_HPD3_INT_CONTROL, tmp); + } + if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { + tmp = RREG32(DC_HPD4_INT_CONTROL); + tmp |= DC_HPDx_INT_ACK; + WREG32(DC_HPD4_INT_CONTROL, tmp); + } + if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { + tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp |= DC_HPDx_INT_ACK; + WREG32(DC_HPD5_INT_CONTROL, tmp); + } + if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { + tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp |= DC_HPDx_INT_ACK; + WREG32(DC_HPD6_INT_CONTROL, tmp); + } +} + +static void si_irq_disable(struct radeon_device *rdev) +{ + si_disable_interrupts(rdev); + /* Wait and acknowledge irq */ + mdelay(1); + si_irq_ack(rdev); + si_disable_interrupt_state(rdev); +} + +static void si_irq_suspend(struct radeon_device *rdev) +{ + si_irq_disable(rdev); + si_rlc_stop(rdev); +} + +static void si_irq_fini(struct radeon_device *rdev) +{ + si_irq_suspend(rdev); + r600_ih_ring_fini(rdev); +} + +static inline u32 si_get_ih_wptr(struct radeon_device *rdev) +{ + u32 wptr, tmp; + + if (rdev->wb.enabled) + wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]); + else + wptr = RREG32(IH_RB_WPTR); + + if (wptr & RB_OVERFLOW) { + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 16). Hopefully + * this should allow us to catchup. + */ + dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n", + wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask); + rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask; + tmp = RREG32(IH_RB_CNTL); + tmp |= IH_WPTR_OVERFLOW_CLEAR; + WREG32(IH_RB_CNTL, tmp); + } + return (wptr & rdev->ih.ptr_mask); +} + +/* SI IV Ring + * Each IV ring entry is 128 bits: + * [7:0] - interrupt source id + * [31:8] - reserved + * [59:32] - interrupt source data + * [63:60] - reserved + * [71:64] - RINGID + * [79:72] - VMID + * [127:80] - reserved + */ +int si_irq_process(struct radeon_device *rdev) +{ + u32 wptr; + u32 rptr; + u32 src_id, src_data, ring_id; + u32 ring_index; + unsigned long flags; + bool queue_hotplug = false; + + if (!rdev->ih.enabled || rdev->shutdown) + return IRQ_NONE; + + wptr = si_get_ih_wptr(rdev); + rptr = rdev->ih.rptr; + DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + + spin_lock_irqsave(&rdev->ih.lock, flags); + if (rptr == wptr) { + spin_unlock_irqrestore(&rdev->ih.lock, flags); + return IRQ_NONE; + } +restart_ih: + /* Order reading of wptr vs. reading of IH ring data */ + rmb(); + + /* display interrupts */ + si_irq_ack(rdev); + + rdev->ih.wptr = wptr; + while (rptr != wptr) { + /* wptr/rptr are in bytes! */ + ring_index = rptr / 4; + src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; + src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; + ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; + + switch (src_id) { + case 1: /* D1 vblank/vline */ + switch (src_data) { + case 0: /* D1 vblank */ + if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[0]) + radeon_crtc_handle_flip(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + } + break; + case 1: /* D1 vline */ + if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + } + break; + default: + DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); + break; + } + break; + case 2: /* D2 vblank/vline */ + switch (src_data) { + case 0: /* D2 vblank */ + if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[1]) + radeon_crtc_handle_flip(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + } + break; + case 1: /* D2 vline */ + if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + } + break; + default: + DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); + break; + } + break; + case 3: /* D3 vblank/vline */ + switch (src_data) { + case 0: /* D3 vblank */ + if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[2]) + radeon_crtc_handle_flip(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + } + break; + case 1: /* D3 vline */ + if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + } + break; + default: + DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); + break; + } + break; + case 4: /* D4 vblank/vline */ + switch (src_data) { + case 0: /* D4 vblank */ + if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[3]) + radeon_crtc_handle_flip(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + } + break; + case 1: /* D4 vline */ + if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + } + break; + default: + DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); + break; + } + break; + case 5: /* D5 vblank/vline */ + switch (src_data) { + case 0: /* D5 vblank */ + if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[4]) + radeon_crtc_handle_flip(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + } + break; + case 1: /* D5 vline */ + if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + } + break; + default: + DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); + break; + } + break; + case 6: /* D6 vblank/vline */ + switch (src_data) { + case 0: /* D6 vblank */ + if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[5]) + radeon_crtc_handle_flip(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + } + break; + case 1: /* D6 vline */ + if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + } + break; + default: + DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); + break; + } + break; + case 42: /* HPD hotplug */ + switch (src_data) { + case 0: + if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + } + break; + case 1: + if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + } + break; + case 2: + if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + } + break; + case 3: + if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + } + break; + case 4: + if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + } + break; + case 5: + if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + } + break; + default: + DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); + break; + } + break; + case 176: /* RINGID0 CP_INT */ + radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); + break; + case 177: /* RINGID1 CP_INT */ + radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX); + break; + case 178: /* RINGID2 CP_INT */ + radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX); + break; + case 181: /* CP EOP event */ + DRM_DEBUG("IH: CP EOP\n"); + switch (ring_id) { + case 0: + radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); + break; + case 1: + radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX); + break; + case 2: + radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX); + break; + } + break; + case 233: /* GUI IDLE */ + DRM_DEBUG("IH: GUI idle\n"); + rdev->pm.gui_idle = true; + wake_up(&rdev->irq.idle_queue); + break; + default: + DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); + break; + } + + /* wptr/rptr are in bytes! */ + rptr += 16; + rptr &= rdev->ih.ptr_mask; + } + /* make sure wptr hasn't changed while processing */ + wptr = si_get_ih_wptr(rdev); + if (wptr != rdev->ih.wptr) + goto restart_ih; + if (queue_hotplug) + schedule_work(&rdev->hotplug_work); + rdev->ih.rptr = rptr; + WREG32(IH_RB_RPTR, rdev->ih.rptr); + spin_unlock_irqrestore(&rdev->ih.lock, flags); + return IRQ_HANDLED; +} + +/* + * startup/shutdown callbacks + */ +static int si_startup(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + int r; + + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || + !rdev->rlc_fw || !rdev->mc_fw) { + r = si_init_microcode(rdev); + if (r) { + DRM_ERROR("Failed to load firmware!\n"); + return r; + } + } + + r = si_mc_load_microcode(rdev); + if (r) { + DRM_ERROR("Failed to load MC firmware!\n"); + return r; + } + + r = r600_vram_scratch_init(rdev); + if (r) + return r; + + si_mc_program(rdev); + r = si_pcie_gart_enable(rdev); + if (r) + return r; + si_gpu_init(rdev); + +#if 0 + r = evergreen_blit_init(rdev); + if (r) { + r600_blit_fini(rdev); + rdev->asic->copy = NULL; + dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); + } +#endif + /* allocate rlc buffers */ + r = si_rlc_init(rdev); + if (r) { + DRM_ERROR("Failed to init rlc BOs!\n"); + return r; + } + + /* allocate wb buffer */ + r = radeon_wb_init(rdev); + if (r) + return r; + + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + + r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + + r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + + /* Enable IRQ */ + r = si_irq_init(rdev); + if (r) { + DRM_ERROR("radeon: IH init failed (%d).\n", r); + radeon_irq_kms_fini(rdev); + return r; + } + si_irq_set(rdev); + + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, + CP_RB0_RPTR, CP_RB0_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (r) + return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, + CP_RB1_RPTR, CP_RB1_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (r) + return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, + CP_RB2_RPTR, CP_RB2_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (r) + return r; + + r = si_cp_load_microcode(rdev); + if (r) + return r; + r = si_cp_resume(rdev); + if (r) + return r; + + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); + if (r) { + DRM_ERROR("radeon: failed testing IB (%d) on CP ring 0\n", r); + rdev->accel_working = false; + return r; + } + + r = radeon_ib_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); + if (r) { + DRM_ERROR("radeon: failed testing IB (%d) on CP ring 1\n", r); + rdev->accel_working = false; + return r; + } + + r = radeon_ib_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); + if (r) { + DRM_ERROR("radeon: failed testing IB (%d) on CP ring 2\n", r); + rdev->accel_working = false; + return r; + } + + r = radeon_vm_manager_start(rdev); + if (r) + return r; + + return 0; +} + +int si_resume(struct radeon_device *rdev) +{ + int r; + + /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw, + * posting will perform necessary task to bring back GPU into good + * shape. + */ + /* post card */ + atom_asic_init(rdev->mode_info.atom_context); + + rdev->accel_working = true; + r = si_startup(rdev); + if (r) { + DRM_ERROR("si startup failed on resume\n"); + rdev->accel_working = false; + return r; + } + + return r; + +} + +int si_suspend(struct radeon_device *rdev) +{ + /* FIXME: we should wait for ring to be empty */ + radeon_ib_pool_suspend(rdev); + radeon_vm_manager_suspend(rdev); +#if 0 + r600_blit_suspend(rdev); +#endif + si_cp_enable(rdev, false); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; + si_irq_suspend(rdev); + radeon_wb_disable(rdev); + si_pcie_gart_disable(rdev); + return 0; +} + +/* Plan is to move initialization in that function and use + * helper function so that radeon_device_init pretty much + * do nothing more than calling asic specific function. This + * should also allow to remove a bunch of callback function + * like vram_info. + */ +int si_init(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + int r; + + /* This don't do much */ + r = radeon_gem_init(rdev); + if (r) + return r; + /* Read BIOS */ + if (!radeon_get_bios(rdev)) { + if (ASIC_IS_AVIVO(rdev)) + return -EINVAL; + } + /* Must be an ATOMBIOS */ + if (!rdev->is_atom_bios) { + dev_err(rdev->dev, "Expecting atombios for cayman GPU\n"); + return -EINVAL; + } + r = radeon_atombios_init(rdev); + if (r) + return r; + + /* Post card if necessary */ + if (!radeon_card_posted(rdev)) { + if (!rdev->bios) { + dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); + return -EINVAL; + } + DRM_INFO("GPU not posted. posting now...\n"); + atom_asic_init(rdev->mode_info.atom_context); + } + /* Initialize scratch registers */ + si_scratch_init(rdev); + /* Initialize surface registers */ + radeon_surface_init(rdev); + /* Initialize clocks */ + radeon_get_clock_info(rdev->ddev); + + /* Fence driver */ + r = radeon_fence_driver_init(rdev); + if (r) + return r; + + /* initialize memory controller */ + r = si_mc_init(rdev); + if (r) + return r; + /* Memory manager */ + r = radeon_bo_init(rdev); + if (r) + return r; + + r = radeon_irq_kms_init(rdev); + if (r) + return r; + + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 1024 * 1024); + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 1024 * 1024); + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 1024 * 1024); + + rdev->ih.ring_obj = NULL; + r600_ih_ring_init(rdev, 64 * 1024); + + r = r600_pcie_gart_init(rdev); + if (r) + return r; + + r = radeon_ib_pool_init(rdev); + rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = radeon_vm_manager_init(rdev); + if (r) { + dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); + } + + r = si_startup(rdev); + if (r) { + dev_err(rdev->dev, "disabling GPU acceleration\n"); + si_cp_fini(rdev); + si_irq_fini(rdev); + si_rlc_fini(rdev); + radeon_wb_fini(rdev); + r100_ib_fini(rdev); + radeon_vm_manager_fini(rdev); + radeon_irq_kms_fini(rdev); + si_pcie_gart_fini(rdev); + rdev->accel_working = false; + } + + /* Don't start up if the MC ucode is missing. + * The default clocks and voltages before the MC ucode + * is loaded are not suffient for advanced operations. + */ + if (!rdev->mc_fw) { + DRM_ERROR("radeon: MC ucode required for NI+.\n"); + return -EINVAL; + } + + return 0; +} + +void si_fini(struct radeon_device *rdev) +{ +#if 0 + r600_blit_fini(rdev); +#endif + si_cp_fini(rdev); + si_irq_fini(rdev); + si_rlc_fini(rdev); + radeon_wb_fini(rdev); + radeon_vm_manager_fini(rdev); + r100_ib_fini(rdev); + radeon_irq_kms_fini(rdev); + si_pcie_gart_fini(rdev); + r600_vram_scratch_fini(rdev); + radeon_gem_fini(rdev); + radeon_semaphore_driver_fini(rdev); + radeon_fence_driver_fini(rdev); + radeon_bo_fini(rdev); + radeon_atombios_fini(rdev); + kfree(rdev->bios); + rdev->bios = NULL; +} + diff --git a/drivers/gpu/drm/radeon/si_blit_shaders.c b/drivers/gpu/drm/radeon/si_blit_shaders.c new file mode 100644 index 000000000000..a7124b483adf --- /dev/null +++ b/drivers/gpu/drm/radeon/si_blit_shaders.c @@ -0,0 +1,252 @@ +/* + * Copyright 2011 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> + */ + +#include <linux/types.h> +#include <linux/kernel.h> + +const u32 si_default_state[] = +{ + 0xc0066900, + 0x00000000, + 0x00000060, /* DB_RENDER_CONTROL */ + 0x00000000, /* DB_COUNT_CONTROL */ + 0x00000000, /* DB_DEPTH_VIEW */ + 0x0000002a, /* DB_RENDER_OVERRIDE */ + 0x00000000, /* DB_RENDER_OVERRIDE2 */ + 0x00000000, /* DB_HTILE_DATA_BASE */ + + 0xc0046900, + 0x00000008, + 0x00000000, /* DB_DEPTH_BOUNDS_MIN */ + 0x00000000, /* DB_DEPTH_BOUNDS_MAX */ + 0x00000000, /* DB_STENCIL_CLEAR */ + 0x00000000, /* DB_DEPTH_CLEAR */ + + 0xc0036900, + 0x0000000f, + 0x00000000, /* DB_DEPTH_INFO */ + 0x00000000, /* DB_Z_INFO */ + 0x00000000, /* DB_STENCIL_INFO */ + + 0xc0016900, + 0x00000080, + 0x00000000, /* PA_SC_WINDOW_OFFSET */ + + 0xc00d6900, + 0x00000083, + 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ + 0x00000000, /* PA_SC_CLIPRECT_0_TL */ + 0x20002000, /* PA_SC_CLIPRECT_0_BR */ + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0xaaaaaaaa, /* PA_SC_EDGERULE */ + 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ + 0x0000000f, /* CB_TARGET_MASK */ + 0x0000000f, /* CB_SHADER_MASK */ + + 0xc0226900, + 0x00000094, + 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ + 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ + 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ + + 0xc0026900, + 0x000000d9, + 0x00000000, /* CP_RINGID */ + 0x00000000, /* CP_VMID */ + + 0xc0046900, + 0x00000100, + 0xffffffff, /* VGT_MAX_VTX_INDX */ + 0x00000000, /* VGT_MIN_VTX_INDX */ + 0x00000000, /* VGT_INDX_OFFSET */ + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ + + 0xc0046900, + 0x00000105, + 0x00000000, /* CB_BLEND_RED */ + 0x00000000, /* CB_BLEND_GREEN */ + 0x00000000, /* CB_BLEND_BLUE */ + 0x00000000, /* CB_BLEND_ALPHA */ + + 0xc0016900, + 0x000001e0, + 0x00000000, /* CB_BLEND0_CONTROL */ + + 0xc00e6900, + 0x00000200, + 0x00000000, /* DB_DEPTH_CONTROL */ + 0x00000000, /* DB_EQAA */ + 0x00cc0010, /* CB_COLOR_CONTROL */ + 0x00000210, /* DB_SHADER_CONTROL */ + 0x00010000, /* PA_CL_CLIP_CNTL */ + 0x00000004, /* PA_SU_SC_MODE_CNTL */ + 0x00000100, /* PA_CL_VTE_CNTL */ + 0x00000000, /* PA_CL_VS_OUT_CNTL */ + 0x00000000, /* PA_CL_NANINF_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ + 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + + 0xc0116900, + 0x00000280, + 0x00000000, /* PA_SU_POINT_SIZE */ + 0x00000000, /* PA_SU_POINT_MINMAX */ + 0x00000008, /* PA_SU_LINE_CNTL */ + 0x00000000, /* PA_SC_LINE_STIPPLE */ + 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ + 0x00000000, /* VGT_HOS_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, /* VGT_GS_MODE */ + + 0xc0026900, + 0x00000292, + 0x00000000, /* PA_SC_MODE_CNTL_0 */ + 0x00000000, /* PA_SC_MODE_CNTL_1 */ + + 0xc0016900, + 0x000002a1, + 0x00000000, /* VGT_PRIMITIVEID_EN */ + + 0xc0016900, + 0x000002a5, + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ + + 0xc0026900, + 0x000002a8, + 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ + 0x00000000, + + 0xc0026900, + 0x000002ad, + 0x00000000, /* VGT_REUSE_OFF */ + 0x00000000, + + 0xc0016900, + 0x000002d5, + 0x00000000, /* VGT_SHADER_STAGES_EN */ + + 0xc0016900, + 0x000002dc, + 0x0000aa00, /* DB_ALPHA_TO_MASK */ + + 0xc0066900, + 0x000002de, + 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0026900, + 0x000002e5, + 0x00000000, /* VGT_STRMOUT_CONFIG */ + 0x00000000, + + 0xc01b6900, + 0x000002f5, + 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ + 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ + 0x00000000, /* PA_SC_LINE_CNTL */ + 0x00000000, /* PA_SC_AA_CONFIG */ + 0x00000005, /* PA_SU_VTX_CNTL */ + 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ + 0xffffffff, + + 0xc0026900, + 0x00000316, + 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + 0x00000010, /* */ +}; + +const u32 si_default_size = ARRAY_SIZE(si_default_state); diff --git a/drivers/gpu/drm/radeon/si_blit_shaders.h b/drivers/gpu/drm/radeon/si_blit_shaders.h new file mode 100644 index 000000000000..c739e51e3961 --- /dev/null +++ b/drivers/gpu/drm/radeon/si_blit_shaders.h @@ -0,0 +1,32 @@ +/* + * Copyright 2011 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef SI_BLIT_SHADERS_H +#define SI_BLIT_SHADERS_H + +extern const u32 si_default_state[]; + +extern const u32 si_default_size; + +#endif diff --git a/drivers/gpu/drm/radeon/si_reg.h b/drivers/gpu/drm/radeon/si_reg.h new file mode 100644 index 000000000000..eda938a7cb6e --- /dev/null +++ b/drivers/gpu/drm/radeon/si_reg.h @@ -0,0 +1,33 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#ifndef __SI_REG_H__ +#define __SI_REG_H__ + +/* SI */ +#define SI_DC_GPIO_HPD_MASK 0x65b0 +#define SI_DC_GPIO_HPD_A 0x65b4 +#define SI_DC_GPIO_HPD_EN 0x65b8 +#define SI_DC_GPIO_HPD_Y 0x65bc + +#endif diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h new file mode 100644 index 000000000000..53ea2c42dbd6 --- /dev/null +++ b/drivers/gpu/drm/radeon/sid.h @@ -0,0 +1,886 @@ +/* + * Copyright 2011 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#ifndef SI_H +#define SI_H + +#define CG_MULT_THERMAL_STATUS 0x714 +#define ASIC_MAX_TEMP(x) ((x) << 0) +#define ASIC_MAX_TEMP_MASK 0x000001ff +#define ASIC_MAX_TEMP_SHIFT 0 +#define CTF_TEMP(x) ((x) << 9) +#define CTF_TEMP_MASK 0x0003fe00 +#define CTF_TEMP_SHIFT 9 + +#define SI_MAX_SH_GPRS 256 +#define SI_MAX_TEMP_GPRS 16 +#define SI_MAX_SH_THREADS 256 +#define SI_MAX_SH_STACK_ENTRIES 4096 +#define SI_MAX_FRC_EOV_CNT 16384 +#define SI_MAX_BACKENDS 8 +#define SI_MAX_BACKENDS_MASK 0xFF +#define SI_MAX_BACKENDS_PER_SE_MASK 0x0F +#define SI_MAX_SIMDS 12 +#define SI_MAX_SIMDS_MASK 0x0FFF +#define SI_MAX_SIMDS_PER_SE_MASK 0x00FF +#define SI_MAX_PIPES 8 +#define SI_MAX_PIPES_MASK 0xFF +#define SI_MAX_PIPES_PER_SIMD_MASK 0x3F +#define SI_MAX_LDS_NUM 0xFFFF +#define SI_MAX_TCC 16 +#define SI_MAX_TCC_MASK 0xFFFF + +#define VGA_HDP_CONTROL 0x328 +#define VGA_MEMORY_DISABLE (1 << 4) + +#define DMIF_ADDR_CONFIG 0xBD4 + +#define SRBM_STATUS 0xE50 + +#define CC_SYS_RB_BACKEND_DISABLE 0xe80 +#define GC_USER_SYS_RB_BACKEND_DISABLE 0xe84 + +#define VM_L2_CNTL 0x1400 +#define ENABLE_L2_CACHE (1 << 0) +#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1) +#define L2_CACHE_PTE_ENDIAN_SWAP_MODE(x) ((x) << 2) +#define L2_CACHE_PDE_ENDIAN_SWAP_MODE(x) ((x) << 4) +#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9) +#define ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE (1 << 10) +#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 15) +#define CONTEXT1_IDENTITY_ACCESS_MODE(x) (((x) & 3) << 19) +#define VM_L2_CNTL2 0x1404 +#define INVALIDATE_ALL_L1_TLBS (1 << 0) +#define INVALIDATE_L2_CACHE (1 << 1) +#define INVALIDATE_CACHE_MODE(x) ((x) << 26) +#define INVALIDATE_PTE_AND_PDE_CACHES 0 +#define INVALIDATE_ONLY_PTE_CACHES 1 +#define INVALIDATE_ONLY_PDE_CACHES 2 +#define VM_L2_CNTL3 0x1408 +#define BANK_SELECT(x) ((x) << 0) +#define L2_CACHE_UPDATE_MODE(x) ((x) << 6) +#define L2_CACHE_BIGK_FRAGMENT_SIZE(x) ((x) << 15) +#define L2_CACHE_BIGK_ASSOCIATIVITY (1 << 20) +#define VM_L2_STATUS 0x140C +#define L2_BUSY (1 << 0) +#define VM_CONTEXT0_CNTL 0x1410 +#define ENABLE_CONTEXT (1 << 0) +#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) +#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) +#define VM_CONTEXT1_CNTL 0x1414 +#define VM_CONTEXT0_CNTL2 0x1430 +#define VM_CONTEXT1_CNTL2 0x1434 +#define VM_CONTEXT8_PAGE_TABLE_BASE_ADDR 0x1438 +#define VM_CONTEXT9_PAGE_TABLE_BASE_ADDR 0x143c +#define VM_CONTEXT10_PAGE_TABLE_BASE_ADDR 0x1440 +#define VM_CONTEXT11_PAGE_TABLE_BASE_ADDR 0x1444 +#define VM_CONTEXT12_PAGE_TABLE_BASE_ADDR 0x1448 +#define VM_CONTEXT13_PAGE_TABLE_BASE_ADDR 0x144c +#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x1450 +#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x1454 + +#define VM_INVALIDATE_REQUEST 0x1478 +#define VM_INVALIDATE_RESPONSE 0x147c + +#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x1518 +#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR 0x151c + +#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x153c +#define VM_CONTEXT1_PAGE_TABLE_BASE_ADDR 0x1540 +#define VM_CONTEXT2_PAGE_TABLE_BASE_ADDR 0x1544 +#define VM_CONTEXT3_PAGE_TABLE_BASE_ADDR 0x1548 +#define VM_CONTEXT4_PAGE_TABLE_BASE_ADDR 0x154c +#define VM_CONTEXT5_PAGE_TABLE_BASE_ADDR 0x1550 +#define VM_CONTEXT6_PAGE_TABLE_BASE_ADDR 0x1554 +#define VM_CONTEXT7_PAGE_TABLE_BASE_ADDR 0x1558 +#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x155c +#define VM_CONTEXT1_PAGE_TABLE_START_ADDR 0x1560 + +#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C +#define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x1580 + +#define MC_SHARED_CHMAP 0x2004 +#define NOOFCHAN_SHIFT 12 +#define NOOFCHAN_MASK 0x0000f000 +#define MC_SHARED_CHREMAP 0x2008 + +#define MC_VM_FB_LOCATION 0x2024 +#define MC_VM_AGP_TOP 0x2028 +#define MC_VM_AGP_BOT 0x202C +#define MC_VM_AGP_BASE 0x2030 +#define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x2034 +#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2038 +#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x203C + +#define MC_VM_MX_L1_TLB_CNTL 0x2064 +#define ENABLE_L1_TLB (1 << 0) +#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1) +#define SYSTEM_ACCESS_MODE_PA_ONLY (0 << 3) +#define SYSTEM_ACCESS_MODE_USE_SYS_MAP (1 << 3) +#define SYSTEM_ACCESS_MODE_IN_SYS (2 << 3) +#define SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 3) +#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 5) +#define ENABLE_ADVANCED_DRIVER_MODEL (1 << 6) + +#define MC_SHARED_BLACKOUT_CNTL 0x20ac + +#define MC_ARB_RAMCFG 0x2760 +#define NOOFBANK_SHIFT 0 +#define NOOFBANK_MASK 0x00000003 +#define NOOFRANK_SHIFT 2 +#define NOOFRANK_MASK 0x00000004 +#define NOOFROWS_SHIFT 3 +#define NOOFROWS_MASK 0x00000038 +#define NOOFCOLS_SHIFT 6 +#define NOOFCOLS_MASK 0x000000C0 +#define CHANSIZE_SHIFT 8 +#define CHANSIZE_MASK 0x00000100 +#define CHANSIZE_OVERRIDE (1 << 11) +#define NOOFGROUPS_SHIFT 12 +#define NOOFGROUPS_MASK 0x00001000 + +#define MC_SEQ_TRAIN_WAKEUP_CNTL 0x2808 +#define TRAIN_DONE_D0 (1 << 30) +#define TRAIN_DONE_D1 (1 << 31) + +#define MC_SEQ_SUP_CNTL 0x28c8 +#define RUN_MASK (1 << 0) +#define MC_SEQ_SUP_PGM 0x28cc + +#define MC_IO_PAD_CNTL_D0 0x29d0 +#define MEM_FALL_OUT_CMD (1 << 8) + +#define MC_SEQ_IO_DEBUG_INDEX 0x2a44 +#define MC_SEQ_IO_DEBUG_DATA 0x2a48 + +#define HDP_HOST_PATH_CNTL 0x2C00 +#define HDP_NONSURFACE_BASE 0x2C04 +#define HDP_NONSURFACE_INFO 0x2C08 +#define HDP_NONSURFACE_SIZE 0x2C0C + +#define HDP_ADDR_CONFIG 0x2F48 +#define HDP_MISC_CNTL 0x2F4C +#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0) + +#define IH_RB_CNTL 0x3e00 +# define IH_RB_ENABLE (1 << 0) +# define IH_IB_SIZE(x) ((x) << 1) /* log2 */ +# define IH_RB_FULL_DRAIN_ENABLE (1 << 6) +# define IH_WPTR_WRITEBACK_ENABLE (1 << 8) +# define IH_WPTR_WRITEBACK_TIMER(x) ((x) << 9) /* log2 */ +# define IH_WPTR_OVERFLOW_ENABLE (1 << 16) +# define IH_WPTR_OVERFLOW_CLEAR (1 << 31) +#define IH_RB_BASE 0x3e04 +#define IH_RB_RPTR 0x3e08 +#define IH_RB_WPTR 0x3e0c +# define RB_OVERFLOW (1 << 0) +# define WPTR_OFFSET_MASK 0x3fffc +#define IH_RB_WPTR_ADDR_HI 0x3e10 +#define IH_RB_WPTR_ADDR_LO 0x3e14 +#define IH_CNTL 0x3e18 +# define ENABLE_INTR (1 << 0) +# define IH_MC_SWAP(x) ((x) << 1) +# define IH_MC_SWAP_NONE 0 +# define IH_MC_SWAP_16BIT 1 +# define IH_MC_SWAP_32BIT 2 +# define IH_MC_SWAP_64BIT 3 +# define RPTR_REARM (1 << 4) +# define MC_WRREQ_CREDIT(x) ((x) << 15) +# define MC_WR_CLEAN_CNT(x) ((x) << 20) +# define MC_VMID(x) ((x) << 25) + +#define CONFIG_MEMSIZE 0x5428 + +#define INTERRUPT_CNTL 0x5468 +# define IH_DUMMY_RD_OVERRIDE (1 << 0) +# define IH_DUMMY_RD_EN (1 << 1) +# define IH_REQ_NONSNOOP_EN (1 << 3) +# define GEN_IH_INT_EN (1 << 8) +#define INTERRUPT_CNTL2 0x546c + +#define HDP_MEM_COHERENCY_FLUSH_CNTL 0x5480 + +#define BIF_FB_EN 0x5490 +#define FB_READ_EN (1 << 0) +#define FB_WRITE_EN (1 << 1) + +#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0 + +#define DC_LB_MEMORY_SPLIT 0x6b0c +#define DC_LB_MEMORY_CONFIG(x) ((x) << 20) + +#define PRIORITY_A_CNT 0x6b18 +#define PRIORITY_MARK_MASK 0x7fff +#define PRIORITY_OFF (1 << 16) +#define PRIORITY_ALWAYS_ON (1 << 20) +#define PRIORITY_B_CNT 0x6b1c + +#define DPG_PIPE_ARBITRATION_CONTROL3 0x6cc8 +# define LATENCY_WATERMARK_MASK(x) ((x) << 16) +#define DPG_PIPE_LATENCY_CONTROL 0x6ccc +# define LATENCY_LOW_WATERMARK(x) ((x) << 0) +# define LATENCY_HIGH_WATERMARK(x) ((x) << 16) + +/* 0x6bb8, 0x77b8, 0x103b8, 0x10fb8, 0x11bb8, 0x127b8 */ +#define VLINE_STATUS 0x6bb8 +# define VLINE_OCCURRED (1 << 0) +# define VLINE_ACK (1 << 4) +# define VLINE_STAT (1 << 12) +# define VLINE_INTERRUPT (1 << 16) +# define VLINE_INTERRUPT_TYPE (1 << 17) +/* 0x6bbc, 0x77bc, 0x103bc, 0x10fbc, 0x11bbc, 0x127bc */ +#define VBLANK_STATUS 0x6bbc +# define VBLANK_OCCURRED (1 << 0) +# define VBLANK_ACK (1 << 4) +# define VBLANK_STAT (1 << 12) +# define VBLANK_INTERRUPT (1 << 16) +# define VBLANK_INTERRUPT_TYPE (1 << 17) + +/* 0x6b40, 0x7740, 0x10340, 0x10f40, 0x11b40, 0x12740 */ +#define INT_MASK 0x6b40 +# define VBLANK_INT_MASK (1 << 0) +# define VLINE_INT_MASK (1 << 4) + +#define DISP_INTERRUPT_STATUS 0x60f4 +# define LB_D1_VLINE_INTERRUPT (1 << 2) +# define LB_D1_VBLANK_INTERRUPT (1 << 3) +# define DC_HPD1_INTERRUPT (1 << 17) +# define DC_HPD1_RX_INTERRUPT (1 << 18) +# define DACA_AUTODETECT_INTERRUPT (1 << 22) +# define DACB_AUTODETECT_INTERRUPT (1 << 23) +# define DC_I2C_SW_DONE_INTERRUPT (1 << 24) +# define DC_I2C_HW_DONE_INTERRUPT (1 << 25) +#define DISP_INTERRUPT_STATUS_CONTINUE 0x60f8 +# define LB_D2_VLINE_INTERRUPT (1 << 2) +# define LB_D2_VBLANK_INTERRUPT (1 << 3) +# define DC_HPD2_INTERRUPT (1 << 17) +# define DC_HPD2_RX_INTERRUPT (1 << 18) +# define DISP_TIMER_INTERRUPT (1 << 24) +#define DISP_INTERRUPT_STATUS_CONTINUE2 0x60fc +# define LB_D3_VLINE_INTERRUPT (1 << 2) +# define LB_D3_VBLANK_INTERRUPT (1 << 3) +# define DC_HPD3_INTERRUPT (1 << 17) +# define DC_HPD3_RX_INTERRUPT (1 << 18) +#define DISP_INTERRUPT_STATUS_CONTINUE3 0x6100 +# define LB_D4_VLINE_INTERRUPT (1 << 2) +# define LB_D4_VBLANK_INTERRUPT (1 << 3) +# define DC_HPD4_INTERRUPT (1 << 17) +# define DC_HPD4_RX_INTERRUPT (1 << 18) +#define DISP_INTERRUPT_STATUS_CONTINUE4 0x614c +# define LB_D5_VLINE_INTERRUPT (1 << 2) +# define LB_D5_VBLANK_INTERRUPT (1 << 3) +# define DC_HPD5_INTERRUPT (1 << 17) +# define DC_HPD5_RX_INTERRUPT (1 << 18) +#define DISP_INTERRUPT_STATUS_CONTINUE5 0x6150 +# define LB_D6_VLINE_INTERRUPT (1 << 2) +# define LB_D6_VBLANK_INTERRUPT (1 << 3) +# define DC_HPD6_INTERRUPT (1 << 17) +# define DC_HPD6_RX_INTERRUPT (1 << 18) + +/* 0x6858, 0x7458, 0x10058, 0x10c58, 0x11858, 0x12458 */ +#define GRPH_INT_STATUS 0x6858 +# define GRPH_PFLIP_INT_OCCURRED (1 << 0) +# define GRPH_PFLIP_INT_CLEAR (1 << 8) +/* 0x685c, 0x745c, 0x1005c, 0x10c5c, 0x1185c, 0x1245c */ +#define GRPH_INT_CONTROL 0x685c +# define GRPH_PFLIP_INT_MASK (1 << 0) +# define GRPH_PFLIP_INT_TYPE (1 << 8) + +#define DACA_AUTODETECT_INT_CONTROL 0x66c8 + +#define DC_HPD1_INT_STATUS 0x601c +#define DC_HPD2_INT_STATUS 0x6028 +#define DC_HPD3_INT_STATUS 0x6034 +#define DC_HPD4_INT_STATUS 0x6040 +#define DC_HPD5_INT_STATUS 0x604c +#define DC_HPD6_INT_STATUS 0x6058 +# define DC_HPDx_INT_STATUS (1 << 0) +# define DC_HPDx_SENSE (1 << 1) +# define DC_HPDx_RX_INT_STATUS (1 << 8) + +#define DC_HPD1_INT_CONTROL 0x6020 +#define DC_HPD2_INT_CONTROL 0x602c +#define DC_HPD3_INT_CONTROL 0x6038 +#define DC_HPD4_INT_CONTROL 0x6044 +#define DC_HPD5_INT_CONTROL 0x6050 +#define DC_HPD6_INT_CONTROL 0x605c +# define DC_HPDx_INT_ACK (1 << 0) +# define DC_HPDx_INT_POLARITY (1 << 8) +# define DC_HPDx_INT_EN (1 << 16) +# define DC_HPDx_RX_INT_ACK (1 << 20) +# define DC_HPDx_RX_INT_EN (1 << 24) + +#define DC_HPD1_CONTROL 0x6024 +#define DC_HPD2_CONTROL 0x6030 +#define DC_HPD3_CONTROL 0x603c +#define DC_HPD4_CONTROL 0x6048 +#define DC_HPD5_CONTROL 0x6054 +#define DC_HPD6_CONTROL 0x6060 +# define DC_HPDx_CONNECTION_TIMER(x) ((x) << 0) +# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) +# define DC_HPDx_EN (1 << 28) + +/* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */ +#define CRTC_STATUS_FRAME_COUNT 0x6e98 + +#define GRBM_CNTL 0x8000 +#define GRBM_READ_TIMEOUT(x) ((x) << 0) + +#define GRBM_STATUS2 0x8008 +#define RLC_RQ_PENDING (1 << 0) +#define RLC_BUSY (1 << 8) +#define TC_BUSY (1 << 9) + +#define GRBM_STATUS 0x8010 +#define CMDFIFO_AVAIL_MASK 0x0000000F +#define RING2_RQ_PENDING (1 << 4) +#define SRBM_RQ_PENDING (1 << 5) +#define RING1_RQ_PENDING (1 << 6) +#define CF_RQ_PENDING (1 << 7) +#define PF_RQ_PENDING (1 << 8) +#define GDS_DMA_RQ_PENDING (1 << 9) +#define GRBM_EE_BUSY (1 << 10) +#define DB_CLEAN (1 << 12) +#define CB_CLEAN (1 << 13) +#define TA_BUSY (1 << 14) +#define GDS_BUSY (1 << 15) +#define VGT_BUSY (1 << 17) +#define IA_BUSY_NO_DMA (1 << 18) +#define IA_BUSY (1 << 19) +#define SX_BUSY (1 << 20) +#define SPI_BUSY (1 << 22) +#define BCI_BUSY (1 << 23) +#define SC_BUSY (1 << 24) +#define PA_BUSY (1 << 25) +#define DB_BUSY (1 << 26) +#define CP_COHERENCY_BUSY (1 << 28) +#define CP_BUSY (1 << 29) +#define CB_BUSY (1 << 30) +#define GUI_ACTIVE (1 << 31) +#define GRBM_STATUS_SE0 0x8014 +#define GRBM_STATUS_SE1 0x8018 +#define SE_DB_CLEAN (1 << 1) +#define SE_CB_CLEAN (1 << 2) +#define SE_BCI_BUSY (1 << 22) +#define SE_VGT_BUSY (1 << 23) +#define SE_PA_BUSY (1 << 24) +#define SE_TA_BUSY (1 << 25) +#define SE_SX_BUSY (1 << 26) +#define SE_SPI_BUSY (1 << 27) +#define SE_SC_BUSY (1 << 29) +#define SE_DB_BUSY (1 << 30) +#define SE_CB_BUSY (1 << 31) + +#define GRBM_SOFT_RESET 0x8020 +#define SOFT_RESET_CP (1 << 0) +#define SOFT_RESET_CB (1 << 1) +#define SOFT_RESET_RLC (1 << 2) +#define SOFT_RESET_DB (1 << 3) +#define SOFT_RESET_GDS (1 << 4) +#define SOFT_RESET_PA (1 << 5) +#define SOFT_RESET_SC (1 << 6) +#define SOFT_RESET_BCI (1 << 7) +#define SOFT_RESET_SPI (1 << 8) +#define SOFT_RESET_SX (1 << 10) +#define SOFT_RESET_TC (1 << 11) +#define SOFT_RESET_TA (1 << 12) +#define SOFT_RESET_VGT (1 << 14) +#define SOFT_RESET_IA (1 << 15) + +#define GRBM_GFX_INDEX 0x802C + +#define GRBM_INT_CNTL 0x8060 +# define RDERR_INT_ENABLE (1 << 0) +# define GUI_IDLE_INT_ENABLE (1 << 19) + +#define SCRATCH_REG0 0x8500 +#define SCRATCH_REG1 0x8504 +#define SCRATCH_REG2 0x8508 +#define SCRATCH_REG3 0x850C +#define SCRATCH_REG4 0x8510 +#define SCRATCH_REG5 0x8514 +#define SCRATCH_REG6 0x8518 +#define SCRATCH_REG7 0x851C + +#define SCRATCH_UMSK 0x8540 +#define SCRATCH_ADDR 0x8544 + +#define CP_SEM_WAIT_TIMER 0x85BC + +#define CP_SEM_INCOMPLETE_TIMER_CNTL 0x85C8 + +#define CP_ME_CNTL 0x86D8 +#define CP_CE_HALT (1 << 24) +#define CP_PFP_HALT (1 << 26) +#define CP_ME_HALT (1 << 28) + +#define CP_COHER_CNTL2 0x85E8 + +#define CP_RB2_RPTR 0x86f8 +#define CP_RB1_RPTR 0x86fc +#define CP_RB0_RPTR 0x8700 +#define CP_RB_WPTR_DELAY 0x8704 + +#define CP_QUEUE_THRESHOLDS 0x8760 +#define ROQ_IB1_START(x) ((x) << 0) +#define ROQ_IB2_START(x) ((x) << 8) +#define CP_MEQ_THRESHOLDS 0x8764 +#define MEQ1_START(x) ((x) << 0) +#define MEQ2_START(x) ((x) << 8) + +#define CP_PERFMON_CNTL 0x87FC + +#define VGT_VTX_VECT_EJECT_REG 0x88B0 + +#define VGT_CACHE_INVALIDATION 0x88C4 +#define CACHE_INVALIDATION(x) ((x) << 0) +#define VC_ONLY 0 +#define TC_ONLY 1 +#define VC_AND_TC 2 +#define AUTO_INVLD_EN(x) ((x) << 6) +#define NO_AUTO 0 +#define ES_AUTO 1 +#define GS_AUTO 2 +#define ES_AND_GS_AUTO 3 +#define VGT_ESGS_RING_SIZE 0x88C8 +#define VGT_GSVS_RING_SIZE 0x88CC + +#define VGT_GS_VERTEX_REUSE 0x88D4 + +#define VGT_PRIMITIVE_TYPE 0x8958 +#define VGT_INDEX_TYPE 0x895C + +#define VGT_NUM_INDICES 0x8970 +#define VGT_NUM_INSTANCES 0x8974 + +#define VGT_TF_RING_SIZE 0x8988 + +#define VGT_HS_OFFCHIP_PARAM 0x89B0 + +#define VGT_TF_MEMORY_BASE 0x89B8 + +#define CC_GC_SHADER_ARRAY_CONFIG 0x89bc +#define GC_USER_SHADER_ARRAY_CONFIG 0x89c0 + +#define PA_CL_ENHANCE 0x8A14 +#define CLIP_VTX_REORDER_ENA (1 << 0) +#define NUM_CLIP_SEQ(x) ((x) << 1) + +#define PA_SU_LINE_STIPPLE_VALUE 0x8A60 + +#define PA_SC_LINE_STIPPLE_STATE 0x8B10 + +#define PA_SC_FORCE_EOV_MAX_CNTS 0x8B24 +#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0) +#define FORCE_EOV_MAX_REZ_CNT(x) ((x) << 16) + +#define PA_SC_FIFO_SIZE 0x8BCC +#define SC_FRONTEND_PRIM_FIFO_SIZE(x) ((x) << 0) +#define SC_BACKEND_PRIM_FIFO_SIZE(x) ((x) << 6) +#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 15) +#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 23) + +#define PA_SC_ENHANCE 0x8BF0 + +#define SQ_CONFIG 0x8C00 + +#define SQC_CACHES 0x8C08 + +#define SX_DEBUG_1 0x9060 + +#define SPI_STATIC_THREAD_MGMT_1 0x90E0 +#define SPI_STATIC_THREAD_MGMT_2 0x90E4 +#define SPI_STATIC_THREAD_MGMT_3 0x90E8 +#define SPI_PS_MAX_WAVE_ID 0x90EC + +#define SPI_CONFIG_CNTL 0x9100 + +#define SPI_CONFIG_CNTL_1 0x913C +#define VTX_DONE_DELAY(x) ((x) << 0) +#define INTERP_ONE_PRIM_PER_ROW (1 << 4) + +#define CGTS_TCC_DISABLE 0x9148 +#define CGTS_USER_TCC_DISABLE 0x914C +#define TCC_DISABLE_MASK 0xFFFF0000 +#define TCC_DISABLE_SHIFT 16 + +#define TA_CNTL_AUX 0x9508 + +#define CC_RB_BACKEND_DISABLE 0x98F4 +#define BACKEND_DISABLE(x) ((x) << 16) +#define GB_ADDR_CONFIG 0x98F8 +#define NUM_PIPES(x) ((x) << 0) +#define NUM_PIPES_MASK 0x00000007 +#define NUM_PIPES_SHIFT 0 +#define PIPE_INTERLEAVE_SIZE(x) ((x) << 4) +#define PIPE_INTERLEAVE_SIZE_MASK 0x00000070 +#define PIPE_INTERLEAVE_SIZE_SHIFT 4 +#define NUM_SHADER_ENGINES(x) ((x) << 12) +#define NUM_SHADER_ENGINES_MASK 0x00003000 +#define NUM_SHADER_ENGINES_SHIFT 12 +#define SHADER_ENGINE_TILE_SIZE(x) ((x) << 16) +#define SHADER_ENGINE_TILE_SIZE_MASK 0x00070000 +#define SHADER_ENGINE_TILE_SIZE_SHIFT 16 +#define NUM_GPUS(x) ((x) << 20) +#define NUM_GPUS_MASK 0x00700000 +#define NUM_GPUS_SHIFT 20 +#define MULTI_GPU_TILE_SIZE(x) ((x) << 24) +#define MULTI_GPU_TILE_SIZE_MASK 0x03000000 +#define MULTI_GPU_TILE_SIZE_SHIFT 24 +#define ROW_SIZE(x) ((x) << 28) +#define ROW_SIZE_MASK 0x30000000 +#define ROW_SIZE_SHIFT 28 + +#define GB_TILE_MODE0 0x9910 +# define MICRO_TILE_MODE(x) ((x) << 0) +# define ADDR_SURF_DISPLAY_MICRO_TILING 0 +# define ADDR_SURF_THIN_MICRO_TILING 1 +# define ADDR_SURF_DEPTH_MICRO_TILING 2 +# define ARRAY_MODE(x) ((x) << 2) +# define ARRAY_LINEAR_GENERAL 0 +# define ARRAY_LINEAR_ALIGNED 1 +# define ARRAY_1D_TILED_THIN1 2 +# define ARRAY_2D_TILED_THIN1 4 +# define PIPE_CONFIG(x) ((x) << 6) +# define ADDR_SURF_P2 0 +# define ADDR_SURF_P4_8x16 4 +# define ADDR_SURF_P4_16x16 5 +# define ADDR_SURF_P4_16x32 6 +# define ADDR_SURF_P4_32x32 7 +# define ADDR_SURF_P8_16x16_8x16 8 +# define ADDR_SURF_P8_16x32_8x16 9 +# define ADDR_SURF_P8_32x32_8x16 10 +# define ADDR_SURF_P8_16x32_16x16 11 +# define ADDR_SURF_P8_32x32_16x16 12 +# define ADDR_SURF_P8_32x32_16x32 13 +# define ADDR_SURF_P8_32x64_32x32 14 +# define TILE_SPLIT(x) ((x) << 11) +# define ADDR_SURF_TILE_SPLIT_64B 0 +# define ADDR_SURF_TILE_SPLIT_128B 1 +# define ADDR_SURF_TILE_SPLIT_256B 2 +# define ADDR_SURF_TILE_SPLIT_512B 3 +# define ADDR_SURF_TILE_SPLIT_1KB 4 +# define ADDR_SURF_TILE_SPLIT_2KB 5 +# define ADDR_SURF_TILE_SPLIT_4KB 6 +# define BANK_WIDTH(x) ((x) << 14) +# define ADDR_SURF_BANK_WIDTH_1 0 +# define ADDR_SURF_BANK_WIDTH_2 1 +# define ADDR_SURF_BANK_WIDTH_4 2 +# define ADDR_SURF_BANK_WIDTH_8 3 +# define BANK_HEIGHT(x) ((x) << 16) +# define ADDR_SURF_BANK_HEIGHT_1 0 +# define ADDR_SURF_BANK_HEIGHT_2 1 +# define ADDR_SURF_BANK_HEIGHT_4 2 +# define ADDR_SURF_BANK_HEIGHT_8 3 +# define MACRO_TILE_ASPECT(x) ((x) << 18) +# define ADDR_SURF_MACRO_ASPECT_1 0 +# define ADDR_SURF_MACRO_ASPECT_2 1 +# define ADDR_SURF_MACRO_ASPECT_4 2 +# define ADDR_SURF_MACRO_ASPECT_8 3 +# define NUM_BANKS(x) ((x) << 20) +# define ADDR_SURF_2_BANK 0 +# define ADDR_SURF_4_BANK 1 +# define ADDR_SURF_8_BANK 2 +# define ADDR_SURF_16_BANK 3 + +#define CB_PERFCOUNTER0_SELECT0 0x9a20 +#define CB_PERFCOUNTER0_SELECT1 0x9a24 +#define CB_PERFCOUNTER1_SELECT0 0x9a28 +#define CB_PERFCOUNTER1_SELECT1 0x9a2c +#define CB_PERFCOUNTER2_SELECT0 0x9a30 +#define CB_PERFCOUNTER2_SELECT1 0x9a34 +#define CB_PERFCOUNTER3_SELECT0 0x9a38 +#define CB_PERFCOUNTER3_SELECT1 0x9a3c + +#define GC_USER_RB_BACKEND_DISABLE 0x9B7C +#define BACKEND_DISABLE_MASK 0x00FF0000 +#define BACKEND_DISABLE_SHIFT 16 + +#define TCP_CHAN_STEER_LO 0xac0c +#define TCP_CHAN_STEER_HI 0xac10 + +#define CP_RB0_BASE 0xC100 +#define CP_RB0_CNTL 0xC104 +#define RB_BUFSZ(x) ((x) << 0) +#define RB_BLKSZ(x) ((x) << 8) +#define BUF_SWAP_32BIT (2 << 16) +#define RB_NO_UPDATE (1 << 27) +#define RB_RPTR_WR_ENA (1 << 31) + +#define CP_RB0_RPTR_ADDR 0xC10C +#define CP_RB0_RPTR_ADDR_HI 0xC110 +#define CP_RB0_WPTR 0xC114 + +#define CP_PFP_UCODE_ADDR 0xC150 +#define CP_PFP_UCODE_DATA 0xC154 +#define CP_ME_RAM_RADDR 0xC158 +#define CP_ME_RAM_WADDR 0xC15C +#define CP_ME_RAM_DATA 0xC160 + +#define CP_CE_UCODE_ADDR 0xC168 +#define CP_CE_UCODE_DATA 0xC16C + +#define CP_RB1_BASE 0xC180 +#define CP_RB1_CNTL 0xC184 +#define CP_RB1_RPTR_ADDR 0xC188 +#define CP_RB1_RPTR_ADDR_HI 0xC18C +#define CP_RB1_WPTR 0xC190 +#define CP_RB2_BASE 0xC194 +#define CP_RB2_CNTL 0xC198 +#define CP_RB2_RPTR_ADDR 0xC19C +#define CP_RB2_RPTR_ADDR_HI 0xC1A0 +#define CP_RB2_WPTR 0xC1A4 +#define CP_INT_CNTL_RING0 0xC1A8 +#define CP_INT_CNTL_RING1 0xC1AC +#define CP_INT_CNTL_RING2 0xC1B0 +# define CNTX_BUSY_INT_ENABLE (1 << 19) +# define CNTX_EMPTY_INT_ENABLE (1 << 20) +# define WAIT_MEM_SEM_INT_ENABLE (1 << 21) +# define TIME_STAMP_INT_ENABLE (1 << 26) +# define CP_RINGID2_INT_ENABLE (1 << 29) +# define CP_RINGID1_INT_ENABLE (1 << 30) +# define CP_RINGID0_INT_ENABLE (1 << 31) +#define CP_INT_STATUS_RING0 0xC1B4 +#define CP_INT_STATUS_RING1 0xC1B8 +#define CP_INT_STATUS_RING2 0xC1BC +# define WAIT_MEM_SEM_INT_STAT (1 << 21) +# define TIME_STAMP_INT_STAT (1 << 26) +# define CP_RINGID2_INT_STAT (1 << 29) +# define CP_RINGID1_INT_STAT (1 << 30) +# define CP_RINGID0_INT_STAT (1 << 31) + +#define CP_DEBUG 0xC1FC + +#define RLC_CNTL 0xC300 +# define RLC_ENABLE (1 << 0) +#define RLC_RL_BASE 0xC304 +#define RLC_RL_SIZE 0xC308 +#define RLC_LB_CNTL 0xC30C +#define RLC_SAVE_AND_RESTORE_BASE 0xC310 +#define RLC_LB_CNTR_MAX 0xC314 +#define RLC_LB_CNTR_INIT 0xC318 + +#define RLC_CLEAR_STATE_RESTORE_BASE 0xC320 + +#define RLC_UCODE_ADDR 0xC32C +#define RLC_UCODE_DATA 0xC330 + +#define RLC_MC_CNTL 0xC344 +#define RLC_UCODE_CNTL 0xC348 + +#define VGT_EVENT_INITIATOR 0x28a90 +# define SAMPLE_STREAMOUTSTATS1 (1 << 0) +# define SAMPLE_STREAMOUTSTATS2 (2 << 0) +# define SAMPLE_STREAMOUTSTATS3 (3 << 0) +# define CACHE_FLUSH_TS (4 << 0) +# define CACHE_FLUSH (6 << 0) +# define CS_PARTIAL_FLUSH (7 << 0) +# define VGT_STREAMOUT_RESET (10 << 0) +# define END_OF_PIPE_INCR_DE (11 << 0) +# define END_OF_PIPE_IB_END (12 << 0) +# define RST_PIX_CNT (13 << 0) +# define VS_PARTIAL_FLUSH (15 << 0) +# define PS_PARTIAL_FLUSH (16 << 0) +# define CACHE_FLUSH_AND_INV_TS_EVENT (20 << 0) +# define ZPASS_DONE (21 << 0) +# define CACHE_FLUSH_AND_INV_EVENT (22 << 0) +# define PERFCOUNTER_START (23 << 0) +# define PERFCOUNTER_STOP (24 << 0) +# define PIPELINESTAT_START (25 << 0) +# define PIPELINESTAT_STOP (26 << 0) +# define PERFCOUNTER_SAMPLE (27 << 0) +# define SAMPLE_PIPELINESTAT (30 << 0) +# define SAMPLE_STREAMOUTSTATS (32 << 0) +# define RESET_VTX_CNT (33 << 0) +# define VGT_FLUSH (36 << 0) +# define BOTTOM_OF_PIPE_TS (40 << 0) +# define DB_CACHE_FLUSH_AND_INV (42 << 0) +# define FLUSH_AND_INV_DB_DATA_TS (43 << 0) +# define FLUSH_AND_INV_DB_META (44 << 0) +# define FLUSH_AND_INV_CB_DATA_TS (45 << 0) +# define FLUSH_AND_INV_CB_META (46 << 0) +# define CS_DONE (47 << 0) +# define PS_DONE (48 << 0) +# define FLUSH_AND_INV_CB_PIXEL_DATA (49 << 0) +# define THREAD_TRACE_START (51 << 0) +# define THREAD_TRACE_STOP (52 << 0) +# define THREAD_TRACE_FLUSH (54 << 0) +# define THREAD_TRACE_FINISH (55 << 0) + +/* + * PM4 + */ +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) +#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ + (((reg) >> 2) & 0xFFFF) | \ + ((n) & 0x3FFF) << 16) +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) + +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) + +#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ + (((op) & 0xFF) << 8) | \ + ((n) & 0x3FFF) << 16) + +#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) + +/* Packet 3 types */ +#define PACKET3_NOP 0x10 +#define PACKET3_SET_BASE 0x11 +#define PACKET3_BASE_INDEX(x) ((x) << 0) +#define GDS_PARTITION_BASE 2 +#define CE_PARTITION_BASE 3 +#define PACKET3_CLEAR_STATE 0x12 +#define PACKET3_INDEX_BUFFER_SIZE 0x13 +#define PACKET3_DISPATCH_DIRECT 0x15 +#define PACKET3_DISPATCH_INDIRECT 0x16 +#define PACKET3_ALLOC_GDS 0x1B +#define PACKET3_WRITE_GDS_RAM 0x1C +#define PACKET3_ATOMIC_GDS 0x1D +#define PACKET3_ATOMIC 0x1E +#define PACKET3_OCCLUSION_QUERY 0x1F +#define PACKET3_SET_PREDICATION 0x20 +#define PACKET3_REG_RMW 0x21 +#define PACKET3_COND_EXEC 0x22 +#define PACKET3_PRED_EXEC 0x23 +#define PACKET3_DRAW_INDIRECT 0x24 +#define PACKET3_DRAW_INDEX_INDIRECT 0x25 +#define PACKET3_INDEX_BASE 0x26 +#define PACKET3_DRAW_INDEX_2 0x27 +#define PACKET3_CONTEXT_CONTROL 0x28 +#define PACKET3_INDEX_TYPE 0x2A +#define PACKET3_DRAW_INDIRECT_MULTI 0x2C +#define PACKET3_DRAW_INDEX_AUTO 0x2D +#define PACKET3_DRAW_INDEX_IMMD 0x2E +#define PACKET3_NUM_INSTANCES 0x2F +#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 +#define PACKET3_INDIRECT_BUFFER_CONST 0x31 +#define PACKET3_INDIRECT_BUFFER 0x32 +#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 +#define PACKET3_DRAW_INDEX_OFFSET_2 0x35 +#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36 +#define PACKET3_WRITE_DATA 0x37 +#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 +#define PACKET3_MEM_SEMAPHORE 0x39 +#define PACKET3_MPEG_INDEX 0x3A +#define PACKET3_COPY_DW 0x3B +#define PACKET3_WAIT_REG_MEM 0x3C +#define PACKET3_MEM_WRITE 0x3D +#define PACKET3_COPY_DATA 0x40 +#define PACKET3_PFP_SYNC_ME 0x42 +#define PACKET3_SURFACE_SYNC 0x43 +# define PACKET3_DEST_BASE_0_ENA (1 << 0) +# define PACKET3_DEST_BASE_1_ENA (1 << 1) +# define PACKET3_CB0_DEST_BASE_ENA (1 << 6) +# define PACKET3_CB1_DEST_BASE_ENA (1 << 7) +# define PACKET3_CB2_DEST_BASE_ENA (1 << 8) +# define PACKET3_CB3_DEST_BASE_ENA (1 << 9) +# define PACKET3_CB4_DEST_BASE_ENA (1 << 10) +# define PACKET3_CB5_DEST_BASE_ENA (1 << 11) +# define PACKET3_CB6_DEST_BASE_ENA (1 << 12) +# define PACKET3_CB7_DEST_BASE_ENA (1 << 13) +# define PACKET3_DB_DEST_BASE_ENA (1 << 14) +# define PACKET3_DEST_BASE_2_ENA (1 << 19) +# define PACKET3_DEST_BASE_3_ENA (1 << 21) +# define PACKET3_TCL1_ACTION_ENA (1 << 22) +# define PACKET3_TC_ACTION_ENA (1 << 23) +# define PACKET3_CB_ACTION_ENA (1 << 25) +# define PACKET3_DB_ACTION_ENA (1 << 26) +# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27) +# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29) +#define PACKET3_ME_INITIALIZE 0x44 +#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) +#define PACKET3_COND_WRITE 0x45 +#define PACKET3_EVENT_WRITE 0x46 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + * 5 - EOP events + * 6 - EOS events + * 7 - CACHE_FLUSH, CACHE_FLUSH_AND_INV_EVENT + */ +#define INV_L2 (1 << 20) + /* INV TC L2 cache when EVENT_INDEX = 7 */ +#define PACKET3_EVENT_WRITE_EOP 0x47 +#define DATA_SEL(x) ((x) << 29) + /* 0 - discard + * 1 - send low 32bit data + * 2 - send 64bit data + * 3 - send 64bit counter value + */ +#define INT_SEL(x) ((x) << 24) + /* 0 - none + * 1 - interrupt only (DATA_SEL = 0) + * 2 - interrupt when data write is confirmed + */ +#define PACKET3_EVENT_WRITE_EOS 0x48 +#define PACKET3_PREAMBLE_CNTL 0x4A +# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) +# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) +#define PACKET3_ONE_REG_WRITE 0x57 +#define PACKET3_LOAD_CONFIG_REG 0x5F +#define PACKET3_LOAD_CONTEXT_REG 0x60 +#define PACKET3_LOAD_SH_REG 0x61 +#define PACKET3_SET_CONFIG_REG 0x68 +#define PACKET3_SET_CONFIG_REG_START 0x00008000 +#define PACKET3_SET_CONFIG_REG_END 0x0000b000 +#define PACKET3_SET_CONTEXT_REG 0x69 +#define PACKET3_SET_CONTEXT_REG_START 0x00028000 +#define PACKET3_SET_CONTEXT_REG_END 0x00029000 +#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 +#define PACKET3_SET_RESOURCE_INDIRECT 0x74 +#define PACKET3_SET_SH_REG 0x76 +#define PACKET3_SET_SH_REG_START 0x0000b000 +#define PACKET3_SET_SH_REG_END 0x0000c000 +#define PACKET3_SET_SH_REG_OFFSET 0x77 +#define PACKET3_ME_WRITE 0x7A +#define PACKET3_SCRATCH_RAM_WRITE 0x7D +#define PACKET3_SCRATCH_RAM_READ 0x7E +#define PACKET3_CE_WRITE 0x7F +#define PACKET3_LOAD_CONST_RAM 0x80 +#define PACKET3_WRITE_CONST_RAM 0x81 +#define PACKET3_WRITE_CONST_RAM_OFFSET 0x82 +#define PACKET3_DUMP_CONST_RAM 0x83 +#define PACKET3_INCREMENT_CE_COUNTER 0x84 +#define PACKET3_INCREMENT_DE_COUNTER 0x85 +#define PACKET3_WAIT_ON_CE_COUNTER 0x86 +#define PACKET3_WAIT_ON_DE_COUNTER 0x87 +#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 +#define PACKET3_SET_CE_DE_COUNTERS 0x89 +#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A + +#endif |